Small fixes in different places
[gofetch.git] / src / be / nikiroo / gofetch / support / Slashdot.java
CommitLineData
73785268
NR
1package be.nikiroo.gofetch.support;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.URL;
6import java.util.ArrayList;
7import java.util.List;
8
9import org.jsoup.helper.DataUtil;
10import org.jsoup.nodes.Document;
11import org.jsoup.nodes.Element;
27008a87 12import org.jsoup.nodes.Node;
73785268
NR
13import org.jsoup.select.Elements;
14
15import be.nikiroo.gofetch.data.Comment;
16import be.nikiroo.gofetch.data.Story;
b34d1f35 17import be.nikiroo.utils.StringUtils;
73785268 18
70b18499
NR
19/**
20 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
21 *
22 * @author niki
23 */
73785268
NR
24public class Slashdot extends BasicSupport {
25 @Override
26 public String getDescription() {
27 return "Slashdot: News for nerds, stuff that matters!";
28 }
29
30 @Override
31 public List<Story> list() throws IOException {
32 List<Story> list = new ArrayList<Story>();
33
34 URL url = new URL("https://slashdot.org/");
136ab801 35 InputStream in = downloader.open(url);
73785268 36 Document doc = DataUtil.load(in, "UTF-8", url.toString());
100a8395
NR
37 Elements articles = doc.getElementsByTag("header");
38 for (Element article : articles) {
39 Elements titles = article.getElementsByClass("story-title");
73785268
NR
40 if (titles.size() == 0) {
41 continue;
42 }
70b18499 43
73785268
NR
44 Element title = titles.get(0);
45
46 String id = "" + title.attr("id");
47 if (id.startsWith("title-")) {
48 id = id.substring("title-".length());
49 }
50
51 Elements links = title.getElementsByTag("a");
c9cffa91
NR
52 String intUrl = "";
53 String extUrl = "";
73785268
NR
54 if (links.size() > 0) {
55 intUrl = links.get(0).absUrl("href");
56 }
57 if (links.size() > 1) {
58 extUrl = links.get(1).absUrl("href");
59 }
60
61 String details = "";
100a8395 62 Elements detailsElements = article.getElementsByClass("details");
73785268
NR
63 if (detailsElements.size() > 0) {
64 details = detailsElements.get(0).text();
65 }
66
b34d1f35
NR
67 // details:
68 // "Posted by AUTHOR on DATE from the further-crackdown dept."
69 String author = "";
70 int pos = details.indexOf(" on ");
71 if (details.startsWith("Posted by ") && pos >= 0) {
72 author = details.substring("Posted by ".length(), pos).trim();
73 }
74 pos = details.indexOf(" from the ");
75 if (pos >= 0) {
76 details = details.substring(pos).trim();
77 }
78
73785268
NR
79 String body = "";
80 Element bodyElement = doc.getElementById("text-" + id);
81 if (bodyElement != null) {
82 body = bodyElement.text();
83 }
84
b34d1f35
NR
85 String categ = "";
86 Element categElement = doc.getElementsByClass("topic").first();
87 if (categElement != null) {
88 categ = StringUtils.unhtml(categElement.text()).trim();
89 }
90
91 String date = "";
92 Element dateElement = doc.getElementsByTag("time").first();
93 if (dateElement != null) {
94 date = StringUtils.unhtml(dateElement.text()).trim();
c9cffa91
NR
95 if (date.startsWith("on ")) {
96 date = date.substring("on ".length());
97 }
b34d1f35
NR
98 }
99
100 list.add(new Story(getType(), id, title.text(), author, date,
101 categ, details, intUrl, extUrl, body));
73785268
NR
102 }
103
104 return list;
105 }
106
107 @Override
5c056aad 108 public void fetch(Story story) throws IOException {
73785268
NR
109 List<Comment> comments = new ArrayList<Comment>();
110
111 URL url = new URL(story.getUrlInternal());
136ab801 112 InputStream in = downloader.open(url);
73785268
NR
113 Document doc = DataUtil.load(in, "UTF-8", url.toString());
114 Element listing = doc.getElementById("commentlisting");
115 if (listing != null) {
116 comments.addAll(getComments(listing));
117 }
118
5c056aad 119 story.setComments(comments);
73785268
NR
120 }
121
122 private List<Comment> getComments(Element listing) {
123 List<Comment> comments = new ArrayList<Comment>();
27008a87 124 Comment lastComment = null;
73785268
NR
125 for (Element commentElement : listing.children()) {
126 if (commentElement.hasClass("comment")) {
27008a87
NR
127 if (!commentElement.hasClass("hidden")) {
128 lastComment = getComment(commentElement);
129 comments.add(lastComment);
130 }
131
132 List<Comment> subComments = new ArrayList<Comment>();
133 for (Element child : commentElement.children()) {
134 if (child.id().contains("commtree_")) {
135 subComments.addAll(getComments(child));
136 }
137 }
138
139 if (lastComment == null) {
140 comments.addAll(subComments);
141 } else {
142 lastComment.addAll(subComments);
73785268
NR
143 }
144 }
145 }
27008a87 146
73785268
NR
147 return comments;
148 }
149
27008a87
NR
150 /**
151 * Get a comment from the given element.
152 *
153 * @param commentElement
154 * the element to get the comment of.
155 *
156 * @return the comment, <b>NOT</b> including sub-comments
157 */
73785268 158 private Comment getComment(Element commentElement) {
27008a87
NR
159 String title = firstOrEmpty(commentElement, "title").text();
160 String author = firstOrEmpty(commentElement, "by").text();
161 String date = firstOrEmpty(commentElement, "otherdetails").text();
162 Element content = firstOrEmpty(commentElement, "commentBody");
163
164 return new Comment(commentElement.id(), author, title, date,
165 toLines(content));
166 }
73785268 167
27008a87 168 private List<String> toLines(Element element) {
20217360 169 return toLines(element, new BasicElementProcessor() {
27008a87
NR
170 @Override
171 public String processText(String text) {
172 while (text.startsWith(">")) { // comment in one-liners
173 text = text.substring(1).trim();
174 }
73785268 175
27008a87 176 return text;
73785268 177 }
73785268 178
27008a87
NR
179 @Override
180 public boolean detectQuote(Node node) {
181 if (node instanceof Element) {
182 Element elementNode = (Element) node;
183 if (elementNode.tagName().equals("blockquote")
184 || elementNode.hasClass("quote")
185 || (elementNode.tagName().equals("p")
186 && elementNode.textNodes().size() == 1 && elementNode
187 .textNodes().get(0).getWholeText()
188 .startsWith(">"))) {
189 return true;
190 }
191 }
73785268 192
27008a87
NR
193 return false;
194 }
27008a87 195 });
73785268
NR
196 }
197}