Fix bug due to 'id'-handling change
[gofetch.git] / src / be / nikiroo / gofetch / support / Slashdot.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.net.URL;
5 import java.util.AbstractMap;
6 import java.util.ArrayList;
7 import java.util.List;
8 import java.util.Map.Entry;
9
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 /**
16 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
17 *
18 * @author niki
19 */
20 public class Slashdot extends BasicSupport {
21 @Override
22 public String getDescription() {
23 return "Slashdot: News for nerds, stuff that matters!";
24 }
25
26 @Override
27 protected List<Entry<URL, String>> getUrls() throws IOException {
28 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
29 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
30 "https://slashdot.org/"), ""));
31 return urls;
32 }
33
34 @Override
35 protected List<Element> getArticles(Document doc) {
36 return doc.getElementsByTag("header");
37 }
38
39 @Override
40 protected String getArticleId(Document doc, Element article) {
41 Element title = article.getElementsByClass("story-title").first();
42 if (title != null) {
43 String id = title.attr("id");
44 if (id.startsWith("title-")) {
45 id = id.substring("title-".length());
46 }
47
48 return id;
49 }
50
51 return "";
52 }
53
54 @Override
55 protected String getArticleTitle(Document doc, Element article) {
56 Element title = article.getElementsByClass("story-title").first();
57 if (title != null) {
58 return title.text();
59 }
60
61 return "";
62 }
63
64 @Override
65 protected String getArticleAuthor(Document doc, Element article) {
66 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
67 String details = getArticleDetailsReal(article);
68 int pos = details.indexOf(" on ");
69 if (details.startsWith("Posted by ") && pos >= 0) {
70 return details.substring("Posted by ".length(), pos).trim();
71 }
72
73 return "";
74 }
75
76 @Override
77 protected String getArticleDate(Document doc, Element article) {
78 // Do not try bad articles
79 if (getArticleId(doc, article).isEmpty()) {
80 return "";
81 }
82
83 Element dateElement = doc.getElementsByTag("time").first();
84 if (dateElement != null) {
85 String date = dateElement.text().trim();
86 if (date.startsWith("on ")) {
87 date = date.substring("on ".length());
88 }
89
90 return date;
91 }
92
93 return "";
94 }
95
96 @Override
97 protected String getArticleCategory(Document doc, Element article,
98 String currentCategory) {
99 Element categElement = doc.getElementsByClass("topic").first();
100 if (categElement != null) {
101 return categElement.text();
102 }
103
104 return "";
105 }
106
107 @Override
108 protected String getArticleDetails(Document doc, Element article) {
109 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
110 String details = getArticleDetailsReal(article);
111 int pos = details.indexOf(" from the ");
112 if (pos >= 0) {
113 return details.substring(pos).trim();
114 }
115
116 return "";
117 }
118
119 @Override
120 protected String getArticleIntUrl(Document doc, Element article) {
121 Element title = article.getElementsByClass("story-title").first();
122 if (title != null) {
123 Elements links = title.getElementsByTag("a");
124 if (links.size() > 0) {
125 return links.get(0).absUrl("href");
126 }
127 }
128 return "";
129 }
130
131 @Override
132 protected String getArticleExtUrl(Document doc, Element article) {
133 Element title = article.getElementsByClass("story-title").first();
134 if (title != null) {
135 Elements links = title.getElementsByTag("a");
136 if (links.size() > 1) {
137 return links.get(1).absUrl("href");
138 }
139 }
140 return "";
141 }
142
143 @Override
144 protected String getArticleContent(Document doc, Element article) {
145 Element contentElement = doc //
146 .getElementById("text-" + getArticleId(doc, article));
147 if (contentElement != null) {
148 return contentElement.text();
149 }
150
151 return "";
152 }
153
154 @Override
155 protected Element getFullArticle(Document doc) {
156 return null;
157 }
158
159 @Override
160 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
161 List<Element> commentElements = new ArrayList<Element>();
162 Element listing = doc.getElementById("commentlisting");
163 if (listing != null) {
164 for (Element commentElement : listing.children()) {
165 if (commentElement.hasClass("comment")) {
166 commentElements.add(commentElement);
167 }
168 }
169 }
170
171 return commentElements;
172 }
173
174 @Override
175 protected ElementProcessor getElementProcessorFullArticle() {
176 return null;
177 }
178
179 @Override
180 protected List<Element> getCommentCommentPosts(Document doc,
181 Element container) {
182 List<Element> commentElements = new ArrayList<Element>();
183 for (Element child : container.children()) {
184 if (child.id().contains("commtree_")) {
185 for (Element sub : child.children()) {
186 if (sub.hasClass("comment")) {
187 commentElements.add(sub);
188 }
189 }
190 }
191 }
192
193 return commentElements;
194 }
195
196 @Override
197 protected String getCommentId(Element post) {
198 if (post.hasClass("hidden")) {
199 return "";
200 }
201
202 return post.id();
203 }
204
205 @Override
206 protected String getCommentAuthor(Element post) {
207 if (post.hasClass("hidden")) {
208 return "";
209 }
210
211 Element author = post.getElementsByClass("by").first();
212 if (author != null) {
213 return author.text();
214 }
215
216 return "";
217 }
218
219 @Override
220 protected String getCommentTitle(Element post) {
221 if (post.hasClass("hidden")) {
222 return "";
223 }
224
225 Element title = post.getElementsByClass("title").first();
226 if (title != null) {
227 return title.text();
228 }
229
230 return "";
231 }
232
233 @Override
234 protected String getCommentDate(Element post) {
235 if (post.hasClass("hidden")) {
236 return "";
237 }
238
239 Element date = post.getElementsByClass("otherdetails").first();
240 if (date != null) {
241 return date.text();
242 }
243
244 return "";
245 }
246
247 @Override
248 protected Element getCommentContentElement(Element post) {
249 if (post.hasClass("hidden")) {
250 return null;
251 }
252
253 return post.getElementsByClass("commentBody").first();
254 }
255
256 @Override
257 protected ElementProcessor getElementProcessorComment() {
258 return new BasicElementProcessor() {
259 @Override
260 public String processText(String text) {
261 while (text.startsWith(">")) { // comment in one-liners
262 text = text.substring(1).trim();
263 }
264
265 return text;
266 }
267
268 @Override
269 public boolean detectQuote(Node node) {
270 if (node instanceof Element) {
271 Element elementNode = (Element) node;
272 if (elementNode.tagName().equals("blockquote")
273 || elementNode.hasClass("quote")
274 || (elementNode.tagName().equals("p")
275 && elementNode.textNodes().size() == 1 && elementNode
276 .textNodes().get(0).getWholeText()
277 .startsWith(">"))) {
278 return true;
279 }
280 }
281
282 return false;
283 }
284 };
285 }
286
287 private String getArticleDetailsReal(Element article) {
288 Element detailsElement = article.getElementsByClass("details").first();
289 if (detailsElement != null) {
290 return detailsElement.text();
291 }
292
293 return "";
294 }
295 }