057ed9f4172ced34a74129977b30054e98d210f9
[gofetch.git] / src / be / nikiroo / gofetch / support / Slashdot.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.net.URL;
5 import java.util.AbstractMap;
6 import java.util.ArrayList;
7 import java.util.List;
8 import java.util.Map.Entry;
9
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 /**
16 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
17 *
18 * @author niki
19 */
20 public class Slashdot extends BasicSupport {
21 @Override
22 public String getDescription() {
23 return "Slashdot: News for nerds, stuff that matters!";
24 }
25
26 @Override
27 protected List<Entry<URL, String>> getUrls() throws IOException {
28 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
29 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
30 "https://slashdot.org/"), ""));
31 return urls;
32 }
33
34 @Override
35 protected List<Element> getArticles(Document doc) {
36 return doc.getElementsByTag("header");
37 }
38
39 @Override
40 protected String getArticleId(Document doc, Element article) {
41 Element title = article.getElementsByClass("story-title").first();
42 if (title != null) {
43 String id = title.attr("id");
44 if (id.startsWith("title-")) {
45 id = id.substring("title-".length());
46 }
47
48 return id;
49 }
50
51 return "";
52 }
53
54 @Override
55 protected String getArticleTitle(Document doc, Element article) {
56 Element title = article.getElementsByClass("story-title").first();
57 if (title != null) {
58 return title.text();
59 }
60
61 return "";
62 }
63
64 @Override
65 protected String getArticleAuthor(Document doc, Element article) {
66 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
67 String details = getArticleDetailsReal(article);
68 int pos = details.indexOf(" on ");
69 if (details.startsWith("Posted by ") && pos >= 0) {
70 return details.substring("Posted by ".length(), pos).trim();
71 }
72
73 return "";
74 }
75
76 @Override
77 protected String getArticleDate(Document doc, Element article) {
78 // Do not try bad articles
79 if (getArticleId(doc, article).isEmpty()) {
80 return "";
81 }
82
83 Element dateElement = doc.getElementsByTag("time").first();
84 if (dateElement != null) {
85 String date = dateElement.text().trim();
86 if (date.startsWith("on ")) {
87 date = date.substring("on ".length());
88 }
89
90 return date;
91 }
92
93 return "";
94 }
95
96 @Override
97 protected String getArticleCategory(Document doc, Element article,
98 String currentCategory) {
99 Element categElement = doc.getElementsByClass("topic").first();
100 if (categElement != null) {
101 return categElement.text();
102 }
103
104 return "";
105 }
106
107 @Override
108 protected String getArticleDetails(Document doc, Element article) {
109 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
110 String details = getArticleDetailsReal(article);
111 int pos = details.indexOf(" from the ");
112 if (pos >= 0) {
113 return details.substring(pos).trim();
114 }
115
116 return "";
117 }
118
119 @Override
120 protected String getArticleIntUrl(Document doc, Element article) {
121 Element title = article.getElementsByClass("story-title").first();
122 if (title != null) {
123 Elements links = title.getElementsByTag("a");
124 if (links.size() > 0) {
125 return links.get(0).absUrl("href");
126 }
127 }
128 return "";
129 }
130
131 @Override
132 protected String getArticleExtUrl(Document doc, Element article) {
133 Element title = article.getElementsByClass("story-title").first();
134 if (title != null) {
135 Elements links = title.getElementsByTag("a");
136 if (links.size() > 1) {
137 return links.get(1).absUrl("href");
138 }
139 }
140 return "";
141 }
142
143 @Override
144 protected String getArticleContent(Document doc, Element article) {
145 Element contentElement = doc //
146 .getElementById("text-" + getArticleId(doc, article));
147 if (contentElement != null) {
148 return getArticleText(contentElement);
149 }
150
151 return "";
152 }
153
154 @Override
155 protected Element getFullArticle(Document doc) {
156 return null;
157 }
158
159 @Override
160 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
161 List<Element> commentElements = new ArrayList<Element>();
162 Element listing = doc.getElementById("commentlisting");
163 if (listing != null) {
164 for (Element commentElement : listing.children()) {
165 if (commentElement.hasClass("comment")) {
166 commentElements.add(commentElement);
167 }
168 }
169 }
170
171 return commentElements;
172 }
173
174 @Override
175 protected ElementProcessor getElementProcessorFullArticle() {
176 return new BasicElementProcessor() {
177 @Override
178 public boolean detectQuote(Node node) {
179 if (node instanceof Element) {
180 Element element = (Element) node;
181 if (element.tagName().equals("i")) {
182 return true;
183 }
184 }
185 return false;
186 }
187 };
188 }
189
190 @Override
191 protected List<Element> getCommentCommentPosts(Document doc,
192 Element container) {
193 List<Element> commentElements = new ArrayList<Element>();
194 for (Element child : container.children()) {
195 if (child.id().contains("commtree_")) {
196 for (Element sub : child.children()) {
197 if (sub.hasClass("comment")) {
198 commentElements.add(sub);
199 }
200 }
201 }
202 }
203
204 return commentElements;
205 }
206
207 @Override
208 protected String getCommentId(Element post) {
209 if (post.hasClass("hidden")) {
210 return "";
211 }
212
213 return post.id();
214 }
215
216 @Override
217 protected String getCommentAuthor(Element post) {
218 if (post.hasClass("hidden")) {
219 return "";
220 }
221
222 Element author = post.getElementsByClass("by").first();
223 if (author != null) {
224 return author.text();
225 }
226
227 return "";
228 }
229
230 @Override
231 protected String getCommentTitle(Element post) {
232 if (post.hasClass("hidden")) {
233 return "";
234 }
235
236 Element title = post.getElementsByClass("title").first();
237 if (title != null) {
238 return title.text();
239 }
240
241 return "";
242 }
243
244 @Override
245 protected String getCommentDate(Element post) {
246 if (post.hasClass("hidden")) {
247 return "";
248 }
249
250 Element date = post.getElementsByClass("otherdetails").first();
251 if (date != null) {
252 return date.text();
253 }
254
255 return "";
256 }
257
258 @Override
259 protected Element getCommentContentElement(Element post) {
260 if (post.hasClass("hidden")) {
261 return null;
262 }
263
264 return post.getElementsByClass("commentBody").first();
265 }
266
267 @Override
268 protected ElementProcessor getElementProcessorComment() {
269 return new BasicElementProcessor() {
270 @Override
271 public String processText(String text) {
272 while (text.startsWith(">")) { // comment in one-liners
273 text = text.substring(1).trim();
274 }
275
276 return text;
277 }
278
279 @Override
280 public boolean detectQuote(Node node) {
281 if (node instanceof Element) {
282 Element elementNode = (Element) node;
283 if (elementNode.tagName().equals("blockquote")
284 || elementNode.hasClass("quote")
285 || (elementNode.tagName().equals("p")
286 && elementNode.textNodes().size() == 1 && elementNode
287 .textNodes().get(0).getWholeText()
288 .startsWith(">"))) {
289 return true;
290 }
291 }
292
293 return false;
294 }
295 };
296 }
297
298 private String getArticleDetailsReal(Element article) {
299 Element detailsElement = article.getElementsByClass("details").first();
300 if (detailsElement != null) {
301 return detailsElement.text();
302 }
303
304 return "";
305 }
306 }