6fb51a6dd32cb5f0bd37692165560ca1fdb915b9
[gofetch.git] / src / be / nikiroo / gofetch / support / Slashdot.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.net.URL;
5 import java.util.AbstractMap;
6 import java.util.ArrayList;
7 import java.util.List;
8 import java.util.Map.Entry;
9
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 /**
16 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
17 *
18 * @author niki
19 */
20 public class Slashdot extends BasicSupport {
21 @Override
22 public String getDescription() {
23 return "Slashdot: News for nerds, stuff that matters!";
24 }
25
26 @Override
27 protected List<Entry<URL, String>> getUrls() throws IOException {
28 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
29 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
30 "https://slashdot.org/"), ""));
31 return urls;
32 }
33
34 @Override
35 protected List<Element> getArticles(Document doc) {
36 return doc.getElementsByTag("header");
37 }
38
39 @Override
40 protected String getArticleId(Document doc, Element article) {
41 Element title = article.getElementsByClass("story-title").first();
42 if (title != null) {
43 String id = title.attr("id");
44 if (id.startsWith("title-")) {
45 id = id.substring("title-".length());
46 }
47
48 while (id.length() < 10) {
49 id = "0" + id;
50 }
51
52 return id;
53 }
54
55 return "";
56 }
57
58 @Override
59 protected String getArticleTitle(Document doc, Element article) {
60 Element title = article.getElementsByClass("story-title").first();
61 if (title != null) {
62 return title.text();
63 }
64
65 return "";
66 }
67
68 @Override
69 protected String getArticleAuthor(Document doc, Element article) {
70 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
71 String details = getArticleDetailsReal(article);
72 int pos = details.indexOf(" on ");
73 if (details.startsWith("Posted by ") && pos >= 0) {
74 return details.substring("Posted by ".length(), pos).trim();
75 }
76
77 return "";
78 }
79
80 @Override
81 protected String getArticleDate(Document doc, Element article) {
82 // Do not try bad articles
83 if (getArticleId(doc, article).isEmpty()) {
84 return "";
85 }
86
87 Element dateElement = doc.getElementsByTag("time").first();
88 if (dateElement != null) {
89 String date = dateElement.text().trim();
90 if (date.startsWith("on ")) {
91 date = date.substring("on ".length());
92 }
93
94 return date;
95 }
96
97 return "";
98 }
99
100 @Override
101 protected String getArticleCategory(Document doc, Element article,
102 String currentCategory) {
103 Element categElement = doc.getElementsByClass("topic").first();
104 if (categElement != null) {
105 return categElement.text();
106 }
107
108 return "";
109 }
110
111 @Override
112 protected String getArticleDetails(Document doc, Element article) {
113 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
114 String details = getArticleDetailsReal(article);
115 int pos = details.indexOf(" from the ");
116 if (pos >= 0) {
117 return details.substring(pos).trim();
118 }
119
120 return "";
121 }
122
123 @Override
124 protected String getArticleIntUrl(Document doc, Element article) {
125 Element title = article.getElementsByClass("story-title").first();
126 if (title != null) {
127 Elements links = title.getElementsByTag("a");
128 if (links.size() > 0) {
129 return links.get(0).absUrl("href");
130 }
131 }
132 return "";
133 }
134
135 @Override
136 protected String getArticleExtUrl(Document doc, Element article) {
137 Element title = article.getElementsByClass("story-title").first();
138 if (title != null) {
139 Elements links = title.getElementsByTag("a");
140 if (links.size() > 1) {
141 return links.get(1).absUrl("href");
142 }
143 }
144 return "";
145 }
146
147 @Override
148 protected String getArticleContent(Document doc, Element article) {
149 Element contentElement = doc //
150 .getElementById("text-" + getArticleId(doc, article));
151 if (contentElement != null) {
152 return contentElement.text();
153 }
154
155 return "";
156 }
157
158 @Override
159 protected Element getFullArticle(Document doc) {
160 return null;
161 }
162
163 @Override
164 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
165 List<Element> commentElements = new ArrayList<Element>();
166 Element listing = doc.getElementById("commentlisting");
167 if (listing != null) {
168 for (Element commentElement : listing.children()) {
169 if (commentElement.hasClass("comment")) {
170 commentElements.add(commentElement);
171 }
172 }
173 }
174
175 return commentElements;
176 }
177
178 @Override
179 protected ElementProcessor getElementProcessorFullArticle() {
180 return null;
181 }
182
183 @Override
184 protected List<Element> getCommentCommentPosts(Document doc,
185 Element container) {
186 List<Element> commentElements = new ArrayList<Element>();
187 for (Element child : container.children()) {
188 if (child.id().contains("commtree_")) {
189 for (Element sub : child.children()) {
190 if (sub.hasClass("comment")) {
191 commentElements.add(sub);
192 }
193 }
194 }
195 }
196
197 return commentElements;
198 }
199
200 @Override
201 protected String getCommentId(Element post) {
202 if (post.hasClass("hidden")) {
203 return "";
204 }
205
206 return post.id();
207 }
208
209 @Override
210 protected String getCommentAuthor(Element post) {
211 if (post.hasClass("hidden")) {
212 return "";
213 }
214
215 Element author = post.getElementsByClass("by").first();
216 if (author != null) {
217 return author.text();
218 }
219
220 return "";
221 }
222
223 @Override
224 protected String getCommentTitle(Element post) {
225 if (post.hasClass("hidden")) {
226 return "";
227 }
228
229 Element title = post.getElementsByClass("title").first();
230 if (title != null) {
231 return title.text();
232 }
233
234 return "";
235 }
236
237 @Override
238 protected String getCommentDate(Element post) {
239 if (post.hasClass("hidden")) {
240 return "";
241 }
242
243 Element date = post.getElementsByClass("otherdetails").first();
244 if (date != null) {
245 return date.text();
246 }
247
248 return "";
249 }
250
251 @Override
252 protected Element getCommentContentElement(Element post) {
253 if (post.hasClass("hidden")) {
254 return null;
255 }
256
257 return post.getElementsByClass("commentBody").first();
258 }
259
260 @Override
261 protected ElementProcessor getElementProcessorComment() {
262 return new BasicElementProcessor() {
263 @Override
264 public String processText(String text) {
265 while (text.startsWith(">")) { // comment in one-liners
266 text = text.substring(1).trim();
267 }
268
269 return text;
270 }
271
272 @Override
273 public boolean detectQuote(Node node) {
274 if (node instanceof Element) {
275 Element elementNode = (Element) node;
276 if (elementNode.tagName().equals("blockquote")
277 || elementNode.hasClass("quote")
278 || (elementNode.tagName().equals("p")
279 && elementNode.textNodes().size() == 1 && elementNode
280 .textNodes().get(0).getWholeText()
281 .startsWith(">"))) {
282 return true;
283 }
284 }
285
286 return false;
287 }
288 };
289 }
290
291 private String getArticleDetailsReal(Element article) {
292 Element detailsElement = article.getElementsByClass("details").first();
293 if (detailsElement != null) {
294 return detailsElement.text();
295 }
296
297 return "";
298 }
299 }