Bug fixes + rework of BasicSupport
[gofetch.git] / src / be / nikiroo / gofetch / support / Slashdot.java
CommitLineData
73785268
NR
1package be.nikiroo.gofetch.support;
2
3import java.io.IOException;
73785268 4import java.net.URL;
3e62b034 5import java.util.AbstractMap;
73785268
NR
6import java.util.ArrayList;
7import java.util.List;
3e62b034 8import java.util.Map.Entry;
73785268 9
73785268
NR
10import org.jsoup.nodes.Document;
11import org.jsoup.nodes.Element;
27008a87 12import org.jsoup.nodes.Node;
73785268
NR
13import org.jsoup.select.Elements;
14
70b18499
NR
15/**
16 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
17 *
18 * @author niki
19 */
73785268
NR
20public class Slashdot extends BasicSupport {
21 @Override
22 public String getDescription() {
23 return "Slashdot: News for nerds, stuff that matters!";
24 }
25
26 @Override
3e62b034
NR
27 protected List<Entry<URL, String>> getUrls() throws IOException {
28 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
29 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
30 "https://slashdot.org/"), ""));
31 return urls;
32 }
70b18499 33
3e62b034
NR
34 @Override
35 protected List<Element> getArticles(Document doc) {
36 return doc.getElementsByTag("header");
37 }
73785268 38
3e62b034
NR
39 @Override
40 protected String getArticleId(Document doc, Element article) {
41 Element title = article.getElementsByClass("story-title").first();
42 if (title != null) {
43 String id = title.attr("id");
73785268
NR
44 if (id.startsWith("title-")) {
45 id = id.substring("title-".length());
46 }
47
3e62b034
NR
48 return id;
49 }
50
51 return "";
52 }
53
54 @Override
55 protected String getArticleTitle(Document doc, Element article) {
56 Element title = article.getElementsByClass("story-title").first();
57 if (title != null) {
58 return title.text();
59 }
60
61 return "";
62 }
63
64 @Override
65 protected String getArticleAuthor(Document doc, Element article) {
66 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
67 String details = getArticleDetailsReal(article);
68 int pos = details.indexOf(" on ");
69 if (details.startsWith("Posted by ") && pos >= 0) {
70 return details.substring("Posted by ".length(), pos).trim();
71 }
72
73 return "";
74 }
75
76 @Override
77 protected String getArticleDate(Document doc, Element article) {
78 // Do not try bad articles
79 if (getArticleId(doc, article).isEmpty()) {
80 return "";
81 }
82
83 Element dateElement = doc.getElementsByTag("time").first();
84 if (dateElement != null) {
85 String date = dateElement.text().trim();
86 if (date.startsWith("on ")) {
87 date = date.substring("on ".length());
88 }
89
90 return date;
91 }
92
93 return "";
94 }
95
96 @Override
97 protected String getArticleCategory(Document doc, Element article,
98 String currentCategory) {
99 Element categElement = doc.getElementsByClass("topic").first();
100 if (categElement != null) {
101 return categElement.text();
102 }
103
104 return "";
105 }
106
107 @Override
108 protected String getArticleDetails(Document doc, Element article) {
109 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
110 String details = getArticleDetailsReal(article);
111 int pos = details.indexOf(" from the ");
112 if (pos >= 0) {
113 return details.substring(pos).trim();
114 }
115
116 return "";
117 }
118
119 @Override
120 protected String getArticleIntUrl(Document doc, Element article) {
121 Element title = article.getElementsByClass("story-title").first();
122 if (title != null) {
73785268 123 Elements links = title.getElementsByTag("a");
73785268 124 if (links.size() > 0) {
3e62b034 125 return links.get(0).absUrl("href");
73785268 126 }
3e62b034
NR
127 }
128 return "";
129 }
130
131 @Override
132 protected String getArticleExtUrl(Document doc, Element article) {
133 Element title = article.getElementsByClass("story-title").first();
134 if (title != null) {
135 Elements links = title.getElementsByTag("a");
73785268 136 if (links.size() > 1) {
3e62b034 137 return links.get(1).absUrl("href");
73785268 138 }
3e62b034
NR
139 }
140 return "";
141 }
73785268 142
3e62b034
NR
143 @Override
144 protected String getArticleContent(Document doc, Element article) {
145 Element contentElement = doc //
146 .getElementById("text-" + getArticleId(doc, article));
147 if (contentElement != null) {
148 return contentElement.text();
149 }
73785268 150
3e62b034
NR
151 return "";
152 }
b34d1f35 153
3e62b034
NR
154 @Override
155 protected Element getFullArticle(Document doc) {
156 return null;
157 }
73785268 158
3e62b034
NR
159 @Override
160 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
161 List<Element> commentElements = new ArrayList<Element>();
162 Element listing = doc.getElementById("commentlisting");
163 if (listing != null) {
164 for (Element commentElement : listing.children()) {
165 if (commentElement.hasClass("comment")) {
166 commentElements.add(commentElement);
167 }
b34d1f35 168 }
3e62b034
NR
169 }
170
171 return commentElements;
172 }
173
174 @Override
175 protected ElementProcessor getElementProcessorFullArticle() {
176 return null;
177 }
b34d1f35 178
3e62b034
NR
179 @Override
180 protected List<Element> getCommentCommentPosts(Document doc,
181 Element container) {
182 List<Element> commentElements = new ArrayList<Element>();
183 for (Element child : container.children()) {
184 if (child.id().contains("commtree_")) {
185 for (Element sub : child.children()) {
186 if (sub.hasClass("comment")) {
187 commentElements.add(sub);
188 }
c9cffa91 189 }
b34d1f35 190 }
3e62b034
NR
191 }
192
193 return commentElements;
194 }
b34d1f35 195
3e62b034
NR
196 @Override
197 protected String getCommentId(Element post) {
198 if (post.hasClass("hidden")) {
199 return "";
73785268
NR
200 }
201
3e62b034 202 return post.id();
73785268
NR
203 }
204
205 @Override
3e62b034
NR
206 protected String getCommentAuthor(Element post) {
207 if (post.hasClass("hidden")) {
208 return "";
209 }
73785268 210
3e62b034
NR
211 Element author = post.getElementsByClass("by").first();
212 if (author != null) {
213 return author.text();
73785268
NR
214 }
215
3e62b034 216 return "";
73785268
NR
217 }
218
3e62b034
NR
219 @Override
220 protected String getCommentTitle(Element post) {
221 if (post.hasClass("hidden")) {
222 return "";
223 }
27008a87 224
3e62b034
NR
225 Element title = post.getElementsByClass("title").first();
226 if (title != null) {
227 return title.text();
228 }
27008a87 229
3e62b034
NR
230 return "";
231 }
232
233 @Override
234 protected String getCommentDate(Element post) {
235 if (post.hasClass("hidden")) {
236 return "";
73785268 237 }
27008a87 238
3e62b034
NR
239 Element date = post.getElementsByClass("otherdetails").first();
240 if (date != null) {
241 return date.text();
242 }
243
244 return "";
73785268
NR
245 }
246
3e62b034
NR
247 @Override
248 protected Element getCommentContentElement(Element post) {
249 if (post.hasClass("hidden")) {
250 return null;
251 }
27008a87 252
3e62b034 253 return post.getElementsByClass("commentBody").first();
27008a87 254 }
73785268 255
3e62b034
NR
256 @Override
257 protected ElementProcessor getElementProcessorComment() {
258 return new BasicElementProcessor() {
27008a87
NR
259 @Override
260 public String processText(String text) {
261 while (text.startsWith(">")) { // comment in one-liners
262 text = text.substring(1).trim();
263 }
73785268 264
27008a87 265 return text;
73785268 266 }
73785268 267
27008a87
NR
268 @Override
269 public boolean detectQuote(Node node) {
270 if (node instanceof Element) {
271 Element elementNode = (Element) node;
272 if (elementNode.tagName().equals("blockquote")
273 || elementNode.hasClass("quote")
274 || (elementNode.tagName().equals("p")
275 && elementNode.textNodes().size() == 1 && elementNode
276 .textNodes().get(0).getWholeText()
277 .startsWith(">"))) {
278 return true;
279 }
280 }
73785268 281
27008a87
NR
282 return false;
283 }
3e62b034
NR
284 };
285 }
286
287 private String getArticleDetailsReal(Element article) {
288 Element detailsElement = article.getElementsByClass("details").first();
289 if (detailsElement != null) {
290 return detailsElement.text();
291 }
292
293 return "";
73785268
NR
294 }
295}