Commit | Line | Data |
---|---|---|
73785268 NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
3 | import java.io.IOException; | |
73785268 | 4 | import java.net.URL; |
3e62b034 | 5 | import java.util.AbstractMap; |
73785268 NR |
6 | import java.util.ArrayList; |
7 | import java.util.List; | |
3e62b034 | 8 | import java.util.Map.Entry; |
73785268 | 9 | |
73785268 NR |
10 | import org.jsoup.nodes.Document; |
11 | import org.jsoup.nodes.Element; | |
27008a87 | 12 | import org.jsoup.nodes.Node; |
73785268 NR |
13 | import org.jsoup.select.Elements; |
14 | ||
70b18499 NR |
15 | /** |
16 | * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>. | |
17 | * | |
18 | * @author niki | |
19 | */ | |
73785268 NR |
20 | public class Slashdot extends BasicSupport { |
21 | @Override | |
22 | public String getDescription() { | |
23 | return "Slashdot: News for nerds, stuff that matters!"; | |
24 | } | |
25 | ||
26 | @Override | |
3e62b034 NR |
27 | protected List<Entry<URL, String>> getUrls() throws IOException { |
28 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
29 | urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL( | |
30 | "https://slashdot.org/"), "")); | |
31 | return urls; | |
32 | } | |
70b18499 | 33 | |
3e62b034 NR |
34 | @Override |
35 | protected List<Element> getArticles(Document doc) { | |
36 | return doc.getElementsByTag("header"); | |
37 | } | |
73785268 | 38 | |
3e62b034 NR |
39 | @Override |
40 | protected String getArticleId(Document doc, Element article) { | |
41 | Element title = article.getElementsByClass("story-title").first(); | |
42 | if (title != null) { | |
43 | String id = title.attr("id"); | |
73785268 NR |
44 | if (id.startsWith("title-")) { |
45 | id = id.substring("title-".length()); | |
46 | } | |
47 | ||
3e62b034 NR |
48 | return id; |
49 | } | |
50 | ||
51 | return ""; | |
52 | } | |
53 | ||
54 | @Override | |
55 | protected String getArticleTitle(Document doc, Element article) { | |
56 | Element title = article.getElementsByClass("story-title").first(); | |
57 | if (title != null) { | |
58 | return title.text(); | |
59 | } | |
60 | ||
61 | return ""; | |
62 | } | |
63 | ||
64 | @Override | |
65 | protected String getArticleAuthor(Document doc, Element article) { | |
66 | // details: "Posted by AUTHOR on DATE from the further-crackdown dept." | |
67 | String details = getArticleDetailsReal(article); | |
68 | int pos = details.indexOf(" on "); | |
69 | if (details.startsWith("Posted by ") && pos >= 0) { | |
70 | return details.substring("Posted by ".length(), pos).trim(); | |
71 | } | |
72 | ||
73 | return ""; | |
74 | } | |
75 | ||
76 | @Override | |
77 | protected String getArticleDate(Document doc, Element article) { | |
78 | // Do not try bad articles | |
79 | if (getArticleId(doc, article).isEmpty()) { | |
80 | return ""; | |
81 | } | |
82 | ||
83 | Element dateElement = doc.getElementsByTag("time").first(); | |
84 | if (dateElement != null) { | |
85 | String date = dateElement.text().trim(); | |
86 | if (date.startsWith("on ")) { | |
87 | date = date.substring("on ".length()); | |
88 | } | |
89 | ||
90 | return date; | |
91 | } | |
92 | ||
93 | return ""; | |
94 | } | |
95 | ||
96 | @Override | |
97 | protected String getArticleCategory(Document doc, Element article, | |
98 | String currentCategory) { | |
99 | Element categElement = doc.getElementsByClass("topic").first(); | |
100 | if (categElement != null) { | |
101 | return categElement.text(); | |
102 | } | |
103 | ||
104 | return ""; | |
105 | } | |
106 | ||
107 | @Override | |
108 | protected String getArticleDetails(Document doc, Element article) { | |
109 | // details: "Posted by AUTHOR on DATE from the further-crackdown dept." | |
110 | String details = getArticleDetailsReal(article); | |
111 | int pos = details.indexOf(" from the "); | |
112 | if (pos >= 0) { | |
113 | return details.substring(pos).trim(); | |
114 | } | |
115 | ||
116 | return ""; | |
117 | } | |
118 | ||
119 | @Override | |
120 | protected String getArticleIntUrl(Document doc, Element article) { | |
121 | Element title = article.getElementsByClass("story-title").first(); | |
122 | if (title != null) { | |
73785268 | 123 | Elements links = title.getElementsByTag("a"); |
73785268 | 124 | if (links.size() > 0) { |
3e62b034 | 125 | return links.get(0).absUrl("href"); |
73785268 | 126 | } |
3e62b034 NR |
127 | } |
128 | return ""; | |
129 | } | |
130 | ||
131 | @Override | |
132 | protected String getArticleExtUrl(Document doc, Element article) { | |
133 | Element title = article.getElementsByClass("story-title").first(); | |
134 | if (title != null) { | |
135 | Elements links = title.getElementsByTag("a"); | |
73785268 | 136 | if (links.size() > 1) { |
3e62b034 | 137 | return links.get(1).absUrl("href"); |
73785268 | 138 | } |
3e62b034 NR |
139 | } |
140 | return ""; | |
141 | } | |
73785268 | 142 | |
3e62b034 NR |
143 | @Override |
144 | protected String getArticleContent(Document doc, Element article) { | |
145 | Element contentElement = doc // | |
146 | .getElementById("text-" + getArticleId(doc, article)); | |
147 | if (contentElement != null) { | |
e818d449 | 148 | return getArticleText(contentElement); |
3e62b034 | 149 | } |
73785268 | 150 | |
3e62b034 NR |
151 | return ""; |
152 | } | |
b34d1f35 | 153 | |
3e62b034 NR |
154 | @Override |
155 | protected Element getFullArticle(Document doc) { | |
156 | return null; | |
157 | } | |
73785268 | 158 | |
3e62b034 NR |
159 | @Override |
160 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
161 | List<Element> commentElements = new ArrayList<Element>(); | |
162 | Element listing = doc.getElementById("commentlisting"); | |
163 | if (listing != null) { | |
164 | for (Element commentElement : listing.children()) { | |
165 | if (commentElement.hasClass("comment")) { | |
166 | commentElements.add(commentElement); | |
167 | } | |
b34d1f35 | 168 | } |
3e62b034 NR |
169 | } |
170 | ||
171 | return commentElements; | |
172 | } | |
173 | ||
174 | @Override | |
175 | protected ElementProcessor getElementProcessorFullArticle() { | |
e818d449 NR |
176 | return new BasicElementProcessor() { |
177 | @Override | |
178 | public boolean detectQuote(Node node) { | |
179 | if (node instanceof Element) { | |
180 | Element element = (Element) node; | |
181 | if (element.tagName().equals("i")) { | |
182 | return true; | |
183 | } | |
184 | } | |
185 | return false; | |
186 | } | |
187 | }; | |
3e62b034 | 188 | } |
b34d1f35 | 189 | |
3e62b034 NR |
190 | @Override |
191 | protected List<Element> getCommentCommentPosts(Document doc, | |
192 | Element container) { | |
193 | List<Element> commentElements = new ArrayList<Element>(); | |
194 | for (Element child : container.children()) { | |
195 | if (child.id().contains("commtree_")) { | |
196 | for (Element sub : child.children()) { | |
197 | if (sub.hasClass("comment")) { | |
198 | commentElements.add(sub); | |
199 | } | |
c9cffa91 | 200 | } |
b34d1f35 | 201 | } |
3e62b034 NR |
202 | } |
203 | ||
204 | return commentElements; | |
205 | } | |
b34d1f35 | 206 | |
3e62b034 NR |
207 | @Override |
208 | protected String getCommentId(Element post) { | |
209 | if (post.hasClass("hidden")) { | |
210 | return ""; | |
73785268 NR |
211 | } |
212 | ||
3e62b034 | 213 | return post.id(); |
73785268 NR |
214 | } |
215 | ||
216 | @Override | |
3e62b034 NR |
217 | protected String getCommentAuthor(Element post) { |
218 | if (post.hasClass("hidden")) { | |
219 | return ""; | |
220 | } | |
73785268 | 221 | |
3e62b034 NR |
222 | Element author = post.getElementsByClass("by").first(); |
223 | if (author != null) { | |
224 | return author.text(); | |
73785268 NR |
225 | } |
226 | ||
3e62b034 | 227 | return ""; |
73785268 NR |
228 | } |
229 | ||
3e62b034 NR |
230 | @Override |
231 | protected String getCommentTitle(Element post) { | |
232 | if (post.hasClass("hidden")) { | |
233 | return ""; | |
234 | } | |
27008a87 | 235 | |
3e62b034 NR |
236 | Element title = post.getElementsByClass("title").first(); |
237 | if (title != null) { | |
238 | return title.text(); | |
239 | } | |
27008a87 | 240 | |
3e62b034 NR |
241 | return ""; |
242 | } | |
243 | ||
244 | @Override | |
245 | protected String getCommentDate(Element post) { | |
246 | if (post.hasClass("hidden")) { | |
247 | return ""; | |
73785268 | 248 | } |
27008a87 | 249 | |
3e62b034 NR |
250 | Element date = post.getElementsByClass("otherdetails").first(); |
251 | if (date != null) { | |
252 | return date.text(); | |
253 | } | |
254 | ||
255 | return ""; | |
73785268 NR |
256 | } |
257 | ||
3e62b034 NR |
258 | @Override |
259 | protected Element getCommentContentElement(Element post) { | |
260 | if (post.hasClass("hidden")) { | |
261 | return null; | |
262 | } | |
27008a87 | 263 | |
3e62b034 | 264 | return post.getElementsByClass("commentBody").first(); |
27008a87 | 265 | } |
73785268 | 266 | |
3e62b034 NR |
267 | @Override |
268 | protected ElementProcessor getElementProcessorComment() { | |
269 | return new BasicElementProcessor() { | |
27008a87 NR |
270 | @Override |
271 | public String processText(String text) { | |
272 | while (text.startsWith(">")) { // comment in one-liners | |
273 | text = text.substring(1).trim(); | |
274 | } | |
73785268 | 275 | |
27008a87 | 276 | return text; |
73785268 | 277 | } |
73785268 | 278 | |
27008a87 NR |
279 | @Override |
280 | public boolean detectQuote(Node node) { | |
281 | if (node instanceof Element) { | |
282 | Element elementNode = (Element) node; | |
283 | if (elementNode.tagName().equals("blockquote") | |
284 | || elementNode.hasClass("quote") | |
285 | || (elementNode.tagName().equals("p") | |
286 | && elementNode.textNodes().size() == 1 && elementNode | |
287 | .textNodes().get(0).getWholeText() | |
288 | .startsWith(">"))) { | |
289 | return true; | |
290 | } | |
291 | } | |
73785268 | 292 | |
27008a87 NR |
293 | return false; |
294 | } | |
3e62b034 NR |
295 | }; |
296 | } | |
297 | ||
298 | private String getArticleDetailsReal(Element article) { | |
299 | Element detailsElement = article.getElementsByClass("details").first(); | |
300 | if (detailsElement != null) { | |
301 | return detailsElement.text(); | |
302 | } | |
303 | ||
304 | return ""; | |
73785268 NR |
305 | } |
306 | } |