Commit | Line | Data |
---|---|---|
73785268 NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
3 | import java.io.IOException; | |
73785268 | 4 | import java.net.URL; |
3e62b034 | 5 | import java.util.AbstractMap; |
73785268 NR |
6 | import java.util.ArrayList; |
7 | import java.util.List; | |
3e62b034 | 8 | import java.util.Map.Entry; |
73785268 | 9 | |
73785268 NR |
10 | import org.jsoup.nodes.Document; |
11 | import org.jsoup.nodes.Element; | |
27008a87 | 12 | import org.jsoup.nodes.Node; |
73785268 NR |
13 | import org.jsoup.select.Elements; |
14 | ||
70b18499 NR |
15 | /** |
16 | * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>. | |
17 | * | |
18 | * @author niki | |
19 | */ | |
73785268 NR |
20 | public class Slashdot extends BasicSupport { |
21 | @Override | |
22 | public String getDescription() { | |
23 | return "Slashdot: News for nerds, stuff that matters!"; | |
24 | } | |
25 | ||
26 | @Override | |
3e62b034 NR |
27 | protected List<Entry<URL, String>> getUrls() throws IOException { |
28 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
29 | urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL( | |
30 | "https://slashdot.org/"), "")); | |
31 | return urls; | |
32 | } | |
70b18499 | 33 | |
3e62b034 NR |
34 | @Override |
35 | protected List<Element> getArticles(Document doc) { | |
36 | return doc.getElementsByTag("header"); | |
37 | } | |
73785268 | 38 | |
3e62b034 NR |
39 | @Override |
40 | protected String getArticleId(Document doc, Element article) { | |
41 | Element title = article.getElementsByClass("story-title").first(); | |
42 | if (title != null) { | |
43 | String id = title.attr("id"); | |
73785268 NR |
44 | if (id.startsWith("title-")) { |
45 | id = id.substring("title-".length()); | |
46 | } | |
47 | ||
3e62b034 NR |
48 | return id; |
49 | } | |
50 | ||
51 | return ""; | |
52 | } | |
53 | ||
54 | @Override | |
55 | protected String getArticleTitle(Document doc, Element article) { | |
56 | Element title = article.getElementsByClass("story-title").first(); | |
57 | if (title != null) { | |
58 | return title.text(); | |
59 | } | |
60 | ||
61 | return ""; | |
62 | } | |
63 | ||
64 | @Override | |
65 | protected String getArticleAuthor(Document doc, Element article) { | |
66 | // details: "Posted by AUTHOR on DATE from the further-crackdown dept." | |
67 | String details = getArticleDetailsReal(article); | |
68 | int pos = details.indexOf(" on "); | |
69 | if (details.startsWith("Posted by ") && pos >= 0) { | |
70 | return details.substring("Posted by ".length(), pos).trim(); | |
71 | } | |
72 | ||
73 | return ""; | |
74 | } | |
75 | ||
76 | @Override | |
77 | protected String getArticleDate(Document doc, Element article) { | |
78 | // Do not try bad articles | |
79 | if (getArticleId(doc, article).isEmpty()) { | |
80 | return ""; | |
81 | } | |
82 | ||
83 | Element dateElement = doc.getElementsByTag("time").first(); | |
84 | if (dateElement != null) { | |
85 | String date = dateElement.text().trim(); | |
86 | if (date.startsWith("on ")) { | |
87 | date = date.substring("on ".length()); | |
88 | } | |
89 | ||
90 | return date; | |
91 | } | |
92 | ||
93 | return ""; | |
94 | } | |
95 | ||
96 | @Override | |
97 | protected String getArticleCategory(Document doc, Element article, | |
98 | String currentCategory) { | |
99 | Element categElement = doc.getElementsByClass("topic").first(); | |
100 | if (categElement != null) { | |
101 | return categElement.text(); | |
102 | } | |
103 | ||
104 | return ""; | |
105 | } | |
106 | ||
107 | @Override | |
108 | protected String getArticleDetails(Document doc, Element article) { | |
109 | // details: "Posted by AUTHOR on DATE from the further-crackdown dept." | |
110 | String details = getArticleDetailsReal(article); | |
111 | int pos = details.indexOf(" from the "); | |
112 | if (pos >= 0) { | |
113 | return details.substring(pos).trim(); | |
114 | } | |
115 | ||
116 | return ""; | |
117 | } | |
118 | ||
119 | @Override | |
120 | protected String getArticleIntUrl(Document doc, Element article) { | |
121 | Element title = article.getElementsByClass("story-title").first(); | |
122 | if (title != null) { | |
73785268 | 123 | Elements links = title.getElementsByTag("a"); |
73785268 | 124 | if (links.size() > 0) { |
3e62b034 | 125 | return links.get(0).absUrl("href"); |
73785268 | 126 | } |
3e62b034 NR |
127 | } |
128 | return ""; | |
129 | } | |
130 | ||
131 | @Override | |
132 | protected String getArticleExtUrl(Document doc, Element article) { | |
133 | Element title = article.getElementsByClass("story-title").first(); | |
134 | if (title != null) { | |
135 | Elements links = title.getElementsByTag("a"); | |
73785268 | 136 | if (links.size() > 1) { |
3e62b034 | 137 | return links.get(1).absUrl("href"); |
73785268 | 138 | } |
3e62b034 NR |
139 | } |
140 | return ""; | |
141 | } | |
73785268 | 142 | |
3e62b034 NR |
143 | @Override |
144 | protected String getArticleContent(Document doc, Element article) { | |
145 | Element contentElement = doc // | |
146 | .getElementById("text-" + getArticleId(doc, article)); | |
147 | if (contentElement != null) { | |
148 | return contentElement.text(); | |
149 | } | |
73785268 | 150 | |
3e62b034 NR |
151 | return ""; |
152 | } | |
b34d1f35 | 153 | |
3e62b034 NR |
154 | @Override |
155 | protected Element getFullArticle(Document doc) { | |
156 | return null; | |
157 | } | |
73785268 | 158 | |
3e62b034 NR |
159 | @Override |
160 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
161 | List<Element> commentElements = new ArrayList<Element>(); | |
162 | Element listing = doc.getElementById("commentlisting"); | |
163 | if (listing != null) { | |
164 | for (Element commentElement : listing.children()) { | |
165 | if (commentElement.hasClass("comment")) { | |
166 | commentElements.add(commentElement); | |
167 | } | |
b34d1f35 | 168 | } |
3e62b034 NR |
169 | } |
170 | ||
171 | return commentElements; | |
172 | } | |
173 | ||
174 | @Override | |
175 | protected ElementProcessor getElementProcessorFullArticle() { | |
176 | return null; | |
177 | } | |
b34d1f35 | 178 | |
3e62b034 NR |
179 | @Override |
180 | protected List<Element> getCommentCommentPosts(Document doc, | |
181 | Element container) { | |
182 | List<Element> commentElements = new ArrayList<Element>(); | |
183 | for (Element child : container.children()) { | |
184 | if (child.id().contains("commtree_")) { | |
185 | for (Element sub : child.children()) { | |
186 | if (sub.hasClass("comment")) { | |
187 | commentElements.add(sub); | |
188 | } | |
c9cffa91 | 189 | } |
b34d1f35 | 190 | } |
3e62b034 NR |
191 | } |
192 | ||
193 | return commentElements; | |
194 | } | |
b34d1f35 | 195 | |
3e62b034 NR |
196 | @Override |
197 | protected String getCommentId(Element post) { | |
198 | if (post.hasClass("hidden")) { | |
199 | return ""; | |
73785268 NR |
200 | } |
201 | ||
3e62b034 | 202 | return post.id(); |
73785268 NR |
203 | } |
204 | ||
205 | @Override | |
3e62b034 NR |
206 | protected String getCommentAuthor(Element post) { |
207 | if (post.hasClass("hidden")) { | |
208 | return ""; | |
209 | } | |
73785268 | 210 | |
3e62b034 NR |
211 | Element author = post.getElementsByClass("by").first(); |
212 | if (author != null) { | |
213 | return author.text(); | |
73785268 NR |
214 | } |
215 | ||
3e62b034 | 216 | return ""; |
73785268 NR |
217 | } |
218 | ||
3e62b034 NR |
219 | @Override |
220 | protected String getCommentTitle(Element post) { | |
221 | if (post.hasClass("hidden")) { | |
222 | return ""; | |
223 | } | |
27008a87 | 224 | |
3e62b034 NR |
225 | Element title = post.getElementsByClass("title").first(); |
226 | if (title != null) { | |
227 | return title.text(); | |
228 | } | |
27008a87 | 229 | |
3e62b034 NR |
230 | return ""; |
231 | } | |
232 | ||
233 | @Override | |
234 | protected String getCommentDate(Element post) { | |
235 | if (post.hasClass("hidden")) { | |
236 | return ""; | |
73785268 | 237 | } |
27008a87 | 238 | |
3e62b034 NR |
239 | Element date = post.getElementsByClass("otherdetails").first(); |
240 | if (date != null) { | |
241 | return date.text(); | |
242 | } | |
243 | ||
244 | return ""; | |
73785268 NR |
245 | } |
246 | ||
3e62b034 NR |
247 | @Override |
248 | protected Element getCommentContentElement(Element post) { | |
249 | if (post.hasClass("hidden")) { | |
250 | return null; | |
251 | } | |
27008a87 | 252 | |
3e62b034 | 253 | return post.getElementsByClass("commentBody").first(); |
27008a87 | 254 | } |
73785268 | 255 | |
3e62b034 NR |
256 | @Override |
257 | protected ElementProcessor getElementProcessorComment() { | |
258 | return new BasicElementProcessor() { | |
27008a87 NR |
259 | @Override |
260 | public String processText(String text) { | |
261 | while (text.startsWith(">")) { // comment in one-liners | |
262 | text = text.substring(1).trim(); | |
263 | } | |
73785268 | 264 | |
27008a87 | 265 | return text; |
73785268 | 266 | } |
73785268 | 267 | |
27008a87 NR |
268 | @Override |
269 | public boolean detectQuote(Node node) { | |
270 | if (node instanceof Element) { | |
271 | Element elementNode = (Element) node; | |
272 | if (elementNode.tagName().equals("blockquote") | |
273 | || elementNode.hasClass("quote") | |
274 | || (elementNode.tagName().equals("p") | |
275 | && elementNode.textNodes().size() == 1 && elementNode | |
276 | .textNodes().get(0).getWholeText() | |
277 | .startsWith(">"))) { | |
278 | return true; | |
279 | } | |
280 | } | |
73785268 | 281 | |
27008a87 NR |
282 | return false; |
283 | } | |
3e62b034 NR |
284 | }; |
285 | } | |
286 | ||
287 | private String getArticleDetailsReal(Element article) { | |
288 | Element detailsElement = article.getElementsByClass("details").first(); | |
289 | if (detailsElement != null) { | |
290 | return detailsElement.text(); | |
291 | } | |
292 | ||
293 | return ""; | |
73785268 NR |
294 | } |
295 | } |