Commit | Line | Data |
---|---|---|
73785268 NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
3 | import java.io.IOException; | |
73785268 | 4 | import java.net.URL; |
3e62b034 | 5 | import java.util.AbstractMap; |
73785268 NR |
6 | import java.util.ArrayList; |
7 | import java.util.List; | |
3e62b034 | 8 | import java.util.Map.Entry; |
73785268 | 9 | |
73785268 NR |
10 | import org.jsoup.nodes.Document; |
11 | import org.jsoup.nodes.Element; | |
27008a87 | 12 | import org.jsoup.nodes.Node; |
73785268 NR |
13 | import org.jsoup.select.Elements; |
14 | ||
70b18499 NR |
15 | /** |
16 | * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>. | |
17 | * | |
18 | * @author niki | |
19 | */ | |
73785268 NR |
20 | public class Slashdot extends BasicSupport { |
21 | @Override | |
22 | public String getDescription() { | |
23 | return "Slashdot: News for nerds, stuff that matters!"; | |
24 | } | |
25 | ||
26 | @Override | |
3e62b034 NR |
27 | protected List<Entry<URL, String>> getUrls() throws IOException { |
28 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
29 | urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL( | |
30 | "https://slashdot.org/"), "")); | |
31 | return urls; | |
32 | } | |
70b18499 | 33 | |
3e62b034 NR |
34 | @Override |
35 | protected List<Element> getArticles(Document doc) { | |
36 | return doc.getElementsByTag("header"); | |
37 | } | |
73785268 | 38 | |
3e62b034 NR |
39 | @Override |
40 | protected String getArticleId(Document doc, Element article) { | |
41 | Element title = article.getElementsByClass("story-title").first(); | |
42 | if (title != null) { | |
43 | String id = title.attr("id"); | |
73785268 NR |
44 | if (id.startsWith("title-")) { |
45 | id = id.substring("title-".length()); | |
46 | } | |
47 | ||
64a785f6 NR |
48 | while (id.length() < 10) { |
49 | id = "0" + id; | |
50 | } | |
51 | ||
3e62b034 NR |
52 | return id; |
53 | } | |
54 | ||
55 | return ""; | |
56 | } | |
57 | ||
58 | @Override | |
59 | protected String getArticleTitle(Document doc, Element article) { | |
60 | Element title = article.getElementsByClass("story-title").first(); | |
61 | if (title != null) { | |
62 | return title.text(); | |
63 | } | |
64 | ||
65 | return ""; | |
66 | } | |
67 | ||
68 | @Override | |
69 | protected String getArticleAuthor(Document doc, Element article) { | |
70 | // details: "Posted by AUTHOR on DATE from the further-crackdown dept." | |
71 | String details = getArticleDetailsReal(article); | |
72 | int pos = details.indexOf(" on "); | |
73 | if (details.startsWith("Posted by ") && pos >= 0) { | |
74 | return details.substring("Posted by ".length(), pos).trim(); | |
75 | } | |
76 | ||
77 | return ""; | |
78 | } | |
79 | ||
80 | @Override | |
81 | protected String getArticleDate(Document doc, Element article) { | |
82 | // Do not try bad articles | |
83 | if (getArticleId(doc, article).isEmpty()) { | |
84 | return ""; | |
85 | } | |
86 | ||
87 | Element dateElement = doc.getElementsByTag("time").first(); | |
88 | if (dateElement != null) { | |
89 | String date = dateElement.text().trim(); | |
90 | if (date.startsWith("on ")) { | |
91 | date = date.substring("on ".length()); | |
92 | } | |
93 | ||
94 | return date; | |
95 | } | |
96 | ||
97 | return ""; | |
98 | } | |
99 | ||
100 | @Override | |
101 | protected String getArticleCategory(Document doc, Element article, | |
102 | String currentCategory) { | |
103 | Element categElement = doc.getElementsByClass("topic").first(); | |
104 | if (categElement != null) { | |
105 | return categElement.text(); | |
106 | } | |
107 | ||
108 | return ""; | |
109 | } | |
110 | ||
111 | @Override | |
112 | protected String getArticleDetails(Document doc, Element article) { | |
113 | // details: "Posted by AUTHOR on DATE from the further-crackdown dept." | |
114 | String details = getArticleDetailsReal(article); | |
115 | int pos = details.indexOf(" from the "); | |
116 | if (pos >= 0) { | |
117 | return details.substring(pos).trim(); | |
118 | } | |
119 | ||
120 | return ""; | |
121 | } | |
122 | ||
123 | @Override | |
124 | protected String getArticleIntUrl(Document doc, Element article) { | |
125 | Element title = article.getElementsByClass("story-title").first(); | |
126 | if (title != null) { | |
73785268 | 127 | Elements links = title.getElementsByTag("a"); |
73785268 | 128 | if (links.size() > 0) { |
3e62b034 | 129 | return links.get(0).absUrl("href"); |
73785268 | 130 | } |
3e62b034 NR |
131 | } |
132 | return ""; | |
133 | } | |
134 | ||
135 | @Override | |
136 | protected String getArticleExtUrl(Document doc, Element article) { | |
137 | Element title = article.getElementsByClass("story-title").first(); | |
138 | if (title != null) { | |
139 | Elements links = title.getElementsByTag("a"); | |
73785268 | 140 | if (links.size() > 1) { |
3e62b034 | 141 | return links.get(1).absUrl("href"); |
73785268 | 142 | } |
3e62b034 NR |
143 | } |
144 | return ""; | |
145 | } | |
73785268 | 146 | |
3e62b034 NR |
147 | @Override |
148 | protected String getArticleContent(Document doc, Element article) { | |
149 | Element contentElement = doc // | |
150 | .getElementById("text-" + getArticleId(doc, article)); | |
151 | if (contentElement != null) { | |
152 | return contentElement.text(); | |
153 | } | |
73785268 | 154 | |
3e62b034 NR |
155 | return ""; |
156 | } | |
b34d1f35 | 157 | |
3e62b034 NR |
158 | @Override |
159 | protected Element getFullArticle(Document doc) { | |
160 | return null; | |
161 | } | |
73785268 | 162 | |
3e62b034 NR |
163 | @Override |
164 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
165 | List<Element> commentElements = new ArrayList<Element>(); | |
166 | Element listing = doc.getElementById("commentlisting"); | |
167 | if (listing != null) { | |
168 | for (Element commentElement : listing.children()) { | |
169 | if (commentElement.hasClass("comment")) { | |
170 | commentElements.add(commentElement); | |
171 | } | |
b34d1f35 | 172 | } |
3e62b034 NR |
173 | } |
174 | ||
175 | return commentElements; | |
176 | } | |
177 | ||
178 | @Override | |
179 | protected ElementProcessor getElementProcessorFullArticle() { | |
180 | return null; | |
181 | } | |
b34d1f35 | 182 | |
3e62b034 NR |
183 | @Override |
184 | protected List<Element> getCommentCommentPosts(Document doc, | |
185 | Element container) { | |
186 | List<Element> commentElements = new ArrayList<Element>(); | |
187 | for (Element child : container.children()) { | |
188 | if (child.id().contains("commtree_")) { | |
189 | for (Element sub : child.children()) { | |
190 | if (sub.hasClass("comment")) { | |
191 | commentElements.add(sub); | |
192 | } | |
c9cffa91 | 193 | } |
b34d1f35 | 194 | } |
3e62b034 NR |
195 | } |
196 | ||
197 | return commentElements; | |
198 | } | |
b34d1f35 | 199 | |
3e62b034 NR |
200 | @Override |
201 | protected String getCommentId(Element post) { | |
202 | if (post.hasClass("hidden")) { | |
203 | return ""; | |
73785268 NR |
204 | } |
205 | ||
3e62b034 | 206 | return post.id(); |
73785268 NR |
207 | } |
208 | ||
209 | @Override | |
3e62b034 NR |
210 | protected String getCommentAuthor(Element post) { |
211 | if (post.hasClass("hidden")) { | |
212 | return ""; | |
213 | } | |
73785268 | 214 | |
3e62b034 NR |
215 | Element author = post.getElementsByClass("by").first(); |
216 | if (author != null) { | |
217 | return author.text(); | |
73785268 NR |
218 | } |
219 | ||
3e62b034 | 220 | return ""; |
73785268 NR |
221 | } |
222 | ||
3e62b034 NR |
223 | @Override |
224 | protected String getCommentTitle(Element post) { | |
225 | if (post.hasClass("hidden")) { | |
226 | return ""; | |
227 | } | |
27008a87 | 228 | |
3e62b034 NR |
229 | Element title = post.getElementsByClass("title").first(); |
230 | if (title != null) { | |
231 | return title.text(); | |
232 | } | |
27008a87 | 233 | |
3e62b034 NR |
234 | return ""; |
235 | } | |
236 | ||
237 | @Override | |
238 | protected String getCommentDate(Element post) { | |
239 | if (post.hasClass("hidden")) { | |
240 | return ""; | |
73785268 | 241 | } |
27008a87 | 242 | |
3e62b034 NR |
243 | Element date = post.getElementsByClass("otherdetails").first(); |
244 | if (date != null) { | |
245 | return date.text(); | |
246 | } | |
247 | ||
248 | return ""; | |
73785268 NR |
249 | } |
250 | ||
3e62b034 NR |
251 | @Override |
252 | protected Element getCommentContentElement(Element post) { | |
253 | if (post.hasClass("hidden")) { | |
254 | return null; | |
255 | } | |
27008a87 | 256 | |
3e62b034 | 257 | return post.getElementsByClass("commentBody").first(); |
27008a87 | 258 | } |
73785268 | 259 | |
3e62b034 NR |
260 | @Override |
261 | protected ElementProcessor getElementProcessorComment() { | |
262 | return new BasicElementProcessor() { | |
27008a87 NR |
263 | @Override |
264 | public String processText(String text) { | |
265 | while (text.startsWith(">")) { // comment in one-liners | |
266 | text = text.substring(1).trim(); | |
267 | } | |
73785268 | 268 | |
27008a87 | 269 | return text; |
73785268 | 270 | } |
73785268 | 271 | |
27008a87 NR |
272 | @Override |
273 | public boolean detectQuote(Node node) { | |
274 | if (node instanceof Element) { | |
275 | Element elementNode = (Element) node; | |
276 | if (elementNode.tagName().equals("blockquote") | |
277 | || elementNode.hasClass("quote") | |
278 | || (elementNode.tagName().equals("p") | |
279 | && elementNode.textNodes().size() == 1 && elementNode | |
280 | .textNodes().get(0).getWholeText() | |
281 | .startsWith(">"))) { | |
282 | return true; | |
283 | } | |
284 | } | |
73785268 | 285 | |
27008a87 NR |
286 | return false; |
287 | } | |
3e62b034 NR |
288 | }; |
289 | } | |
290 | ||
291 | private String getArticleDetailsReal(Element article) { | |
292 | Element detailsElement = article.getElementsByClass("details").first(); | |
293 | if (detailsElement != null) { | |
294 | return detailsElement.text(); | |
295 | } | |
296 | ||
297 | return ""; | |
73785268 NR |
298 | } |
299 | } |