Commit | Line | Data |
---|---|---|
b19b3632 NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
b19b3632 | 3 | import java.io.IOException; |
b19b3632 | 4 | import java.net.URL; |
aacd7f07 | 5 | import java.text.SimpleDateFormat; |
b19b3632 NR |
6 | import java.util.AbstractMap; |
7 | import java.util.ArrayList; | |
aacd7f07 NR |
8 | import java.util.Date; |
9 | import java.util.HashMap; | |
b19b3632 | 10 | import java.util.LinkedList; |
aacd7f07 | 11 | import java.util.List; |
b19b3632 | 12 | import java.util.Map; |
aacd7f07 | 13 | import java.util.Map.Entry; |
b19b3632 NR |
14 | |
15 | import org.jsoup.nodes.Document; | |
16 | import org.jsoup.nodes.Element; | |
b19b3632 NR |
17 | import org.jsoup.select.Elements; |
18 | ||
aacd7f07 NR |
19 | import be.nikiroo.gofetch.data.Comment; |
20 | import be.nikiroo.gofetch.data.Story; | |
21 | ||
b19b3632 NR |
22 | /** |
23 | * Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>. | |
24 | * | |
25 | * @author niki | |
26 | */ | |
27 | public class Reddit extends BasicSupport { | |
28 | @Override | |
29 | public String getDescription() { | |
30 | return "Reddit: The front page of the internet"; | |
31 | } | |
32 | ||
33 | @Override | |
34 | protected List<Entry<URL, String>> getUrls() throws IOException { | |
35 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
36 | String base = "https://www.reddit.com/r/"; | |
aacd7f07 NR |
37 | urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(base |
38 | + "linux_gaming" + "/new/"), "linux_gaming")); | |
b19b3632 NR |
39 | |
40 | return urls; | |
41 | } | |
42 | ||
43 | @Override | |
44 | protected List<Element> getArticles(Document doc) { | |
1197ec1a NR |
45 | List<Element> list = doc.getElementsByClass("thing"); |
46 | if (list.isEmpty()) { | |
47 | list = doc.getElementsByClass("Post"); | |
48 | } | |
49 | if (list.isEmpty()) { | |
50 | list = doc.getElementsByClass("scrollerItem"); | |
51 | } | |
aacd7f07 | 52 | |
1197ec1a | 53 | return list; |
b19b3632 NR |
54 | } |
55 | ||
56 | @Override | |
57 | protected String getArticleId(Document doc, Element article) { | |
7273fd58 NR |
58 | String date = getArticleDate(doc, article); |
59 | String title = getArticleTitle(doc, article); | |
aacd7f07 | 60 | |
7273fd58 NR |
61 | String id = (date + "_" + title).replaceAll("[^a-zA-Z0-9_-]", "_"); |
62 | if (id.length() > 40) { | |
63 | id = id.substring(0, 40); | |
64 | } | |
aacd7f07 | 65 | |
7273fd58 | 66 | return id; |
b19b3632 NR |
67 | } |
68 | ||
69 | @Override | |
70 | protected String getArticleTitle(Document doc, Element article) { | |
aacd7f07 NR |
71 | Elements els = article.getElementsByAttributeValue("data-event-action", |
72 | "title"); | |
1197ec1a NR |
73 | if (els == null || els.isEmpty()) { |
74 | els = article.getElementsByTag("h2"); | |
75 | } | |
aacd7f07 | 76 | |
1197ec1a | 77 | return els.first().text().trim(); |
b19b3632 | 78 | } |
aacd7f07 | 79 | |
b19b3632 NR |
80 | @Override |
81 | protected String getArticleAuthor(Document doc, Element article) { | |
757c24ee NR |
82 | String user = article |
83 | .getElementsByAttributeValueStarting("href", "/user/").text() | |
84 | .trim(); | |
85 | if (user.startsWith("/u")) | |
86 | user = user.substring(3); | |
87 | return user; | |
b19b3632 NR |
88 | } |
89 | ||
90 | @Override | |
91 | protected String getArticleDate(Document doc, Element article) { | |
1197ec1a NR |
92 | Element el = article.getElementsByClass("live-timestamp").first(); |
93 | if (el == null) { | |
aacd7f07 NR |
94 | el = article.getElementsByAttributeValue("data-click-id", |
95 | "timestamp").first(); | |
1197ec1a | 96 | } |
aacd7f07 | 97 | |
1197ec1a | 98 | String dateAgo = el.text().trim(); |
757c24ee | 99 | return new SimpleDateFormat("yyyy-MM-dd") // _HH-mm |
aacd7f07 | 100 | .format(getDate(dateAgo)); |
b19b3632 NR |
101 | } |
102 | ||
103 | @Override | |
104 | protected String getArticleCategory(Document doc, Element article, | |
105 | String currentCategory) { | |
aacd7f07 NR |
106 | Elements categEls = article.getElementsByAttributeValueStarting("href", |
107 | "/r/" + currentCategory + "/search=?q=flair_name"); | |
108 | ||
b19b3632 | 109 | if (categEls.size() > 0) { |
aacd7f07 | 110 | return currentCategory + ", " + categEls.first().text().trim(); |
b19b3632 | 111 | } |
aacd7f07 | 112 | |
b19b3632 NR |
113 | return currentCategory; |
114 | } | |
115 | ||
116 | @Override | |
117 | protected String getArticleDetails(Document doc, Element article) { | |
118 | return ""; | |
119 | } | |
120 | ||
121 | @Override | |
122 | protected String getArticleIntUrl(Document doc, Element article) { | |
1197ec1a NR |
123 | String url = article.absUrl("data-permalink"); |
124 | if (url == null || url.isEmpty()) { | |
aacd7f07 NR |
125 | url = article |
126 | .getElementsByAttributeValue("data-click-id", "timestamp") | |
127 | .first().absUrl("href"); | |
1197ec1a | 128 | } |
aacd7f07 | 129 | |
1197ec1a | 130 | return url; |
b19b3632 NR |
131 | } |
132 | ||
133 | @Override | |
134 | protected String getArticleExtUrl(Document doc, Element article) { | |
aacd7f07 NR |
135 | Elements els = article.getElementsByAttributeValue("data-event-action", |
136 | "title"); | |
1197ec1a | 137 | if (els == null || els.isEmpty()) { |
aacd7f07 | 138 | els = article.getElementsByAttributeValue("data-click-id", "body"); |
1197ec1a | 139 | } |
aacd7f07 | 140 | |
1197ec1a | 141 | Element url = els.first(); |
b19b3632 NR |
142 | if (!url.attr("href").trim().startsWith("/")) { |
143 | return url.absUrl("href"); | |
144 | } | |
aacd7f07 | 145 | |
b19b3632 NR |
146 | return ""; |
147 | } | |
148 | ||
149 | @Override | |
150 | protected String getArticleContent(Document doc, Element article) { | |
60acdaf9 | 151 | Elements els = article.getElementsByClass("h2"); |
1197ec1a NR |
152 | if (els != null && !els.isEmpty()) { |
153 | return els.first().text().trim(); | |
154 | } | |
aacd7f07 | 155 | |
b19b3632 NR |
156 | return ""; |
157 | } | |
158 | ||
159 | @Override | |
160 | protected Element getFullArticle(Document doc) { | |
aacd7f07 NR |
161 | Element element = doc.getElementsByAttributeValue("data-click-id", |
162 | "body").first(); | |
60acdaf9 NR |
163 | if (element == null) { |
164 | element = doc.getElementsByClass("ckueCN").first(); | |
165 | } | |
aacd7f07 | 166 | |
60acdaf9 | 167 | return element; |
b19b3632 NR |
168 | } |
169 | ||
170 | @Override | |
171 | protected ElementProcessor getElementProcessorFullArticle() { | |
172 | return new BasicElementProcessor(); | |
173 | } | |
174 | ||
175 | @Override | |
176 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
60acdaf9 NR |
177 | Elements posts = doc.getElementsByClass("jHfOJm"); |
178 | if (posts.isEmpty()) { | |
179 | posts = doc.getElementsByClass("eCeBkc"); | |
180 | } | |
aacd7f07 NR |
181 | if (posts.isEmpty()) { |
182 | posts = doc.getElementsByClass("gxtxxZ"); | |
183 | } | |
184 | ||
60acdaf9 | 185 | return posts; |
b19b3632 NR |
186 | } |
187 | ||
188 | @Override | |
189 | protected List<Element> getCommentCommentPosts(Document doc, | |
190 | Element container) { | |
ff49bc76 | 191 | |
b19b3632 NR |
192 | List<Element> elements = new LinkedList<Element>(); |
193 | for (Element el : container.children()) { | |
ff49bc76 NR |
194 | // elements.addAll(el.getElementsByClass("jHfOJm")); |
195 | elements.addAll(el.getElementsByClass("emJXdb")); | |
b19b3632 | 196 | } |
aacd7f07 | 197 | |
b19b3632 NR |
198 | return elements; |
199 | } | |
200 | ||
201 | @Override | |
202 | protected String getCommentId(Element post) { | |
203 | int level = 1; | |
204 | Elements els = post.getElementsByClass("imyGpC"); | |
aacd7f07 NR |
205 | |
206 | if (!els.isEmpty()) { | |
207 | String l = els.first().text().trim().replace("level ", ""); | |
b19b3632 NR |
208 | try { |
209 | level = Integer.parseInt(l); | |
aacd7f07 | 210 | } catch (NumberFormatException e) { |
b19b3632 NR |
211 | } |
212 | } | |
aacd7f07 | 213 | |
b19b3632 NR |
214 | return Integer.toString(level); |
215 | } | |
216 | ||
217 | @Override | |
218 | protected String getCommentAuthor(Element post) { | |
219 | // Since we have no title, we switch with author | |
220 | return ""; | |
221 | } | |
222 | ||
223 | @Override | |
224 | protected String getCommentTitle(Element post) { | |
225 | // Since we have no title, we switch with author | |
aacd7f07 NR |
226 | |
227 | Element authorEl = post.getElementsByClass("RVnoX").first(); | |
228 | if (authorEl == null) | |
229 | authorEl = post.getElementsByClass("kzePTH").first(); | |
230 | if (authorEl == null) | |
231 | authorEl = post.getElementsByClass("jczTlv").first(); | |
232 | ||
233 | if (authorEl != null) | |
234 | return authorEl.text().trim(); | |
235 | ||
b19b3632 NR |
236 | return ""; |
237 | } | |
238 | ||
239 | @Override | |
240 | protected String getCommentDate(Element post) { | |
aacd7f07 NR |
241 | Element elAgo = post.getElementsByClass("hJDlLH").first(); |
242 | if (elAgo == null) | |
243 | elAgo = post.getElementsByClass("hDplaG").first(); | |
244 | ||
245 | if (elAgo != null) { | |
246 | String dateAgo = elAgo.text().trim(); | |
247 | return new SimpleDateFormat("yyyy-MM-dd_HH-mm") | |
248 | .format(getDate(dateAgo)); | |
249 | } | |
250 | ||
251 | return ""; | |
b19b3632 NR |
252 | } |
253 | ||
254 | @Override | |
255 | protected Element getCommentContentElement(Element post) { | |
aacd7f07 | 256 | return post.getElementsByClass("ckueCN").first(); |
b19b3632 NR |
257 | } |
258 | ||
259 | @Override | |
260 | protected ElementProcessor getElementProcessorComment() { | |
261 | return new BasicElementProcessor(); | |
262 | } | |
aacd7f07 | 263 | |
b19b3632 NR |
264 | @Override |
265 | public void fetch(Story story) throws IOException { | |
266 | super.fetch(story); | |
aacd7f07 | 267 | |
b19b3632 | 268 | List<Comment> comments = new LinkedList<Comment>(); |
aacd7f07 NR |
269 | Map<Integer, Comment> lastOfLevel = new HashMap<Integer, Comment>(); |
270 | ||
ff49bc76 NR |
271 | if (!story.getComments().isEmpty()) { |
272 | // comments are saved under a main ID (which is a copy of comment 1) | |
273 | // TODO: fix the cause instead of working around it here | |
274 | for (Comment c : story.getComments().get(0)) { | |
275 | int level = Integer.parseInt(c.getId()); | |
276 | lastOfLevel.put(level, c); | |
277 | if (level <= 1) { | |
b19b3632 | 278 | comments.add(c); |
ff49bc76 NR |
279 | } else { |
280 | Comment parent = lastOfLevel.get(level - 1); | |
281 | if (parent != null) { | |
282 | parent.add(c); | |
283 | } else { | |
284 | // bad data | |
285 | comments.add(c); | |
286 | } | |
b19b3632 NR |
287 | } |
288 | } | |
289 | } | |
aacd7f07 | 290 | |
b19b3632 NR |
291 | story.setComments(comments); |
292 | } | |
aacd7f07 | 293 | |
60acdaf9 NR |
294 | // 2 hours ago -> 18/10/2018 21:00 |
295 | private Date getDate(String dateAgo) { | |
296 | int h = 0; | |
297 | if (dateAgo.endsWith("hour ago")) { | |
298 | h = 1; | |
299 | } else if (dateAgo.endsWith("hours ago")) { | |
300 | dateAgo = dateAgo.replace("hours ago", "").trim(); | |
301 | h = Integer.parseInt(dateAgo); | |
302 | } else if (dateAgo.endsWith("day ago")) { | |
303 | h = 24; | |
304 | } else if (dateAgo.endsWith("days ago")) { | |
305 | dateAgo = dateAgo.replace("days ago", "").trim(); | |
306 | h = Integer.parseInt(dateAgo) * 24; | |
307 | } | |
aacd7f07 NR |
308 | |
309 | long now = new Date().getTime(); // in ms since 1970 | |
310 | now = now / (1000l * 60l * 60l); // in hours since 1970 | |
311 | long then = now - h; // in hours since 1970 | |
60acdaf9 | 312 | then = then * (1000l * 60l * 60l); // in ms since 1970 |
aacd7f07 | 313 | |
60acdaf9 NR |
314 | return new Date(then); |
315 | } | |
b19b3632 | 316 | } |