Fix reddit changing IDs + prep version 3.1.1
[gofetch.git] / src / be / nikiroo / gofetch / support / Reddit.java
CommitLineData
b19b3632
NR
1package be.nikiroo.gofetch.support;
2
b19b3632 3import java.io.IOException;
b19b3632 4import java.net.URL;
aacd7f07 5import java.text.SimpleDateFormat;
b19b3632
NR
6import java.util.AbstractMap;
7import java.util.ArrayList;
aacd7f07
NR
8import java.util.Date;
9import java.util.HashMap;
b19b3632 10import java.util.LinkedList;
aacd7f07 11import java.util.List;
b19b3632 12import java.util.Map;
aacd7f07 13import java.util.Map.Entry;
b19b3632
NR
14
15import org.jsoup.nodes.Document;
16import org.jsoup.nodes.Element;
b19b3632
NR
17import org.jsoup.select.Elements;
18
aacd7f07
NR
19import be.nikiroo.gofetch.data.Comment;
20import be.nikiroo.gofetch.data.Story;
21
b19b3632
NR
22/**
23 * Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>.
24 *
25 * @author niki
26 */
27public class Reddit extends BasicSupport {
28 @Override
29 public String getDescription() {
30 return "Reddit: The front page of the internet";
31 }
32
33 @Override
34 protected List<Entry<URL, String>> getUrls() throws IOException {
35 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
36 String base = "https://www.reddit.com/r/";
aacd7f07
NR
37 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(base
38 + "linux_gaming" + "/new/"), "linux_gaming"));
b19b3632
NR
39
40 return urls;
41 }
42
43 @Override
44 protected List<Element> getArticles(Document doc) {
1197ec1a
NR
45 List<Element> list = doc.getElementsByClass("thing");
46 if (list.isEmpty()) {
47 list = doc.getElementsByClass("Post");
48 }
49 if (list.isEmpty()) {
50 list = doc.getElementsByClass("scrollerItem");
51 }
aacd7f07 52
1197ec1a 53 return list;
b19b3632
NR
54 }
55
56 @Override
57 protected String getArticleId(Document doc, Element article) {
7273fd58
NR
58 String date = getArticleDate(doc, article);
59 String title = getArticleTitle(doc, article);
aacd7f07 60
7273fd58
NR
61 String id = (date + "_" + title).replaceAll("[^a-zA-Z0-9_-]", "_");
62 if (id.length() > 40) {
63 id = id.substring(0, 40);
64 }
aacd7f07 65
7273fd58 66 return id;
b19b3632
NR
67 }
68
69 @Override
70 protected String getArticleTitle(Document doc, Element article) {
aacd7f07
NR
71 Elements els = article.getElementsByAttributeValue("data-event-action",
72 "title");
1197ec1a
NR
73 if (els == null || els.isEmpty()) {
74 els = article.getElementsByTag("h2");
75 }
aacd7f07 76
1197ec1a 77 return els.first().text().trim();
b19b3632 78 }
aacd7f07 79
b19b3632
NR
80 @Override
81 protected String getArticleAuthor(Document doc, Element article) {
aacd7f07
NR
82 return article.getElementsByAttributeValueStarting("href", "/user/")
83 .text().trim();
b19b3632
NR
84 }
85
86 @Override
87 protected String getArticleDate(Document doc, Element article) {
1197ec1a
NR
88 Element el = article.getElementsByClass("live-timestamp").first();
89 if (el == null) {
aacd7f07
NR
90 el = article.getElementsByAttributeValue("data-click-id",
91 "timestamp").first();
1197ec1a 92 }
aacd7f07 93
1197ec1a 94 String dateAgo = el.text().trim();
aacd7f07
NR
95 return new SimpleDateFormat("yyyy-MM-dd_HH-mm")
96 .format(getDate(dateAgo));
b19b3632
NR
97 }
98
99 @Override
100 protected String getArticleCategory(Document doc, Element article,
101 String currentCategory) {
aacd7f07
NR
102 Elements categEls = article.getElementsByAttributeValueStarting("href",
103 "/r/" + currentCategory + "/search=?q=flair_name");
104
b19b3632 105 if (categEls.size() > 0) {
aacd7f07 106 return currentCategory + ", " + categEls.first().text().trim();
b19b3632 107 }
aacd7f07 108
b19b3632
NR
109 return currentCategory;
110 }
111
112 @Override
113 protected String getArticleDetails(Document doc, Element article) {
114 return "";
115 }
116
117 @Override
118 protected String getArticleIntUrl(Document doc, Element article) {
1197ec1a
NR
119 String url = article.absUrl("data-permalink");
120 if (url == null || url.isEmpty()) {
aacd7f07
NR
121 url = article
122 .getElementsByAttributeValue("data-click-id", "timestamp")
123 .first().absUrl("href");
1197ec1a 124 }
aacd7f07 125
1197ec1a 126 return url;
b19b3632
NR
127 }
128
129 @Override
130 protected String getArticleExtUrl(Document doc, Element article) {
aacd7f07
NR
131 Elements els = article.getElementsByAttributeValue("data-event-action",
132 "title");
1197ec1a 133 if (els == null || els.isEmpty()) {
aacd7f07 134 els = article.getElementsByAttributeValue("data-click-id", "body");
1197ec1a 135 }
aacd7f07 136
1197ec1a 137 Element url = els.first();
b19b3632
NR
138 if (!url.attr("href").trim().startsWith("/")) {
139 return url.absUrl("href");
140 }
aacd7f07 141
b19b3632
NR
142 return "";
143 }
144
145 @Override
146 protected String getArticleContent(Document doc, Element article) {
60acdaf9 147 Elements els = article.getElementsByClass("h2");
1197ec1a
NR
148 if (els != null && !els.isEmpty()) {
149 return els.first().text().trim();
150 }
aacd7f07 151
b19b3632
NR
152 return "";
153 }
154
155 @Override
156 protected Element getFullArticle(Document doc) {
aacd7f07
NR
157 Element element = doc.getElementsByAttributeValue("data-click-id",
158 "body").first();
60acdaf9
NR
159 if (element == null) {
160 element = doc.getElementsByClass("ckueCN").first();
161 }
aacd7f07 162
60acdaf9 163 return element;
b19b3632
NR
164 }
165
166 @Override
167 protected ElementProcessor getElementProcessorFullArticle() {
168 return new BasicElementProcessor();
169 }
170
171 @Override
172 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
60acdaf9
NR
173 Elements posts = doc.getElementsByClass("jHfOJm");
174 if (posts.isEmpty()) {
175 posts = doc.getElementsByClass("eCeBkc");
176 }
aacd7f07
NR
177 if (posts.isEmpty()) {
178 posts = doc.getElementsByClass("gxtxxZ");
179 }
180
60acdaf9 181 return posts;
b19b3632
NR
182 }
183
184 @Override
185 protected List<Element> getCommentCommentPosts(Document doc,
186 Element container) {
187 List<Element> elements = new LinkedList<Element>();
188 for (Element el : container.children()) {
189 elements.addAll(el.getElementsByClass("jHfOJm"));
aacd7f07
NR
190
191 }
192
193 if (elements.isEmpty()) {
194 for (Element el : container.children()) {
bce031dc
NR
195 elements.addAll(el.getElementsByClass("s1ook3io-0"));
196 elements.addAll(el.getElementsByClass("s1ook3io-1"));
197 elements.addAll(el.getElementsByClass("s1ook3io-2"));
198 elements.addAll(el.getElementsByClass("s1ook3io-3"));
199 elements.addAll(el.getElementsByClass("s1ook3io-4"));
200 elements.addAll(el.getElementsByClass("s1ook3io-5"));
aacd7f07 201 }
b19b3632 202 }
aacd7f07 203
b19b3632
NR
204 return elements;
205 }
206
207 @Override
208 protected String getCommentId(Element post) {
209 int level = 1;
210 Elements els = post.getElementsByClass("imyGpC");
aacd7f07
NR
211 if (els.isEmpty())
212 els.addAll(post.getElementsByClass("emJXdb"));
213
214 if (!els.isEmpty()) {
215 String l = els.first().text().trim().replace("level ", "");
b19b3632
NR
216 try {
217 level = Integer.parseInt(l);
aacd7f07 218 } catch (NumberFormatException e) {
b19b3632
NR
219 }
220 }
aacd7f07 221
b19b3632
NR
222 return Integer.toString(level);
223 }
224
225 @Override
226 protected String getCommentAuthor(Element post) {
227 // Since we have no title, we switch with author
228 return "";
229 }
230
231 @Override
232 protected String getCommentTitle(Element post) {
233 // Since we have no title, we switch with author
aacd7f07
NR
234
235 Element authorEl = post.getElementsByClass("RVnoX").first();
236 if (authorEl == null)
237 authorEl = post.getElementsByClass("kzePTH").first();
238 if (authorEl == null)
239 authorEl = post.getElementsByClass("jczTlv").first();
240
241 if (authorEl != null)
242 return authorEl.text().trim();
243
b19b3632
NR
244 return "";
245 }
246
247 @Override
248 protected String getCommentDate(Element post) {
aacd7f07
NR
249 Element elAgo = post.getElementsByClass("hJDlLH").first();
250 if (elAgo == null)
251 elAgo = post.getElementsByClass("hDplaG").first();
252
253 if (elAgo != null) {
254 String dateAgo = elAgo.text().trim();
255 return new SimpleDateFormat("yyyy-MM-dd_HH-mm")
256 .format(getDate(dateAgo));
257 }
258
259 return "";
b19b3632
NR
260 }
261
262 @Override
263 protected Element getCommentContentElement(Element post) {
aacd7f07 264 return post.getElementsByClass("ckueCN").first();
b19b3632
NR
265 }
266
267 @Override
268 protected ElementProcessor getElementProcessorComment() {
269 return new BasicElementProcessor();
270 }
aacd7f07 271
b19b3632
NR
272 @Override
273 public void fetch(Story story) throws IOException {
274 super.fetch(story);
aacd7f07 275
b19b3632 276 List<Comment> comments = new LinkedList<Comment>();
aacd7f07
NR
277 Map<Integer, Comment> lastOfLevel = new HashMap<Integer, Comment>();
278
b19b3632
NR
279 for (Comment c : story.getComments()) {
280 int level = Integer.parseInt(c.getId());
281 lastOfLevel.put(level, c);
282 if (level <= 1) {
283 comments.add(c);
284 } else {
285 Comment parent = lastOfLevel.get(level - 1);
aacd7f07 286 if (parent != null) {
b19b3632
NR
287 parent.add(c);
288 } else {
289 // bad data
290 comments.add(c);
291 }
292 }
293 }
aacd7f07 294
b19b3632
NR
295 story.setComments(comments);
296 }
aacd7f07 297
60acdaf9
NR
298 // 2 hours ago -> 18/10/2018 21:00
299 private Date getDate(String dateAgo) {
300 int h = 0;
301 if (dateAgo.endsWith("hour ago")) {
302 h = 1;
303 } else if (dateAgo.endsWith("hours ago")) {
304 dateAgo = dateAgo.replace("hours ago", "").trim();
305 h = Integer.parseInt(dateAgo);
306 } else if (dateAgo.endsWith("day ago")) {
307 h = 24;
308 } else if (dateAgo.endsWith("days ago")) {
309 dateAgo = dateAgo.replace("days ago", "").trim();
310 h = Integer.parseInt(dateAgo) * 24;
311 }
aacd7f07
NR
312
313 long now = new Date().getTime(); // in ms since 1970
314 now = now / (1000l * 60l * 60l); // in hours since 1970
315 long then = now - h; // in hours since 1970
60acdaf9 316 then = then * (1000l * 60l * 60l); // in ms since 1970
aacd7f07 317
60acdaf9
NR
318 return new Date(then);
319 }
b19b3632 320}