Fix reddit
[gofetch.git] / src / be / nikiroo / gofetch / support / Reddit.java
CommitLineData
b19b3632
NR
1package be.nikiroo.gofetch.support;
2
b19b3632 3import java.io.IOException;
b19b3632 4import java.net.URL;
aacd7f07 5import java.text.SimpleDateFormat;
b19b3632
NR
6import java.util.AbstractMap;
7import java.util.ArrayList;
aacd7f07
NR
8import java.util.Date;
9import java.util.HashMap;
b19b3632 10import java.util.LinkedList;
aacd7f07 11import java.util.List;
b19b3632 12import java.util.Map;
aacd7f07 13import java.util.Map.Entry;
b19b3632
NR
14
15import org.jsoup.nodes.Document;
16import org.jsoup.nodes.Element;
b19b3632
NR
17import org.jsoup.select.Elements;
18
aacd7f07
NR
19import be.nikiroo.gofetch.data.Comment;
20import be.nikiroo.gofetch.data.Story;
21
b19b3632
NR
22/**
23 * Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>.
24 *
25 * @author niki
26 */
27public class Reddit extends BasicSupport {
28 @Override
29 public String getDescription() {
30 return "Reddit: The front page of the internet";
31 }
32
33 @Override
34 protected List<Entry<URL, String>> getUrls() throws IOException {
35 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
36 String base = "https://www.reddit.com/r/";
aacd7f07
NR
37 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(base
38 + "linux_gaming" + "/new/"), "linux_gaming"));
b19b3632
NR
39
40 return urls;
41 }
42
43 @Override
44 protected List<Element> getArticles(Document doc) {
1197ec1a
NR
45 List<Element> list = doc.getElementsByClass("thing");
46 if (list.isEmpty()) {
47 list = doc.getElementsByClass("Post");
48 }
49 if (list.isEmpty()) {
50 list = doc.getElementsByClass("scrollerItem");
51 }
aacd7f07 52
1197ec1a 53 return list;
b19b3632
NR
54 }
55
56 @Override
57 protected String getArticleId(Document doc, Element article) {
7273fd58
NR
58 String date = getArticleDate(doc, article);
59 String title = getArticleTitle(doc, article);
aacd7f07 60
7273fd58
NR
61 String id = (date + "_" + title).replaceAll("[^a-zA-Z0-9_-]", "_");
62 if (id.length() > 40) {
63 id = id.substring(0, 40);
64 }
aacd7f07 65
7273fd58 66 return id;
b19b3632
NR
67 }
68
69 @Override
70 protected String getArticleTitle(Document doc, Element article) {
aacd7f07
NR
71 Elements els = article.getElementsByAttributeValue("data-event-action",
72 "title");
1197ec1a
NR
73 if (els == null || els.isEmpty()) {
74 els = article.getElementsByTag("h2");
75 }
aacd7f07 76
1197ec1a 77 return els.first().text().trim();
b19b3632 78 }
aacd7f07 79
b19b3632
NR
80 @Override
81 protected String getArticleAuthor(Document doc, Element article) {
757c24ee
NR
82 String user = article
83 .getElementsByAttributeValueStarting("href", "/user/").text()
84 .trim();
85 if (user.startsWith("/u"))
86 user = user.substring(3);
87 return user;
b19b3632
NR
88 }
89
90 @Override
91 protected String getArticleDate(Document doc, Element article) {
1197ec1a
NR
92 Element el = article.getElementsByClass("live-timestamp").first();
93 if (el == null) {
aacd7f07
NR
94 el = article.getElementsByAttributeValue("data-click-id",
95 "timestamp").first();
1197ec1a 96 }
aacd7f07 97
1197ec1a 98 String dateAgo = el.text().trim();
757c24ee 99 return new SimpleDateFormat("yyyy-MM-dd") // _HH-mm
aacd7f07 100 .format(getDate(dateAgo));
b19b3632
NR
101 }
102
103 @Override
104 protected String getArticleCategory(Document doc, Element article,
105 String currentCategory) {
aacd7f07
NR
106 Elements categEls = article.getElementsByAttributeValueStarting("href",
107 "/r/" + currentCategory + "/search=?q=flair_name");
108
b19b3632 109 if (categEls.size() > 0) {
aacd7f07 110 return currentCategory + ", " + categEls.first().text().trim();
b19b3632 111 }
aacd7f07 112
b19b3632
NR
113 return currentCategory;
114 }
115
116 @Override
117 protected String getArticleDetails(Document doc, Element article) {
118 return "";
119 }
120
121 @Override
122 protected String getArticleIntUrl(Document doc, Element article) {
1197ec1a
NR
123 String url = article.absUrl("data-permalink");
124 if (url == null || url.isEmpty()) {
aacd7f07
NR
125 url = article
126 .getElementsByAttributeValue("data-click-id", "timestamp")
127 .first().absUrl("href");
1197ec1a 128 }
aacd7f07 129
1197ec1a 130 return url;
b19b3632
NR
131 }
132
133 @Override
134 protected String getArticleExtUrl(Document doc, Element article) {
aacd7f07
NR
135 Elements els = article.getElementsByAttributeValue("data-event-action",
136 "title");
1197ec1a 137 if (els == null || els.isEmpty()) {
aacd7f07 138 els = article.getElementsByAttributeValue("data-click-id", "body");
1197ec1a 139 }
aacd7f07 140
1197ec1a 141 Element url = els.first();
b19b3632
NR
142 if (!url.attr("href").trim().startsWith("/")) {
143 return url.absUrl("href");
144 }
aacd7f07 145
b19b3632
NR
146 return "";
147 }
148
149 @Override
150 protected String getArticleContent(Document doc, Element article) {
60acdaf9 151 Elements els = article.getElementsByClass("h2");
1197ec1a
NR
152 if (els != null && !els.isEmpty()) {
153 return els.first().text().trim();
154 }
aacd7f07 155
b19b3632
NR
156 return "";
157 }
158
159 @Override
160 protected Element getFullArticle(Document doc) {
aacd7f07
NR
161 Element element = doc.getElementsByAttributeValue("data-click-id",
162 "body").first();
60acdaf9
NR
163 if (element == null) {
164 element = doc.getElementsByClass("ckueCN").first();
165 }
aacd7f07 166
60acdaf9 167 return element;
b19b3632
NR
168 }
169
170 @Override
171 protected ElementProcessor getElementProcessorFullArticle() {
172 return new BasicElementProcessor();
173 }
174
175 @Override
176 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
60acdaf9
NR
177 Elements posts = doc.getElementsByClass("jHfOJm");
178 if (posts.isEmpty()) {
179 posts = doc.getElementsByClass("eCeBkc");
180 }
aacd7f07
NR
181 if (posts.isEmpty()) {
182 posts = doc.getElementsByClass("gxtxxZ");
183 }
184
60acdaf9 185 return posts;
b19b3632
NR
186 }
187
188 @Override
189 protected List<Element> getCommentCommentPosts(Document doc,
190 Element container) {
ff49bc76 191
b19b3632
NR
192 List<Element> elements = new LinkedList<Element>();
193 for (Element el : container.children()) {
ff49bc76
NR
194 // elements.addAll(el.getElementsByClass("jHfOJm"));
195 elements.addAll(el.getElementsByClass("emJXdb"));
b19b3632 196 }
aacd7f07 197
b19b3632
NR
198 return elements;
199 }
200
201 @Override
202 protected String getCommentId(Element post) {
203 int level = 1;
204 Elements els = post.getElementsByClass("imyGpC");
aacd7f07
NR
205
206 if (!els.isEmpty()) {
207 String l = els.first().text().trim().replace("level ", "");
b19b3632
NR
208 try {
209 level = Integer.parseInt(l);
aacd7f07 210 } catch (NumberFormatException e) {
b19b3632
NR
211 }
212 }
aacd7f07 213
b19b3632
NR
214 return Integer.toString(level);
215 }
216
217 @Override
218 protected String getCommentAuthor(Element post) {
219 // Since we have no title, we switch with author
220 return "";
221 }
222
223 @Override
224 protected String getCommentTitle(Element post) {
225 // Since we have no title, we switch with author
aacd7f07
NR
226
227 Element authorEl = post.getElementsByClass("RVnoX").first();
228 if (authorEl == null)
229 authorEl = post.getElementsByClass("kzePTH").first();
230 if (authorEl == null)
231 authorEl = post.getElementsByClass("jczTlv").first();
232
233 if (authorEl != null)
234 return authorEl.text().trim();
235
b19b3632
NR
236 return "";
237 }
238
239 @Override
240 protected String getCommentDate(Element post) {
aacd7f07
NR
241 Element elAgo = post.getElementsByClass("hJDlLH").first();
242 if (elAgo == null)
243 elAgo = post.getElementsByClass("hDplaG").first();
244
245 if (elAgo != null) {
246 String dateAgo = elAgo.text().trim();
247 return new SimpleDateFormat("yyyy-MM-dd_HH-mm")
248 .format(getDate(dateAgo));
249 }
250
251 return "";
b19b3632
NR
252 }
253
254 @Override
255 protected Element getCommentContentElement(Element post) {
aacd7f07 256 return post.getElementsByClass("ckueCN").first();
b19b3632
NR
257 }
258
259 @Override
260 protected ElementProcessor getElementProcessorComment() {
261 return new BasicElementProcessor();
262 }
aacd7f07 263
b19b3632
NR
264 @Override
265 public void fetch(Story story) throws IOException {
266 super.fetch(story);
aacd7f07 267
b19b3632 268 List<Comment> comments = new LinkedList<Comment>();
aacd7f07
NR
269 Map<Integer, Comment> lastOfLevel = new HashMap<Integer, Comment>();
270
ff49bc76
NR
271 if (!story.getComments().isEmpty()) {
272 // comments are saved under a main ID (which is a copy of comment 1)
273 // TODO: fix the cause instead of working around it here
274 for (Comment c : story.getComments().get(0)) {
275 int level = Integer.parseInt(c.getId());
276 lastOfLevel.put(level, c);
277 if (level <= 1) {
b19b3632 278 comments.add(c);
ff49bc76
NR
279 } else {
280 Comment parent = lastOfLevel.get(level - 1);
281 if (parent != null) {
282 parent.add(c);
283 } else {
284 // bad data
285 comments.add(c);
286 }
b19b3632
NR
287 }
288 }
289 }
aacd7f07 290
b19b3632
NR
291 story.setComments(comments);
292 }
aacd7f07 293
60acdaf9
NR
294 // 2 hours ago -> 18/10/2018 21:00
295 private Date getDate(String dateAgo) {
296 int h = 0;
297 if (dateAgo.endsWith("hour ago")) {
298 h = 1;
299 } else if (dateAgo.endsWith("hours ago")) {
300 dateAgo = dateAgo.replace("hours ago", "").trim();
301 h = Integer.parseInt(dateAgo);
302 } else if (dateAgo.endsWith("day ago")) {
303 h = 24;
304 } else if (dateAgo.endsWith("days ago")) {
305 dateAgo = dateAgo.replace("days ago", "").trim();
306 h = Integer.parseInt(dateAgo) * 24;
307 }
aacd7f07
NR
308
309 long now = new Date().getTime(); // in ms since 1970
310 now = now / (1000l * 60l * 60l); // in hours since 1970
311 long then = now - h; // in hours since 1970
60acdaf9 312 then = then * (1000l * 60l * 60l); // in ms since 1970
aacd7f07 313
60acdaf9
NR
314 return new Date(then);
315 }
b19b3632 316}