Reddit: fix comments and some articles
[gofetch.git] / src / be / nikiroo / gofetch / support / Reddit.java
CommitLineData
b19b3632
NR
1package be.nikiroo.gofetch.support;
2
3import be.nikiroo.gofetch.data.Story;
4import be.nikiroo.gofetch.data.Comment;
5
6import java.io.IOException;
7import java.io.UnsupportedEncodingException;
8import java.net.URL;
9import java.net.URLDecoder;
10import java.util.AbstractMap;
11import java.util.ArrayList;
12import java.util.List;
13import java.util.LinkedList;
14import java.util.Map.Entry;
15import java.util.Map;
16import java.util.HashMap;
17import java.util.Date;
1197ec1a 18import java.text.SimpleDateFormat;
b19b3632
NR
19
20import org.jsoup.nodes.Document;
21import org.jsoup.nodes.Element;
22import org.jsoup.nodes.Node;
23import org.jsoup.select.Elements;
24
25/**
26 * Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>.
27 *
28 * @author niki
29 */
30public class Reddit extends BasicSupport {
31 @Override
32 public String getDescription() {
33 return "Reddit: The front page of the internet";
34 }
35
36 @Override
37 protected List<Entry<URL, String>> getUrls() throws IOException {
38 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
39 String base = "https://www.reddit.com/r/";
40 urls.add(new AbstractMap.SimpleEntry<URL, String>(
41 new URL(base + "linux_gaming" + "/new/"), "linux_gaming"
42 ));
43
44 return urls;
45 }
46
47 @Override
48 protected List<Element> getArticles(Document doc) {
1197ec1a
NR
49 List<Element> list = doc.getElementsByClass("thing");
50 if (list.isEmpty()) {
51 list = doc.getElementsByClass("Post");
52 }
53 if (list.isEmpty()) {
54 list = doc.getElementsByClass("scrollerItem");
55 }
56
57 return list;
b19b3632
NR
58 }
59
60 @Override
61 protected String getArticleId(Document doc, Element article) {
62 // Use the date, Luke
63 return "";
64 }
65
66 @Override
67 protected String getArticleTitle(Document doc, Element article) {
1197ec1a
NR
68 Elements els = article.getElementsByAttributeValue(
69 "data-event-action", "title");
70 if (els == null || els.isEmpty()) {
71 els = article.getElementsByTag("h2");
72 }
73
74 return els.first().text().trim();
b19b3632
NR
75 }
76
77 @Override
78 protected String getArticleAuthor(Document doc, Element article) {
79 return article.getElementsByAttributeValueStarting(
80 "href", "/user/"
81 ).text().trim();
82 }
83
84 @Override
85 protected String getArticleDate(Document doc, Element article) {
1197ec1a
NR
86 Element el = article.getElementsByClass("live-timestamp").first();
87 if (el == null) {
88 el = article.getElementsByAttributeValue(
89 "data-click-id", "timestamp").first();
90 }
91
92 String dateAgo = el.text().trim();
60acdaf9 93 return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo));
b19b3632
NR
94 }
95
96 @Override
97 protected String getArticleCategory(Document doc, Element article,
98 String currentCategory) {
99 Elements categEls = article.getElementsByAttributeValueStarting(
100 "href", "/r/" + currentCategory + "/search=?q=flair_name"
101 );
102
103 if (categEls.size() > 0) {
104 return currentCategory + ", "
105 + categEls.first().text().trim();
106 }
107
108 return currentCategory;
109 }
110
111 @Override
112 protected String getArticleDetails(Document doc, Element article) {
113 return "";
114 }
115
116 @Override
117 protected String getArticleIntUrl(Document doc, Element article) {
1197ec1a
NR
118 String url = article.absUrl("data-permalink");
119 if (url == null || url.isEmpty()) {
120 url = article.getElementsByAttributeValue(
121 "data-click-id", "timestamp").first().absUrl("href");
122 }
123
124 return url;
b19b3632
NR
125 }
126
127 @Override
128 protected String getArticleExtUrl(Document doc, Element article) {
1197ec1a
NR
129 Elements els = article.getElementsByAttributeValue(
130 "data-event-action", "title");
131 if (els == null || els.isEmpty()) {
132 els = article.getElementsByAttributeValue(
133 "data-click-id", "body");
134 }
135
136 Element url = els.first();
b19b3632
NR
137 if (!url.attr("href").trim().startsWith("/")) {
138 return url.absUrl("href");
139 }
140
141 return "";
142 }
143
144 @Override
145 protected String getArticleContent(Document doc, Element article) {
60acdaf9 146 Elements els = article.getElementsByClass("h2");
1197ec1a
NR
147 if (els != null && !els.isEmpty()) {
148 return els.first().text().trim();
149 }
150
b19b3632
NR
151 return "";
152 }
153
154 @Override
155 protected Element getFullArticle(Document doc) {
60acdaf9
NR
156 Element element = doc.getElementsByAttributeValue(
157 "data-click-id", "body").first();
158 if (element == null) {
159 element = doc.getElementsByClass("ckueCN").first();
160 }
161
162 return element;
b19b3632
NR
163 }
164
165 @Override
166 protected ElementProcessor getElementProcessorFullArticle() {
167 return new BasicElementProcessor();
168 }
169
170 @Override
171 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
60acdaf9
NR
172 Elements posts = doc.getElementsByClass("jHfOJm");
173 if (posts.isEmpty()) {
174 posts = doc.getElementsByClass("eCeBkc");
175 }
176
177 return posts;
b19b3632
NR
178 }
179
180 @Override
181 protected List<Element> getCommentCommentPosts(Document doc,
182 Element container) {
183 List<Element> elements = new LinkedList<Element>();
184 for (Element el : container.children()) {
185 elements.addAll(el.getElementsByClass("jHfOJm"));
186 }
187
188 return elements;
189 }
190
191 @Override
192 protected String getCommentId(Element post) {
193 int level = 1;
194 Elements els = post.getElementsByClass("imyGpC");
195 if (els.size() > 0) {
196 String l = els.first().text().trim()
197 .replace("level ", "");
198 try {
199 level = Integer.parseInt(l);
200 } catch(NumberFormatException e) {
201 }
202 }
203
204 return Integer.toString(level);
205 }
206
207 @Override
208 protected String getCommentAuthor(Element post) {
209 // Since we have no title, we switch with author
210 return "";
211 }
212
213 @Override
214 protected String getCommentTitle(Element post) {
215 // Since we have no title, we switch with author
216 Elements els = post.getElementsByClass("RVnoX");
217 if (els.size() > 0) {
218 return els.first().text().trim();
219 }
220
221 els = post.getElementsByClass("kzePTH");
222 if (els.size() > 0) {
223 return els.first().text().trim();
224 }
225
226 return "";
227 }
228
229 @Override
230 protected String getCommentDate(Element post) {
60acdaf9 231 String dateAgo = post.getElementsByClass("hJDlLH")
b19b3632 232 .first().text().trim();
60acdaf9 233 return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo));
b19b3632
NR
234 }
235
236 @Override
237 protected Element getCommentContentElement(Element post) {
238 return post.getElementsByClass("ckueCN")
239 .first();
240 }
241
242 @Override
243 protected ElementProcessor getElementProcessorComment() {
244 return new BasicElementProcessor();
245 }
246
247 @Override
248 public void fetch(Story story) throws IOException {
249 super.fetch(story);
250
251 List<Comment> comments = new LinkedList<Comment>();
252 Map<Integer, Comment> lastOfLevel =
253 new HashMap<Integer, Comment>();
254
255 for (Comment c : story.getComments()) {
256 int level = Integer.parseInt(c.getId());
257 lastOfLevel.put(level, c);
258 if (level <= 1) {
259 comments.add(c);
260 } else {
261 Comment parent = lastOfLevel.get(level - 1);
262 if (parent != null ){
263 parent.add(c);
264 } else {
265 // bad data
266 comments.add(c);
267 }
268 }
269 }
270
271 story.setComments(comments);
272 }
60acdaf9
NR
273
274 // 2 hours ago -> 18/10/2018 21:00
275 private Date getDate(String dateAgo) {
276 int h = 0;
277 if (dateAgo.endsWith("hour ago")) {
278 h = 1;
279 } else if (dateAgo.endsWith("hours ago")) {
280 dateAgo = dateAgo.replace("hours ago", "").trim();
281 h = Integer.parseInt(dateAgo);
282 } else if (dateAgo.endsWith("day ago")) {
283 h = 24;
284 } else if (dateAgo.endsWith("days ago")) {
285 dateAgo = dateAgo.replace("days ago", "").trim();
286 h = Integer.parseInt(dateAgo) * 24;
287 }
288
289 long now = new Date().getTime(); // in ms since 1970
290 now = now / (1000l * 60l * 60l); // in hours since 1970
291 long then = now - h; // in hours since 1970
292 then = then * (1000l * 60l * 60l); // in ms since 1970
293
294 return new Date(then);
295 }
b19b3632 296}