Reddit: ID also use title
[gofetch.git] / src / be / nikiroo / gofetch / support / Reddit.java
CommitLineData
b19b3632
NR
1package be.nikiroo.gofetch.support;
2
3import be.nikiroo.gofetch.data.Story;
4import be.nikiroo.gofetch.data.Comment;
5
6import java.io.IOException;
7import java.io.UnsupportedEncodingException;
8import java.net.URL;
9import java.net.URLDecoder;
10import java.util.AbstractMap;
11import java.util.ArrayList;
12import java.util.List;
13import java.util.LinkedList;
14import java.util.Map.Entry;
15import java.util.Map;
16import java.util.HashMap;
17import java.util.Date;
1197ec1a 18import java.text.SimpleDateFormat;
b19b3632
NR
19
20import org.jsoup.nodes.Document;
21import org.jsoup.nodes.Element;
22import org.jsoup.nodes.Node;
23import org.jsoup.select.Elements;
24
25/**
26 * Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>.
27 *
28 * @author niki
29 */
30public class Reddit extends BasicSupport {
31 @Override
32 public String getDescription() {
33 return "Reddit: The front page of the internet";
34 }
35
36 @Override
37 protected List<Entry<URL, String>> getUrls() throws IOException {
38 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
39 String base = "https://www.reddit.com/r/";
40 urls.add(new AbstractMap.SimpleEntry<URL, String>(
41 new URL(base + "linux_gaming" + "/new/"), "linux_gaming"
42 ));
43
44 return urls;
45 }
46
47 @Override
48 protected List<Element> getArticles(Document doc) {
1197ec1a
NR
49 List<Element> list = doc.getElementsByClass("thing");
50 if (list.isEmpty()) {
51 list = doc.getElementsByClass("Post");
52 }
53 if (list.isEmpty()) {
54 list = doc.getElementsByClass("scrollerItem");
55 }
56
57 return list;
b19b3632
NR
58 }
59
60 @Override
61 protected String getArticleId(Document doc, Element article) {
7273fd58
NR
62 String date = getArticleDate(doc, article);
63 String title = getArticleTitle(doc, article);
64
65 String id = (date + "_" + title).replaceAll("[^a-zA-Z0-9_-]", "_");
66 if (id.length() > 40) {
67 id = id.substring(0, 40);
68 }
69
70 return id;
b19b3632
NR
71 }
72
73 @Override
74 protected String getArticleTitle(Document doc, Element article) {
1197ec1a
NR
75 Elements els = article.getElementsByAttributeValue(
76 "data-event-action", "title");
77 if (els == null || els.isEmpty()) {
78 els = article.getElementsByTag("h2");
79 }
80
81 return els.first().text().trim();
b19b3632
NR
82 }
83
84 @Override
85 protected String getArticleAuthor(Document doc, Element article) {
86 return article.getElementsByAttributeValueStarting(
87 "href", "/user/"
88 ).text().trim();
89 }
90
91 @Override
92 protected String getArticleDate(Document doc, Element article) {
1197ec1a
NR
93 Element el = article.getElementsByClass("live-timestamp").first();
94 if (el == null) {
95 el = article.getElementsByAttributeValue(
96 "data-click-id", "timestamp").first();
97 }
98
99 String dateAgo = el.text().trim();
60acdaf9 100 return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo));
b19b3632
NR
101 }
102
103 @Override
104 protected String getArticleCategory(Document doc, Element article,
105 String currentCategory) {
106 Elements categEls = article.getElementsByAttributeValueStarting(
107 "href", "/r/" + currentCategory + "/search=?q=flair_name"
108 );
109
110 if (categEls.size() > 0) {
111 return currentCategory + ", "
112 + categEls.first().text().trim();
113 }
114
115 return currentCategory;
116 }
117
118 @Override
119 protected String getArticleDetails(Document doc, Element article) {
120 return "";
121 }
122
123 @Override
124 protected String getArticleIntUrl(Document doc, Element article) {
1197ec1a
NR
125 String url = article.absUrl("data-permalink");
126 if (url == null || url.isEmpty()) {
127 url = article.getElementsByAttributeValue(
128 "data-click-id", "timestamp").first().absUrl("href");
129 }
130
131 return url;
b19b3632
NR
132 }
133
134 @Override
135 protected String getArticleExtUrl(Document doc, Element article) {
1197ec1a
NR
136 Elements els = article.getElementsByAttributeValue(
137 "data-event-action", "title");
138 if (els == null || els.isEmpty()) {
139 els = article.getElementsByAttributeValue(
140 "data-click-id", "body");
141 }
142
143 Element url = els.first();
b19b3632
NR
144 if (!url.attr("href").trim().startsWith("/")) {
145 return url.absUrl("href");
146 }
147
148 return "";
149 }
150
151 @Override
152 protected String getArticleContent(Document doc, Element article) {
60acdaf9 153 Elements els = article.getElementsByClass("h2");
1197ec1a
NR
154 if (els != null && !els.isEmpty()) {
155 return els.first().text().trim();
156 }
157
b19b3632
NR
158 return "";
159 }
160
161 @Override
162 protected Element getFullArticle(Document doc) {
60acdaf9
NR
163 Element element = doc.getElementsByAttributeValue(
164 "data-click-id", "body").first();
165 if (element == null) {
166 element = doc.getElementsByClass("ckueCN").first();
167 }
168
169 return element;
b19b3632
NR
170 }
171
172 @Override
173 protected ElementProcessor getElementProcessorFullArticle() {
174 return new BasicElementProcessor();
175 }
176
177 @Override
178 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
60acdaf9
NR
179 Elements posts = doc.getElementsByClass("jHfOJm");
180 if (posts.isEmpty()) {
181 posts = doc.getElementsByClass("eCeBkc");
182 }
183
184 return posts;
b19b3632
NR
185 }
186
187 @Override
188 protected List<Element> getCommentCommentPosts(Document doc,
189 Element container) {
190 List<Element> elements = new LinkedList<Element>();
191 for (Element el : container.children()) {
192 elements.addAll(el.getElementsByClass("jHfOJm"));
193 }
194
195 return elements;
196 }
197
198 @Override
199 protected String getCommentId(Element post) {
200 int level = 1;
201 Elements els = post.getElementsByClass("imyGpC");
202 if (els.size() > 0) {
203 String l = els.first().text().trim()
204 .replace("level ", "");
205 try {
206 level = Integer.parseInt(l);
207 } catch(NumberFormatException e) {
208 }
209 }
210
211 return Integer.toString(level);
212 }
213
214 @Override
215 protected String getCommentAuthor(Element post) {
216 // Since we have no title, we switch with author
217 return "";
218 }
219
220 @Override
221 protected String getCommentTitle(Element post) {
222 // Since we have no title, we switch with author
223 Elements els = post.getElementsByClass("RVnoX");
224 if (els.size() > 0) {
225 return els.first().text().trim();
226 }
227
228 els = post.getElementsByClass("kzePTH");
229 if (els.size() > 0) {
230 return els.first().text().trim();
231 }
232
233 return "";
234 }
235
236 @Override
237 protected String getCommentDate(Element post) {
60acdaf9 238 String dateAgo = post.getElementsByClass("hJDlLH")
b19b3632 239 .first().text().trim();
60acdaf9 240 return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo));
b19b3632
NR
241 }
242
243 @Override
244 protected Element getCommentContentElement(Element post) {
245 return post.getElementsByClass("ckueCN")
246 .first();
247 }
248
249 @Override
250 protected ElementProcessor getElementProcessorComment() {
251 return new BasicElementProcessor();
252 }
253
254 @Override
255 public void fetch(Story story) throws IOException {
256 super.fetch(story);
257
258 List<Comment> comments = new LinkedList<Comment>();
259 Map<Integer, Comment> lastOfLevel =
260 new HashMap<Integer, Comment>();
261
262 for (Comment c : story.getComments()) {
263 int level = Integer.parseInt(c.getId());
264 lastOfLevel.put(level, c);
265 if (level <= 1) {
266 comments.add(c);
267 } else {
268 Comment parent = lastOfLevel.get(level - 1);
269 if (parent != null ){
270 parent.add(c);
271 } else {
272 // bad data
273 comments.add(c);
274 }
275 }
276 }
277
278 story.setComments(comments);
279 }
60acdaf9
NR
280
281 // 2 hours ago -> 18/10/2018 21:00
282 private Date getDate(String dateAgo) {
283 int h = 0;
284 if (dateAgo.endsWith("hour ago")) {
285 h = 1;
286 } else if (dateAgo.endsWith("hours ago")) {
287 dateAgo = dateAgo.replace("hours ago", "").trim();
288 h = Integer.parseInt(dateAgo);
289 } else if (dateAgo.endsWith("day ago")) {
290 h = 24;
291 } else if (dateAgo.endsWith("days ago")) {
292 dateAgo = dateAgo.replace("days ago", "").trim();
293 h = Integer.parseInt(dateAgo) * 24;
294 }
295
296 long now = new Date().getTime(); // in ms since 1970
297 now = now / (1000l * 60l * 60l); // in hours since 1970
298 long then = now - h; // in hours since 1970
299 then = then * (1000l * 60l * 60l); // in ms since 1970
300
301 return new Date(then);
302 }
b19b3632 303}