Commit | Line | Data |
---|---|---|
b19b3632 NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
3 | import be.nikiroo.gofetch.data.Story; | |
4 | import be.nikiroo.gofetch.data.Comment; | |
5 | ||
6 | import java.io.IOException; | |
7 | import java.io.UnsupportedEncodingException; | |
8 | import java.net.URL; | |
9 | import java.net.URLDecoder; | |
10 | import java.util.AbstractMap; | |
11 | import java.util.ArrayList; | |
12 | import java.util.List; | |
13 | import java.util.LinkedList; | |
14 | import java.util.Map.Entry; | |
15 | import java.util.Map; | |
16 | import java.util.HashMap; | |
17 | import java.util.Date; | |
1197ec1a | 18 | import java.text.SimpleDateFormat; |
b19b3632 NR |
19 | |
20 | import org.jsoup.nodes.Document; | |
21 | import org.jsoup.nodes.Element; | |
22 | import org.jsoup.nodes.Node; | |
23 | import org.jsoup.select.Elements; | |
24 | ||
25 | /** | |
26 | * Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>. | |
27 | * | |
28 | * @author niki | |
29 | */ | |
30 | public class Reddit extends BasicSupport { | |
31 | @Override | |
32 | public String getDescription() { | |
33 | return "Reddit: The front page of the internet"; | |
34 | } | |
35 | ||
36 | @Override | |
37 | protected List<Entry<URL, String>> getUrls() throws IOException { | |
38 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
39 | String base = "https://www.reddit.com/r/"; | |
40 | urls.add(new AbstractMap.SimpleEntry<URL, String>( | |
41 | new URL(base + "linux_gaming" + "/new/"), "linux_gaming" | |
42 | )); | |
43 | ||
44 | return urls; | |
45 | } | |
46 | ||
47 | @Override | |
48 | protected List<Element> getArticles(Document doc) { | |
1197ec1a NR |
49 | List<Element> list = doc.getElementsByClass("thing"); |
50 | if (list.isEmpty()) { | |
51 | list = doc.getElementsByClass("Post"); | |
52 | } | |
53 | if (list.isEmpty()) { | |
54 | list = doc.getElementsByClass("scrollerItem"); | |
55 | } | |
56 | ||
57 | return list; | |
b19b3632 NR |
58 | } |
59 | ||
60 | @Override | |
61 | protected String getArticleId(Document doc, Element article) { | |
62 | // Use the date, Luke | |
63 | return ""; | |
64 | } | |
65 | ||
66 | @Override | |
67 | protected String getArticleTitle(Document doc, Element article) { | |
1197ec1a NR |
68 | Elements els = article.getElementsByAttributeValue( |
69 | "data-event-action", "title"); | |
70 | if (els == null || els.isEmpty()) { | |
71 | els = article.getElementsByTag("h2"); | |
72 | } | |
73 | ||
74 | return els.first().text().trim(); | |
b19b3632 NR |
75 | } |
76 | ||
77 | @Override | |
78 | protected String getArticleAuthor(Document doc, Element article) { | |
79 | return article.getElementsByAttributeValueStarting( | |
80 | "href", "/user/" | |
81 | ).text().trim(); | |
82 | } | |
83 | ||
84 | @Override | |
85 | protected String getArticleDate(Document doc, Element article) { | |
1197ec1a NR |
86 | Element el = article.getElementsByClass("live-timestamp").first(); |
87 | if (el == null) { | |
88 | el = article.getElementsByAttributeValue( | |
89 | "data-click-id", "timestamp").first(); | |
90 | } | |
91 | ||
92 | String dateAgo = el.text().trim(); | |
60acdaf9 | 93 | return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo)); |
b19b3632 NR |
94 | } |
95 | ||
96 | @Override | |
97 | protected String getArticleCategory(Document doc, Element article, | |
98 | String currentCategory) { | |
99 | Elements categEls = article.getElementsByAttributeValueStarting( | |
100 | "href", "/r/" + currentCategory + "/search=?q=flair_name" | |
101 | ); | |
102 | ||
103 | if (categEls.size() > 0) { | |
104 | return currentCategory + ", " | |
105 | + categEls.first().text().trim(); | |
106 | } | |
107 | ||
108 | return currentCategory; | |
109 | } | |
110 | ||
111 | @Override | |
112 | protected String getArticleDetails(Document doc, Element article) { | |
113 | return ""; | |
114 | } | |
115 | ||
116 | @Override | |
117 | protected String getArticleIntUrl(Document doc, Element article) { | |
1197ec1a NR |
118 | String url = article.absUrl("data-permalink"); |
119 | if (url == null || url.isEmpty()) { | |
120 | url = article.getElementsByAttributeValue( | |
121 | "data-click-id", "timestamp").first().absUrl("href"); | |
122 | } | |
123 | ||
124 | return url; | |
b19b3632 NR |
125 | } |
126 | ||
127 | @Override | |
128 | protected String getArticleExtUrl(Document doc, Element article) { | |
1197ec1a NR |
129 | Elements els = article.getElementsByAttributeValue( |
130 | "data-event-action", "title"); | |
131 | if (els == null || els.isEmpty()) { | |
132 | els = article.getElementsByAttributeValue( | |
133 | "data-click-id", "body"); | |
134 | } | |
135 | ||
136 | Element url = els.first(); | |
b19b3632 NR |
137 | if (!url.attr("href").trim().startsWith("/")) { |
138 | return url.absUrl("href"); | |
139 | } | |
140 | ||
141 | return ""; | |
142 | } | |
143 | ||
144 | @Override | |
145 | protected String getArticleContent(Document doc, Element article) { | |
60acdaf9 | 146 | Elements els = article.getElementsByClass("h2"); |
1197ec1a NR |
147 | if (els != null && !els.isEmpty()) { |
148 | return els.first().text().trim(); | |
149 | } | |
150 | ||
b19b3632 NR |
151 | return ""; |
152 | } | |
153 | ||
154 | @Override | |
155 | protected Element getFullArticle(Document doc) { | |
60acdaf9 NR |
156 | Element element = doc.getElementsByAttributeValue( |
157 | "data-click-id", "body").first(); | |
158 | if (element == null) { | |
159 | element = doc.getElementsByClass("ckueCN").first(); | |
160 | } | |
161 | ||
162 | return element; | |
b19b3632 NR |
163 | } |
164 | ||
165 | @Override | |
166 | protected ElementProcessor getElementProcessorFullArticle() { | |
167 | return new BasicElementProcessor(); | |
168 | } | |
169 | ||
170 | @Override | |
171 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
60acdaf9 NR |
172 | Elements posts = doc.getElementsByClass("jHfOJm"); |
173 | if (posts.isEmpty()) { | |
174 | posts = doc.getElementsByClass("eCeBkc"); | |
175 | } | |
176 | ||
177 | return posts; | |
b19b3632 NR |
178 | } |
179 | ||
180 | @Override | |
181 | protected List<Element> getCommentCommentPosts(Document doc, | |
182 | Element container) { | |
183 | List<Element> elements = new LinkedList<Element>(); | |
184 | for (Element el : container.children()) { | |
185 | elements.addAll(el.getElementsByClass("jHfOJm")); | |
186 | } | |
187 | ||
188 | return elements; | |
189 | } | |
190 | ||
191 | @Override | |
192 | protected String getCommentId(Element post) { | |
193 | int level = 1; | |
194 | Elements els = post.getElementsByClass("imyGpC"); | |
195 | if (els.size() > 0) { | |
196 | String l = els.first().text().trim() | |
197 | .replace("level ", ""); | |
198 | try { | |
199 | level = Integer.parseInt(l); | |
200 | } catch(NumberFormatException e) { | |
201 | } | |
202 | } | |
203 | ||
204 | return Integer.toString(level); | |
205 | } | |
206 | ||
207 | @Override | |
208 | protected String getCommentAuthor(Element post) { | |
209 | // Since we have no title, we switch with author | |
210 | return ""; | |
211 | } | |
212 | ||
213 | @Override | |
214 | protected String getCommentTitle(Element post) { | |
215 | // Since we have no title, we switch with author | |
216 | Elements els = post.getElementsByClass("RVnoX"); | |
217 | if (els.size() > 0) { | |
218 | return els.first().text().trim(); | |
219 | } | |
220 | ||
221 | els = post.getElementsByClass("kzePTH"); | |
222 | if (els.size() > 0) { | |
223 | return els.first().text().trim(); | |
224 | } | |
225 | ||
226 | return ""; | |
227 | } | |
228 | ||
229 | @Override | |
230 | protected String getCommentDate(Element post) { | |
60acdaf9 | 231 | String dateAgo = post.getElementsByClass("hJDlLH") |
b19b3632 | 232 | .first().text().trim(); |
60acdaf9 | 233 | return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo)); |
b19b3632 NR |
234 | } |
235 | ||
236 | @Override | |
237 | protected Element getCommentContentElement(Element post) { | |
238 | return post.getElementsByClass("ckueCN") | |
239 | .first(); | |
240 | } | |
241 | ||
242 | @Override | |
243 | protected ElementProcessor getElementProcessorComment() { | |
244 | return new BasicElementProcessor(); | |
245 | } | |
246 | ||
247 | @Override | |
248 | public void fetch(Story story) throws IOException { | |
249 | super.fetch(story); | |
250 | ||
251 | List<Comment> comments = new LinkedList<Comment>(); | |
252 | Map<Integer, Comment> lastOfLevel = | |
253 | new HashMap<Integer, Comment>(); | |
254 | ||
255 | for (Comment c : story.getComments()) { | |
256 | int level = Integer.parseInt(c.getId()); | |
257 | lastOfLevel.put(level, c); | |
258 | if (level <= 1) { | |
259 | comments.add(c); | |
260 | } else { | |
261 | Comment parent = lastOfLevel.get(level - 1); | |
262 | if (parent != null ){ | |
263 | parent.add(c); | |
264 | } else { | |
265 | // bad data | |
266 | comments.add(c); | |
267 | } | |
268 | } | |
269 | } | |
270 | ||
271 | story.setComments(comments); | |
272 | } | |
60acdaf9 NR |
273 | |
274 | // 2 hours ago -> 18/10/2018 21:00 | |
275 | private Date getDate(String dateAgo) { | |
276 | int h = 0; | |
277 | if (dateAgo.endsWith("hour ago")) { | |
278 | h = 1; | |
279 | } else if (dateAgo.endsWith("hours ago")) { | |
280 | dateAgo = dateAgo.replace("hours ago", "").trim(); | |
281 | h = Integer.parseInt(dateAgo); | |
282 | } else if (dateAgo.endsWith("day ago")) { | |
283 | h = 24; | |
284 | } else if (dateAgo.endsWith("days ago")) { | |
285 | dateAgo = dateAgo.replace("days ago", "").trim(); | |
286 | h = Integer.parseInt(dateAgo) * 24; | |
287 | } | |
288 | ||
289 | long now = new Date().getTime(); // in ms since 1970 | |
290 | now = now / (1000l * 60l * 60l); // in hours since 1970 | |
291 | long then = now - h; // in hours since 1970 | |
292 | then = then * (1000l * 60l * 60l); // in ms since 1970 | |
293 | ||
294 | return new Date(then); | |
295 | } | |
b19b3632 | 296 | } |