Commit | Line | Data |
---|---|---|
b19b3632 NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
3 | import be.nikiroo.gofetch.data.Story; | |
4 | import be.nikiroo.gofetch.data.Comment; | |
5 | ||
6 | import java.io.IOException; | |
7 | import java.io.UnsupportedEncodingException; | |
8 | import java.net.URL; | |
9 | import java.net.URLDecoder; | |
10 | import java.util.AbstractMap; | |
11 | import java.util.ArrayList; | |
12 | import java.util.List; | |
13 | import java.util.LinkedList; | |
14 | import java.util.Map.Entry; | |
15 | import java.util.Map; | |
16 | import java.util.HashMap; | |
17 | import java.util.Date; | |
1197ec1a | 18 | import java.text.SimpleDateFormat; |
b19b3632 NR |
19 | |
20 | import org.jsoup.nodes.Document; | |
21 | import org.jsoup.nodes.Element; | |
22 | import org.jsoup.nodes.Node; | |
23 | import org.jsoup.select.Elements; | |
24 | ||
25 | /** | |
26 | * Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>. | |
27 | * | |
28 | * @author niki | |
29 | */ | |
30 | public class Reddit extends BasicSupport { | |
31 | @Override | |
32 | public String getDescription() { | |
33 | return "Reddit: The front page of the internet"; | |
34 | } | |
35 | ||
36 | @Override | |
37 | protected List<Entry<URL, String>> getUrls() throws IOException { | |
38 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
39 | String base = "https://www.reddit.com/r/"; | |
40 | urls.add(new AbstractMap.SimpleEntry<URL, String>( | |
41 | new URL(base + "linux_gaming" + "/new/"), "linux_gaming" | |
42 | )); | |
43 | ||
44 | return urls; | |
45 | } | |
46 | ||
47 | @Override | |
48 | protected List<Element> getArticles(Document doc) { | |
1197ec1a NR |
49 | List<Element> list = doc.getElementsByClass("thing"); |
50 | if (list.isEmpty()) { | |
51 | list = doc.getElementsByClass("Post"); | |
52 | } | |
53 | if (list.isEmpty()) { | |
54 | list = doc.getElementsByClass("scrollerItem"); | |
55 | } | |
56 | ||
57 | return list; | |
b19b3632 NR |
58 | } |
59 | ||
60 | @Override | |
61 | protected String getArticleId(Document doc, Element article) { | |
62 | // Use the date, Luke | |
63 | return ""; | |
64 | } | |
65 | ||
66 | @Override | |
67 | protected String getArticleTitle(Document doc, Element article) { | |
1197ec1a NR |
68 | Elements els = article.getElementsByAttributeValue( |
69 | "data-event-action", "title"); | |
70 | if (els == null || els.isEmpty()) { | |
71 | els = article.getElementsByTag("h2"); | |
72 | } | |
73 | ||
74 | return els.first().text().trim(); | |
b19b3632 NR |
75 | } |
76 | ||
77 | @Override | |
78 | protected String getArticleAuthor(Document doc, Element article) { | |
79 | return article.getElementsByAttributeValueStarting( | |
80 | "href", "/user/" | |
81 | ).text().trim(); | |
82 | } | |
83 | ||
84 | @Override | |
85 | protected String getArticleDate(Document doc, Element article) { | |
1197ec1a NR |
86 | Element el = article.getElementsByClass("live-timestamp").first(); |
87 | if (el == null) { | |
88 | el = article.getElementsByAttributeValue( | |
89 | "data-click-id", "timestamp").first(); | |
90 | } | |
91 | ||
92 | String dateAgo = el.text().trim(); | |
93 | int h = 0; | |
94 | if (dateAgo.endsWith("hour ago")) { | |
95 | h = 1; | |
96 | } else if (dateAgo.endsWith("hours ago")) { | |
97 | dateAgo = dateAgo.replace("hours ago", "").trim(); | |
98 | h = Integer.parseInt(dateAgo); | |
99 | } else if (dateAgo.endsWith("day ago")) { | |
100 | h = 24; | |
101 | } else if (dateAgo.endsWith("days ago")) { | |
102 | dateAgo = dateAgo.replace("days ago", "").trim(); | |
103 | h = Integer.parseInt(dateAgo) * 24; | |
104 | } | |
105 | ||
106 | long now = new Date().getTime(); // in ms since 1970 | |
107 | now = now / (1000l * 60l * 60l); // in hours | |
108 | long then = now - h; // in hours | |
109 | then = then * (60l * 60l); // in seconds | |
110 | ||
111 | return Long.toString(then); | |
b19b3632 NR |
112 | } |
113 | ||
114 | @Override | |
115 | protected String getArticleCategory(Document doc, Element article, | |
116 | String currentCategory) { | |
117 | Elements categEls = article.getElementsByAttributeValueStarting( | |
118 | "href", "/r/" + currentCategory + "/search=?q=flair_name" | |
119 | ); | |
120 | ||
121 | if (categEls.size() > 0) { | |
122 | return currentCategory + ", " | |
123 | + categEls.first().text().trim(); | |
124 | } | |
125 | ||
126 | return currentCategory; | |
127 | } | |
128 | ||
129 | @Override | |
130 | protected String getArticleDetails(Document doc, Element article) { | |
131 | return ""; | |
132 | } | |
133 | ||
134 | @Override | |
135 | protected String getArticleIntUrl(Document doc, Element article) { | |
1197ec1a NR |
136 | String url = article.absUrl("data-permalink"); |
137 | if (url == null || url.isEmpty()) { | |
138 | url = article.getElementsByAttributeValue( | |
139 | "data-click-id", "timestamp").first().absUrl("href"); | |
140 | } | |
141 | ||
142 | return url; | |
b19b3632 NR |
143 | } |
144 | ||
145 | @Override | |
146 | protected String getArticleExtUrl(Document doc, Element article) { | |
1197ec1a NR |
147 | Elements els = article.getElementsByAttributeValue( |
148 | "data-event-action", "title"); | |
149 | if (els == null || els.isEmpty()) { | |
150 | els = article.getElementsByAttributeValue( | |
151 | "data-click-id", "body"); | |
152 | } | |
153 | ||
154 | Element url = els.first(); | |
b19b3632 NR |
155 | if (!url.attr("href").trim().startsWith("/")) { |
156 | return url.absUrl("href"); | |
157 | } | |
158 | ||
159 | return ""; | |
160 | } | |
161 | ||
162 | @Override | |
163 | protected String getArticleContent(Document doc, Element article) { | |
1197ec1a NR |
164 | Elements els = article.getElementsByClass("md"); |
165 | if (els != null && !els.isEmpty()) { | |
166 | return els.first().text().trim(); | |
167 | } | |
168 | ||
b19b3632 NR |
169 | return ""; |
170 | } | |
171 | ||
172 | @Override | |
173 | protected Element getFullArticle(Document doc) { | |
174 | return doc.getElementsByClass("ckueCN").first(); | |
175 | } | |
176 | ||
177 | @Override | |
178 | protected ElementProcessor getElementProcessorFullArticle() { | |
179 | return new BasicElementProcessor(); | |
180 | } | |
181 | ||
182 | @Override | |
183 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
184 | return doc.getElementsByClass("jHfOJm"); | |
185 | } | |
186 | ||
187 | @Override | |
188 | protected List<Element> getCommentCommentPosts(Document doc, | |
189 | Element container) { | |
190 | List<Element> elements = new LinkedList<Element>(); | |
191 | for (Element el : container.children()) { | |
192 | elements.addAll(el.getElementsByClass("jHfOJm")); | |
193 | } | |
194 | ||
195 | return elements; | |
196 | } | |
197 | ||
198 | @Override | |
199 | protected String getCommentId(Element post) { | |
200 | int level = 1; | |
201 | Elements els = post.getElementsByClass("imyGpC"); | |
202 | if (els.size() > 0) { | |
203 | String l = els.first().text().trim() | |
204 | .replace("level ", ""); | |
205 | try { | |
206 | level = Integer.parseInt(l); | |
207 | } catch(NumberFormatException e) { | |
208 | } | |
209 | } | |
210 | ||
211 | return Integer.toString(level); | |
212 | } | |
213 | ||
214 | @Override | |
215 | protected String getCommentAuthor(Element post) { | |
216 | // Since we have no title, we switch with author | |
217 | return ""; | |
218 | } | |
219 | ||
220 | @Override | |
221 | protected String getCommentTitle(Element post) { | |
222 | // Since we have no title, we switch with author | |
223 | Elements els = post.getElementsByClass("RVnoX"); | |
224 | if (els.size() > 0) { | |
225 | return els.first().text().trim(); | |
226 | } | |
227 | ||
228 | els = post.getElementsByClass("kzePTH"); | |
229 | if (els.size() > 0) { | |
230 | return els.first().text().trim(); | |
231 | } | |
232 | ||
233 | return ""; | |
234 | } | |
235 | ||
236 | @Override | |
237 | protected String getCommentDate(Element post) { | |
238 | return post.getElementsByClass("hJDlLH") | |
239 | .first().text().trim(); | |
240 | } | |
241 | ||
242 | @Override | |
243 | protected Element getCommentContentElement(Element post) { | |
244 | return post.getElementsByClass("ckueCN") | |
245 | .first(); | |
246 | } | |
247 | ||
248 | @Override | |
249 | protected ElementProcessor getElementProcessorComment() { | |
250 | return new BasicElementProcessor(); | |
251 | } | |
252 | ||
253 | @Override | |
254 | public void fetch(Story story) throws IOException { | |
255 | super.fetch(story); | |
256 | ||
257 | List<Comment> comments = new LinkedList<Comment>(); | |
258 | Map<Integer, Comment> lastOfLevel = | |
259 | new HashMap<Integer, Comment>(); | |
260 | ||
261 | for (Comment c : story.getComments()) { | |
262 | int level = Integer.parseInt(c.getId()); | |
263 | lastOfLevel.put(level, c); | |
264 | if (level <= 1) { | |
265 | comments.add(c); | |
266 | } else { | |
267 | Comment parent = lastOfLevel.get(level - 1); | |
268 | if (parent != null ){ | |
269 | parent.add(c); | |
270 | } else { | |
271 | // bad data | |
272 | comments.add(c); | |
273 | } | |
274 | } | |
275 | } | |
276 | ||
277 | story.setComments(comments); | |
278 | } | |
279 | } |