1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
5 import java
.text
.SimpleDateFormat
;
6 import java
.util
.AbstractMap
;
7 import java
.util
.ArrayList
;
9 import java
.util
.HashMap
;
10 import java
.util
.LinkedList
;
11 import java
.util
.List
;
13 import java
.util
.Map
.Entry
;
15 import org
.jsoup
.nodes
.Document
;
16 import org
.jsoup
.nodes
.Element
;
17 import org
.jsoup
.select
.Elements
;
19 import be
.nikiroo
.gofetch
.data
.Comment
;
20 import be
.nikiroo
.gofetch
.data
.Story
;
23 * Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>.
27 public class Reddit
extends BasicSupport
{
/**
 * Returns the human-readable description of this support.
 *
 * @return the constant string "Reddit: The front page of the internet"
 */
29 public String
getDescription() {
30 return "Reddit: The front page of the internet";
/**
 * Builds the list of (URL, name) entries handled by this support.
 * <p>
 * The visible code registers a single entry: the URL made of the base
 * "https://www.reddit.com/r/" followed by "linux_gaming" and "/new/",
 * paired with the name "linux_gaming".
 * <p>
 * NOTE(review): the return statement is not visible in this view;
 * presumably the list built here is returned -- confirm.
 *
 * @return the URL/name entries (see note above)
 * @throws IOException
 *             declared by the signature; the URL constructor used here can
 *             throw MalformedURLException, which is an IOException
 */
34 protected List
<Entry
<URL
, String
>> getUrls() throws IOException
{
35 List
<Entry
<URL
, String
>> urls
= new ArrayList
<Entry
<URL
, String
>>();
// Common prefix shared by every subreddit URL built below.
36 String base
= "https://www.reddit.com/r/";
// Register the "/new/" page of the "linux_gaming" subreddit under that name.
37 urls
.add(new AbstractMap
.SimpleEntry
<URL
, String
>(new URL(base
38 + "linux_gaming" + "/new/"), "linux_gaming"));
/**
 * Collects the article elements found in a document.
 * <p>
 * The visible code looks elements up by CSS class three times, in this
 * order: "thing", "Post", "scrollerItem".
 * <p>
 * NOTE(review): the statements between those assignments, and the return,
 * are not visible in this view; presumably each later lookup is a fallback
 * used when the previous one found nothing -- confirm against the full file.
 *
 * @param doc
 *            the document to search
 * @return the matching elements (return statement not visible here)
 */
44 protected List
<Element
> getArticles(Document doc
) {
// First lookup: elements carrying the class "thing".
45 List
<Element
> list
= doc
.getElementsByClass("thing");
// Second lookup: elements carrying the class "Post".
47 list
= doc
.getElementsByClass("Post");
// Third lookup: elements carrying the class "scrollerItem".
50 list
= doc
.getElementsByClass("scrollerItem");
/**
 * Derives an identifier for an article from its date and title.
 * <p>
 * The date and title are joined with "_", every character that is not an
 * ASCII letter, a digit, "_" or "-" is replaced by "_", and the result is
 * cut to at most 40 characters.
 * <p>
 * NOTE(review): the return statement is not visible in this view;
 * presumably the computed id is returned -- confirm.
 *
 * @param doc
 *            the document containing the article
 * @param article
 *            the article element
 * @return the identifier (see note above)
 */
57 protected String
getArticleId(Document doc
, Element article
) {
58 String date
= getArticleDate(doc
, article
);
59 String title
= getArticleTitle(doc
, article
);
// Sanitise: keep only [a-zA-Z0-9_-], everything else becomes "_".
61 String id
= (date
+ "_" + title
).replaceAll("[^a-zA-Z0-9_-]", "_");
// Cap the identifier length at 40 characters.
62 if (id
.length() > 40) {
63 id
= id
.substring(0, 40);
/**
 * Extracts the title text of an article.
 * <p>
 * A first lookup is made on the "data-event-action" attribute; when it
 * yields nothing, the "h2" tags of the article are used instead. The
 * trimmed text of the first match is returned.
 * <p>
 * NOTE(review): the attribute value passed to the first lookup is on a line
 * that is not visible in this view. Also, if both lookups are empty,
 * first() presumably yields null and the chained call would fail -- confirm.
 *
 * @param doc
 *            the document containing the article (not used by the visible
 *            code)
 * @param article
 *            the article element
 * @return the trimmed title text
 */
70 protected String
getArticleTitle(Document doc
, Element article
) {
71 Elements els
= article
.getElementsByAttributeValue("data-event-action",
// Fallback: use the <h2> tags when the attribute lookup found nothing.
73 if (els
== null || els
.isEmpty()) {
74 els
= article
.getElementsByTag("h2");
77 return els
.first().text().trim();
/**
 * Extracts the author of an article.
 * <p>
 * The visible code starts from the elements of the article whose "href"
 * attribute begins with "/user/".
 * <p>
 * NOTE(review): the rest of the returned expression is not visible in this
 * view, so how the author text is taken from those elements cannot be
 * stated here -- confirm against the full file.
 *
 * @param doc
 *            the document containing the article (not used by the visible
 *            code)
 * @param article
 *            the article element
 * @return the author (see note above)
 */
81 protected String
getArticleAuthor(Document doc
, Element article
) {
82 return article
.getElementsByAttributeValueStarting("href", "/user/")
/**
 * Computes the date of an article as a "yyyy-MM-dd_HH-mm" string.
 * <p>
 * The text of the selected element is trimmed, converted with
 * getDate(String) and formatted with the pattern "yyyy-MM-dd_HH-mm".
 * <p>
 * NOTE(review): the element is first taken from the class "live-timestamp"
 * and then reassigned from a "data-click-id" lookup; the condition guarding
 * that reassignment and the attribute value are not visible in this view --
 * presumably it is a fallback for a missing first element; confirm.
 *
 * @param doc
 *            the document containing the article (not used by the visible
 *            code)
 * @param article
 *            the article element
 * @return the formatted date
 */
87 protected String
getArticleDate(Document doc
, Element article
) {
88 Element el
= article
.getElementsByClass("live-timestamp").first();
90 el
= article
.getElementsByAttributeValue("data-click-id",
// The element text is handed to getDate(String) for conversion.
94 String dateAgo
= el
.text().trim();
95 return new SimpleDateFormat("yyyy-MM-dd_HH-mm")
96 .format(getDate(dateAgo
));
/**
 * Computes the category label of an article.
 * <p>
 * Looks for elements of the article whose "href" starts with
 * "/r/" + currentCategory + "/search=?q=flair_name". When at least one is
 * found, the result is the current category, then ", ", then the trimmed
 * text of the first such element; otherwise the current category is
 * returned unchanged.
 * <p>
 * NOTE(review): "/search=?q=" looks unusual for a query URL (one would
 * expect "/search?q="); confirm against the real page markup before
 * changing it.
 *
 * @param doc
 *            the document containing the article (not used by the visible
 *            code)
 * @param article
 *            the article element
 * @param currentCategory
 *            the category currently being processed
 * @return the category, optionally followed by ", " and the extra label
 */
100 protected String
getArticleCategory(Document doc
, Element article
,
101 String currentCategory
) {
102 Elements categEls
= article
.getElementsByAttributeValueStarting("href",
103 "/r/" + currentCategory
+ "/search=?q=flair_name");
// A match was found: append its text to the current category.
105 if (categEls
.size() > 0) {
106 return currentCategory
+ ", " + categEls
.first().text().trim();
// No match: the current category alone.
109 return currentCategory
;
/**
 * Returns the details of an article.
 * <p>
 * NOTE(review): only the signature of this method is visible in this view;
 * its body (and therefore what it returns) cannot be described from here.
 *
 * @param doc
 *            the document containing the article
 * @param article
 *            the article element
 * @return the article details (body not visible here)
 */
113 protected String
getArticleDetails(Document doc
, Element article
) {
/**
 * Resolves the internal URL of an article.
 * <p>
 * The absolute form of the "data-permalink" attribute of the article is
 * tried first. When it is null or empty, a fallback expression ending in
 * the absolute "href" of the first element whose "data-click-id" is
 * "timestamp" is evaluated.
 * <p>
 * NOTE(review): the line that starts the fallback expression (its receiver
 * and the variable it is assigned to) and the return statement are not
 * visible in this view -- confirm against the full file.
 *
 * @param doc
 *            the document containing the article (not used by the visible
 *            code)
 * @param article
 *            the article element
 * @return the internal URL (return statement not visible here)
 */
118 protected String
getArticleIntUrl(Document doc
, Element article
) {
119 String url
= article
.absUrl("data-permalink");
// Fallback when the permalink attribute gave nothing.
120 if (url
== null || url
.isEmpty()) {
122 .getElementsByAttributeValue("data-click-id", "timestamp")
123 .first().absUrl("href");
/**
 * Resolves the external URL an article points to.
 * <p>
 * Elements are looked up on the "data-event-action" attribute first and,
 * when that yields nothing, on "data-click-id" = "body". If the "href" of
 * the first match (trimmed) does not start with "/", its absolute form is
 * returned.
 * <p>
 * NOTE(review): the attribute value of the first lookup and the value
 * returned when the "href" does start with "/" are on lines that are not
 * visible in this view. If both lookups are empty, first() presumably
 * yields null and the following call would fail -- confirm.
 *
 * @param doc
 *            the document containing the article (not used by the visible
 *            code)
 * @param article
 *            the article element
 * @return the absolute external URL when the link is not site-relative
 */
130 protected String
getArticleExtUrl(Document doc
, Element article
) {
131 Elements els
= article
.getElementsByAttributeValue("data-event-action",
// Fallback lookup when the first one found nothing.
133 if (els
== null || els
.isEmpty()) {
134 els
= article
.getElementsByAttributeValue("data-click-id", "body");
137 Element url
= els
.first();
// Only links whose href does not start with "/" are returned here.
138 if (!url
.attr("href").trim().startsWith("/")) {
139 return url
.absUrl("href");
/**
 * Extracts the short content of an article.
 * <p>
 * When the article has at least one element carrying the class "h2", the
 * trimmed text of the first one is returned.
 * <p>
 * NOTE(review): this queries the CSS <em>class</em> "h2", whereas
 * getArticleTitle queries the <em>tag</em> "h2" -- confirm which one is
 * intended. The value returned when nothing matches is on a line that is
 * not visible in this view.
 *
 * @param doc
 *            the document containing the article (not used by the visible
 *            code)
 * @param article
 *            the article element
 * @return the trimmed text of the first match
 */
146 protected String
getArticleContent(Document doc
, Element article
) {
147 Elements els
= article
.getElementsByClass("h2");
148 if (els
!= null && !els
.isEmpty()) {
149 return els
.first().text().trim();
/**
 * Locates the element holding the full article in a document.
 * <p>
 * A lookup on the "data-click-id" attribute is tried first; when the
 * resulting element is null, the first element carrying the class "ckueCN"
 * is used instead.
 * <p>
 * NOTE(review): the attribute value of the first lookup and the return
 * statement are not visible in this view. "ckueCN" looks like a generated
 * class name and may change with the site -- confirm.
 *
 * @param doc
 *            the document to search
 * @return the article element (return statement not visible here)
 */
156 protected Element
getFullArticle(Document doc
) {
157 Element element
= doc
.getElementsByAttributeValue("data-click-id",
// Fallback: first element with the class "ckueCN".
159 if (element
== null) {
160 element
= doc
.getElementsByClass("ckueCN").first();
/**
 * Supplies the processor used for the full article.
 *
 * @return a new BasicElementProcessor on every call
 */
167 protected ElementProcessor
getElementProcessorFullArticle() {
168 return new BasicElementProcessor();
/**
 * Collects the comment post elements of a full article page.
 * <p>
 * Elements are looked up by class, trying "jHfOJm", then "eCeBkc", then
 * "gxtxxZ"; each later class is only tried when the previous lookup was
 * empty.
 * <p>
 * NOTE(review): the return statement is not visible in this view;
 * presumably the collected posts are returned -- confirm. The class names
 * look generated and may change with the site.
 *
 * @param doc
 *            the document to search
 * @param intUrl
 *            the internal URL of the article (not used by the visible code)
 * @return the comment post elements (see note above)
 */
172 protected List
<Element
> getFullArticleCommentPosts(Document doc
, URL intUrl
) {
173 Elements posts
= doc
.getElementsByClass("jHfOJm");
// First fallback class.
174 if (posts
.isEmpty()) {
175 posts
= doc
.getElementsByClass("eCeBkc");
// Second fallback class.
177 if (posts
.isEmpty()) {
178 posts
= doc
.getElementsByClass("gxtxxZ");
/**
 * Collects the comment elements found under the children of a container.
 * <p>
 * For every child of the container, the elements carrying the class
 * "jHfOJm" are gathered. When that yields nothing, the children are
 * scanned again for the classes "s1ook3io-0" through "s1ook3io-5".
 * <p>
 * NOTE(review): the end of the parameter list (which presumably declares
 * the "container" used below) and the return statement are not visible in
 * this view -- confirm against the full file.
 *
 * @param doc
 *            the document containing the comments (not used by the visible
 *            code)
 * @return the gathered elements (return statement not visible here)
 */
185 protected List
<Element
> getCommentCommentPosts(Document doc
,
187 List
<Element
> elements
= new LinkedList
<Element
>();
// First pass: class "jHfOJm" under each child.
188 for (Element el
: container
.children()) {
189 elements
.addAll(el
.getElementsByClass("jHfOJm"));
// Second pass, only when the first found nothing: the "s1ook3io-N" classes.
193 if (elements
.isEmpty()) {
194 for (Element el
: container
.children()) {
195 elements
.addAll(el
.getElementsByClass("s1ook3io-0"));
196 elements
.addAll(el
.getElementsByClass("s1ook3io-1"));
197 elements
.addAll(el
.getElementsByClass("s1ook3io-2"));
198 elements
.addAll(el
.getElementsByClass("s1ook3io-3"));
199 elements
.addAll(el
.getElementsByClass("s1ook3io-4"));
200 elements
.addAll(el
.getElementsByClass("s1ook3io-5"));
/**
 * Computes the identifier of a comment post, which is a level number
 * rendered as a string.
 * <p>
 * Elements carrying the class "imyGpC" or "emJXdb" are gathered; when there
 * is at least one, the trimmed text of the first, with "level " removed, is
 * parsed as an integer. The value of "level" is returned as a decimal
 * string.
 * <p>
 * NOTE(review): the declaration (and initial value) of "level", the opening
 * of the try block and the body of the NumberFormatException handler are
 * not visible in this view -- confirm what is returned when no level can be
 * parsed.
 *
 * @param post
 *            the comment post element
 * @return the level as a decimal string
 */
208 protected String
getCommentId(Element post
) {
210 Elements els
= post
.getElementsByClass("imyGpC");
212 els
.addAll(post
.getElementsByClass("emJXdb"));
214 if (!els
.isEmpty()) {
// Strip the "level " label so that only the number remains.
215 String l
= els
.first().text().trim().replace("level ", "");
217 level
= Integer
.parseInt(l
);
218 } catch (NumberFormatException e
) {
222 return Integer
.toString(level
);
/**
 * Returns the author of a comment post.
 * <p>
 * NOTE(review): only the signature and the comment below are visible in
 * this view; the returned value cannot be described from here.
 *
 * @param post
 *            the comment post element
 * @return the author value (body not visible here)
 */
226 protected String
getCommentAuthor(Element post
) {
227 // Since we have no title, we switch with author
/**
 * Returns the title of a comment post.
 * <p>
 * As the comment below says, there is no title, so the author element is
 * used: the first element with class "RVnoX", else "kzePTH", else "jczTlv".
 * When one is found, its trimmed text is returned.
 * <p>
 * NOTE(review): the value returned when none of the three classes matches
 * is on a line that is not visible in this view -- confirm.
 *
 * @param post
 *            the comment post element
 * @return the trimmed text of the author element when one is found
 */
232 protected String
getCommentTitle(Element post
) {
233 // Since we have no title, we switch with author
235 Element authorEl
= post
.getElementsByClass("RVnoX").first();
// Fallback classes, tried in order while nothing has been found.
236 if (authorEl
== null)
237 authorEl
= post
.getElementsByClass("kzePTH").first();
238 if (authorEl
== null)
239 authorEl
= post
.getElementsByClass("jczTlv").first();
241 if (authorEl
!= null)
242 return authorEl
.text().trim();
/**
 * Computes the date of a comment post as a "yyyy-MM-dd_HH-mm" string.
 * <p>
 * The text of the selected element is trimmed, converted with
 * getDate(String) and formatted with the pattern "yyyy-MM-dd_HH-mm".
 * <p>
 * NOTE(review): the element is first taken from the class "hJDlLH" and then
 * reassigned from the class "hDplaG"; the condition guarding that
 * reassignment is not visible in this view -- presumably it is a fallback
 * for a missing first element; confirm. If no element is found at all, the
 * text() call would presumably fail on null.
 *
 * @param post
 *            the comment post element
 * @return the formatted date
 */
248 protected String
getCommentDate(Element post
) {
249 Element elAgo
= post
.getElementsByClass("hJDlLH").first();
251 elAgo
= post
.getElementsByClass("hDplaG").first();
// The element text is handed to getDate(String) for conversion.
254 String dateAgo
= elAgo
.text().trim();
255 return new SimpleDateFormat("yyyy-MM-dd_HH-mm")
256 .format(getDate(dateAgo
));
/**
 * Locates the element holding the content of a comment post.
 *
 * @param post
 *            the comment post element
 * @return the first element of the post carrying the class "ckueCN", as
 *         given by first() on the lookup result
 */
263 protected Element
getCommentContentElement(Element post
) {
264 return post
.getElementsByClass("ckueCN").first();
/**
 * Supplies the processor used for comments.
 *
 * @return a new BasicElementProcessor on every call
 */
268 protected ElementProcessor
getElementProcessorComment() {
269 return new BasicElementProcessor();
/**
 * Fetches a story and replaces its comment list with one built here.
 * <p>
 * The visible code walks the comments of the story, parses each comment id
 * as an integer level, remembers the latest comment seen for each level,
 * and looks up the remembered comment of the level just above (level - 1)
 * as a parent. Finally the story's comments are replaced by the "comments"
 * list.
 * <p>
 * NOTE(review): the statements before the two collections are created, what
 * is done with a found parent, and how "comments" is filled are on lines
 * that are not visible in this view; presumably comments are nested under
 * their parent and top-level ones are added to "comments" -- confirm.
 * Integer.parseInt throws NumberFormatException if an id is not a number.
 *
 * @param story
 *            the story to fetch and update
 * @throws IOException
 *             declared by the signature; the visible code does not show
 *             where it is raised
 */
273 public void fetch(Story story
) throws IOException
{
276 List
<Comment
> comments
= new LinkedList
<Comment
>();
// Latest comment seen so far for each level.
277 Map
<Integer
, Comment
> lastOfLevel
= new HashMap
<Integer
, Comment
>();
279 for (Comment c
: story
.getComments()) {
// The comment id is read as its level.
280 int level
= Integer
.parseInt(c
.getId());
281 lastOfLevel
.put(level
, c
);
// Candidate parent: the latest comment one level above.
285 Comment parent
= lastOfLevel
.get(level
- 1);
286 if (parent
!= null) {
295 story
.setComments(comments
);
298 // 2 hours ago -> 18/10/2018 21:00
/**
 * Converts a relative age text into an absolute date.
 * <p>
 * The text is matched on its ending: "hour ago", "hours ago", "day ago" or
 * "days ago". For the plural forms the leading number is parsed (days are
 * multiplied by 24) to get an offset "h" in hours. The current time is
 * truncated to whole hours by integer division, the offset is subtracted,
 * and the result is converted back to milliseconds.
 * <p>
 * NOTE(review): the declaration of "h" and the bodies of the "hour ago" and
 * "day ago" branches are on lines that are not visible in this view --
 * presumably they set 0, 1 and 24 respectively; confirm. Texts with any
 * other ending match no branch. Integer.parseInt throws
 * NumberFormatException when the leading part is not an integer.
 *
 * @param dateAgo
 *            the relative age text
 * @return the current time truncated to the hour, minus the offset
 */
299 private Date
getDate(String dateAgo
) {
301 if (dateAgo
.endsWith("hour ago")) {
303 } else if (dateAgo
.endsWith("hours ago")) {
// Keep only the number that precedes "hours ago".
304 dateAgo
= dateAgo
.replace("hours ago", "").trim();
305 h
= Integer
.parseInt(dateAgo
);
306 } else if (dateAgo
.endsWith("day ago")) {
308 } else if (dateAgo
.endsWith("days ago")) {
// Keep only the number that precedes "days ago", then convert days to hours.
309 dateAgo
= dateAgo
.replace("days ago", "").trim();
310 h
= Integer
.parseInt(dateAgo
) * 24;
313 long now
= new Date().getTime(); // in ms since 1970
// Integer division drops the minutes and seconds.
314 now
= now
/ (1000l * 60l * 60l); // in hours since 1970
315 long then
= now
- h
; // in hours since 1970
316 then
= then
* (1000l * 60l * 60l); // in ms since 1970
318 return new Date(then
);