1 package be
.nikiroo
.gofetch
.support
;
3 import be
.nikiroo
.gofetch
.data
.Story
;
4 import be
.nikiroo
.gofetch
.data
.Comment
;
6 import java
.io
.IOException
;
7 import java
.io
.UnsupportedEncodingException
;
9 import java
.net
.URLDecoder
;
10 import java
.util
.AbstractMap
;
11 import java
.util
.ArrayList
;
12 import java
.util
.List
;
13 import java
.util
.LinkedList
;
14 import java
.util
.Map
.Entry
;
16 import java
.util
.HashMap
;
17 import java
.util
.Date
;
18 import java
.text
.SimpleDateFormat
;
20 import org
.jsoup
.nodes
.Document
;
21 import org
.jsoup
.nodes
.Element
;
22 import org
.jsoup
.nodes
.Node
;
23 import org
.jsoup
.select
.Elements
;
26 * Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>.
30 public class Reddit
extends BasicSupport
{
32 public String
getDescription() {
33 return "Reddit: The front page of the internet";
37 protected List
<Entry
<URL
, String
>> getUrls() throws IOException
{
38 List
<Entry
<URL
, String
>> urls
= new ArrayList
<Entry
<URL
, String
>>();
39 String base
= "https://www.reddit.com/r/";
40 urls
.add(new AbstractMap
.SimpleEntry
<URL
, String
>(
41 new URL(base
+ "linux_gaming" + "/new/"), "linux_gaming"
48 protected List
<Element
> getArticles(Document doc
) {
49 List
<Element
> list
= doc
.getElementsByClass("thing");
51 list
= doc
.getElementsByClass("Post");
54 list
= doc
.getElementsByClass("scrollerItem");
61 protected String
getArticleId(Document doc
, Element article
) {
62 String date
= getArticleDate(doc
, article
);
63 String title
= getArticleTitle(doc
, article
);
65 String id
= (date
+ "_" + title
).replaceAll("[^a-zA-Z0-9_-]", "_");
66 if (id
.length() > 40) {
67 id
= id
.substring(0, 40);
74 protected String
getArticleTitle(Document doc
, Element article
) {
75 Elements els
= article
.getElementsByAttributeValue(
76 "data-event-action", "title");
77 if (els
== null || els
.isEmpty()) {
78 els
= article
.getElementsByTag("h2");
81 return els
.first().text().trim();
// Returns the author of the given article.
// The value is derived from an attribute-prefix search
// (getElementsByAttributeValueStarting) on the article element; the arguments
// of that call and the rest of the chain are not visible in this excerpt.
85 protected String
getArticleAuthor(Document doc
, Element article
) {
86 return article
.getElementsByAttributeValueStarting(
92 protected String
getArticleDate(Document doc
, Element article
) {
93 Element el
= article
.getElementsByClass("live-timestamp").first();
95 el
= article
.getElementsByAttributeValue(
96 "data-click-id", "timestamp").first();
99 String dateAgo
= el
.text().trim();
100 return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo
));
104 protected String
getArticleCategory(Document doc
, Element article
,
105 String currentCategory
) {
106 Elements categEls
= article
.getElementsByAttributeValueStarting(
107 "href", "/r/" + currentCategory
+ "/search=?q=flair_name"
110 if (categEls
.size() > 0) {
111 return currentCategory
+ ", "
112 + categEls
.first().text().trim();
115 return currentCategory
;
// Accessor for the details of an article; takes the page and the article
// element. The body of this method is not visible in this excerpt.
119 protected String
getArticleDetails(Document doc
, Element article
) {
124 protected String
getArticleIntUrl(Document doc
, Element article
) {
125 String url
= article
.absUrl("data-permalink");
126 if (url
== null || url
.isEmpty()) {
127 url
= article
.getElementsByAttributeValue(
128 "data-click-id", "timestamp").first().absUrl("href");
// Resolves the external URL an article points to.
// The link element is looked up by data-event-action="title", then by
// data-click-id="body". What is returned when the href is site-relative is
// not visible in this excerpt.
135 protected String
getArticleExtUrl(Document doc
, Element article
) {
136 Elements els
= article
.getElementsByAttributeValue(
137 "data-event-action", "title");
// Fall back to the body link when no title link was found.
138 if (els
== null || els
.isEmpty()) {
139 els
= article
.getElementsByAttributeValue(
140 "data-click-id", "body");
143 Element url
= els
.first();
// An href starting with "/" is relative to the site itself; only other
// hrefs are returned here, in absolute form.
144 if (!url
.attr("href").trim().startsWith("/")) {
145 return url
.absUrl("href");
// Returns the short content of an article: the trimmed text of the first
// element found by the class lookup below. What is returned when nothing is
// found is not visible in this excerpt.
152 protected String
getArticleContent(Document doc
, Element article
) {
// NOTE(review): this searches for a CSS class named "h2", whereas
// getArticleTitle searches for the h2 tag -- confirm which one is intended.
153 Elements els
= article
.getElementsByClass("h2");
154 if (els
!= null && !els
.isEmpty()) {
155 return els
.first().text().trim();
162 protected Element
getFullArticle(Document doc
) {
163 Element element
= doc
.getElementsByAttributeValue(
164 "data-click-id", "body").first();
165 if (element
== null) {
166 element
= doc
.getElementsByClass("ckueCN").first();
173 protected ElementProcessor
getElementProcessorFullArticle() {
174 return new BasicElementProcessor();
178 protected List
<Element
> getFullArticleCommentPosts(Document doc
, URL intUrl
) {
179 Elements posts
= doc
.getElementsByClass("jHfOJm");
180 if (posts
.isEmpty()) {
181 posts
= doc
.getElementsByClass("eCeBkc");
// Collects comment post elements nested under a container: for every child
// of "container", all descendants with class "jHfOJm" are appended to a
// list. The remaining parameter(s) and the return statement are not visible
// in this excerpt.
188 protected List
<Element
> getCommentCommentPosts(Document doc
,
190 List
<Element
> elements
= new LinkedList
<Element
>();
191 for (Element el
: container
.children()) {
192 elements
.addAll(el
.getElementsByClass("jHfOJm"));
// Returns the id of a comment post, which is its nesting level rendered as
// a decimal string (fetch() parses it back with Integer.parseInt).
// The level is read from the element with class "imyGpC", whose text has
// the "level " prefix stripped before being parsed as an integer. The
// initial value of "level" and the handling of a NumberFormatException are
// not visible in this excerpt.
199 protected String
getCommentId(Element post
) {
201 Elements els
= post
.getElementsByClass("imyGpC");
202 if (els
.size() > 0) {
203 String l
= els
.first().text().trim()
204 .replace("level ", "");
206 level
= Integer
.parseInt(l
);
207 } catch(NumberFormatException e
) {
211 return Integer
.toString(level
);
// Accessor for the author of a comment post. Per the note below, the author
// and title values are swapped for comments; the body of this method is not
// visible in this excerpt.
215 protected String
getCommentAuthor(Element post
) {
216 // Since we have no title, we switch with author
// Returns the "title" of a comment post: the trimmed text of the first
// element with class "RVnoX", or failing that of the first element with
// class "kzePTH". What is returned when neither exists is not visible in
// this excerpt.
221 protected String
getCommentTitle(Element post
) {
222 // Since we have no title, we switch with author
223 Elements els
= post
.getElementsByClass("RVnoX");
224 if (els
.size() > 0) {
225 return els
.first().text().trim();
// Second attempt with the alternative class name.
228 els
= post
.getElementsByClass("kzePTH");
229 if (els
.size() > 0) {
230 return els
.first().text().trim();
237 protected String
getCommentDate(Element post
) {
238 String dateAgo
= post
.getElementsByClass("hJDlLH")
239 .first().text().trim();
240 return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo
));
// Returns the element holding the content of a comment post, looked up
// through the class "ckueCN". The end of the call chain is not visible in
// this excerpt.
244 protected Element
getCommentContentElement(Element post
) {
245 return post
.getElementsByClass("ckueCN")
250 protected ElementProcessor
getElementProcessorComment() {
251 return new BasicElementProcessor();
// Fetches a story and then rebuilds its comment list.
// Each comment id is parsed as an integer nesting level; the most recent
// comment seen at each level is remembered so that a comment can be related
// to the latest comment one level above it. The rebuilt list replaces the
// story's comments at the end. Several statements of this method (including
// what is done with "parent" and how "comments" is filled) are not visible
// in this excerpt.
255 public void fetch(Story story
) throws IOException
{
258 List
<Comment
> comments
= new LinkedList
<Comment
>();
// level -> last comment encountered at that level
259 Map
<Integer
, Comment
> lastOfLevel
=
260 new HashMap
<Integer
, Comment
>();
262 for (Comment c
: story
.getComments()) {
// The id is expected to be numeric; Integer.parseInt throws
// NumberFormatException otherwise.
263 int level
= Integer
.parseInt(c
.getId());
264 lastOfLevel
.put(level
, c
);
// Candidate parent: the last comment seen one level up, if any.
268 Comment parent
= lastOfLevel
.get(level
- 1);
269 if (parent
!= null ){
278 story
.setComments(comments
);
281 // 2 hours ago -> 18/10/2018 21:00
282 private Date
getDate(String dateAgo
) {
284 if (dateAgo
.endsWith("hour ago")) {
286 } else if (dateAgo
.endsWith("hours ago")) {
287 dateAgo
= dateAgo
.replace("hours ago", "").trim();
288 h
= Integer
.parseInt(dateAgo
);
289 } else if (dateAgo
.endsWith("day ago")) {
291 } else if (dateAgo
.endsWith("days ago")) {
292 dateAgo
= dateAgo
.replace("days ago", "").trim();
293 h
= Integer
.parseInt(dateAgo
) * 24;
296 long now
= new Date().getTime(); // in ms since 1970
297 now
= now
/ (1000l * 60l * 60l); // in hours since 1970
298 long then
= now
- h
; // in hours since 1970
299 then
= then
* (1000l * 60l * 60l); // in ms since 1970
301 return new Date(then
);