Reddit: ID also uses title
[gofetch.git] / src / be / nikiroo / gofetch / support / Reddit.java
index 6ae1a6c0ed611b1ec24d7816fcc00525b13f7bc8..2732894a4fe77168b21ccaeda8cb92d562382a95 100644 (file)
@@ -15,6 +15,7 @@ import java.util.Map.Entry;
 import java.util.Map;
 import java.util.HashMap;
 import java.util.Date;
+import java.text.SimpleDateFormat;
 
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
@@ -45,19 +46,39 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected List<Element> getArticles(Document doc) {
-               return doc.getElementsByClass("thing");
+               List<Element> list = doc.getElementsByClass("thing"); // first choice: elements with class "thing"
+               if (list.isEmpty()) {
+                       list = doc.getElementsByClass("Post"); // fallback 1: class "Post"
+               }
+               if (list.isEmpty()) {
+                       list = doc.getElementsByClass("scrollerItem"); // fallback 2: class "scrollerItem"
+               }
+               
+               return list; // still empty when none of the three class names matched
        }
 
        @Override
        protected String getArticleId(Document doc, Element article) {
-               // Use the date, Luke
-               return "";
+               String date = getArticleDate(doc, article); // the ID is built from the formatted date...
+               String title = getArticleTitle(doc, article); // ...followed by the article title
+               
+               String id = (date + "_" + title).replaceAll("[^a-zA-Z0-9_-]", "_"); // every char outside [a-zA-Z0-9_-] becomes "_"
+               if (id.length() > 40) { // cap the ID at 40 characters
+                       id = id.substring(0, 40);
+               }
+               
+               return id;
        }
 
        @Override
        protected String getArticleTitle(Document doc, Element article) {
-               return article.getElementsByAttributeValue(
-                       "data-event-action", "title").first().text().trim();
+               Elements els = article.getElementsByAttributeValue(
+                               "data-event-action", "title"); // preferred: elements with data-event-action="title"
+               if (els == null || els.isEmpty()) {
+                       els = article.getElementsByTag("h2"); // fallback: <h2> elements inside the article
+               }
+               
+               return els.first().text().trim(); // NOTE(review): first() looks nullable (getArticleDate null-checks it) — confirm a match always exists here
        }
        
        @Override
@@ -69,8 +90,14 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected String getArticleDate(Document doc, Element article) {
-               return article.getElementsByClass("live-timestamp")
-                       .attr("datetime").trim();
+               Element el = article.getElementsByClass("live-timestamp").first(); // preferred: first element with class "live-timestamp"
+               if (el == null) {
+                       el = article.getElementsByAttributeValue(
+                               "data-click-id", "timestamp").first(); // fallback: element with data-click-id="timestamp"
+               }
+               
+               String dateAgo = el.text().trim(); // NOTE(review): el is not re-checked after the fallback, so it may still be null here — confirm
+               return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo)); // element text is parsed by getDate(), then formatted as yyyy-MM-dd_HH-mm
        }
 
        @Override
@@ -95,14 +122,25 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected String getArticleIntUrl(Document doc, Element article) {
-               return article.getElementsByClass("thing").first()
-                       .absUrl("data-permalink");
+               String url = article.absUrl("data-permalink"); // preferred: data-permalink read from the article element itself
+               if (url == null || url.isEmpty()) {
+                       url = article.getElementsByAttributeValue(
+                               "data-click-id", "timestamp").first().absUrl("href"); // fallback: href of the data-click-id="timestamp" element; NOTE(review): first() is not null-checked — confirm
+               }
+               
+               return url;
        }
 
        @Override
        protected String getArticleExtUrl(Document doc, Element article) {
-               Element url = article.getElementsByAttributeValue(
-                       "data-event-action", "title").first();
+               Elements els = article.getElementsByAttributeValue(
+                       "data-event-action", "title");
+               if (els == null || els.isEmpty()) {
+                       els = article.getElementsByAttributeValue(
+                                       "data-click-id", "body");
+               }
+               
+               Element url = els.first();
                if (!url.attr("href").trim().startsWith("/")) {
                        return url.absUrl("href");
                }
@@ -112,12 +150,23 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected String getArticleContent(Document doc, Element article) {
+               Elements els = article.getElementsByClass("h2"); // NOTE(review): this is a CSS-class lookup for "h2", while getArticleTitle uses getElementsByTag("h2") — confirm which is intended
+               if (els != null && !els.isEmpty()) {
+                       return els.first().text().trim(); // content is the trimmed text of the first match
+               }
+               
                return "";
        }
 
        @Override
        protected Element getFullArticle(Document doc) {
-               return doc.getElementsByClass("ckueCN").first();
+               Element element = doc.getElementsByAttributeValue(
+                       "data-click-id", "body").first(); // preferred: first element with data-click-id="body"
+               if (element == null) {
+                       element = doc.getElementsByClass("ckueCN").first(); // fallback: the previously used class "ckueCN"
+               }
+               
+               return element; // not re-checked after the fallback, so this may be null when neither lookup matched
        }
 
        @Override
@@ -127,7 +176,12 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
-               return doc.getElementsByClass("jHfOJm");
+               Elements posts = doc.getElementsByClass("jHfOJm"); // preferred: elements with class "jHfOJm"
+               if (posts.isEmpty()) {
+                       posts = doc.getElementsByClass("eCeBkc"); // fallback: elements with class "eCeBkc"
+               }
+               
+               return posts; // still empty when neither class name matched
        }
 
        @Override
@@ -181,8 +235,9 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected String getCommentDate(Element post) {
-               return post.getElementsByClass("hJDlLH")
+               String dateAgo = post.getElementsByClass("hJDlLH") // text of the first "hJDlLH" element; NOTE(review): first() is not null-checked — confirm
                        .first().text().trim();
+               return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo)); // same yyyy-MM-dd_HH-mm format as getArticleDate
        }
 
        @Override
@@ -222,4 +277,27 @@ public class Reddit extends BasicSupport {
                
                story.setComments(comments);
        }
+       
+       // Converts a relative age text to a Date truncated to the whole hour, e.g. 2 hours ago -> 18/10/2018 21:00
+       private Date getDate(String dateAgo) {
+               int h = 0; // hours to subtract from now; stays 0 when no suffix below matches
+               if (dateAgo.endsWith("hour ago")) { // singular form: counts as one hour
+                       h = 1;
+               } else if (dateAgo.endsWith("hours ago")) {
+                       dateAgo = dateAgo.replace("hours ago", "").trim(); // keep only what precedes the suffix
+                       h = Integer.parseInt(dateAgo); // NOTE(review): throws NumberFormatException unless the remainder is a plain integer — confirm input
+               } else if (dateAgo.endsWith("day ago")) { // singular form: counts as 24 hours
+                       h = 24;
+               } else if (dateAgo.endsWith("days ago")) {
+                       dateAgo = dateAgo.replace("days ago", "").trim(); // keep only what precedes the suffix
+                       h = Integer.parseInt(dateAgo) * 24; // days converted to hours
+               } // NOTE(review): any other text (minutes, months, years...) leaves h == 0, i.e. the current hour — confirm intended
+               
+               long now = new Date().getTime();   // in ms since 1970
+               now = now / (1000l * 60l * 60l);   // in hours since 1970
+               long then = now - h;               // in hours since 1970
+               then = then * (1000l * 60l * 60l); // in ms since 1970
+               
+               return new Date(then);
+       }
 }