Reddit: fix problem with new kind of html (wip)
authorNiki Roo <roo.niki@gmail.com>
Mon, 8 Oct 2018 13:57:45 +0000 (15:57 +0200)
committerNiki Roo <roo.niki@gmail.com>
Mon, 8 Oct 2018 13:57:45 +0000 (15:57 +0200)
src/be/nikiroo/gofetch/Fetcher.java
src/be/nikiroo/gofetch/support/BasicSupport.java
src/be/nikiroo/gofetch/support/Reddit.java

index cb77f657beb706dbf9e1fefc48cfc567e01d090a..bb4cf4d31f32a7266550cb9c46672368dd7a905e 100644 (file)
@@ -156,6 +156,7 @@ public class Fetcher {
                                fetchedStories.add(story);
                                System.err.println();
                        } catch (IOException e) {
+e.printStackTrace();
                                System.err.println(" Failed to get story!");
                        }
                        i++;
index ec19c3207fbbba7447da4ce5e419da9060f742b8..17a3c151750e9c7e56169fe4fd8be87b9eb39855 100644 (file)
@@ -133,9 +133,9 @@ public abstract class BasicSupport {
                                                id = "0" + id;
                                        }
                                } else {
-                                       id = date.replace(":", "_").replace("+", "_");
+                                       id = date.replace(":", "_").replace("+", "_").replace("/", "-");
                                }
-
+                               
                                date = date(date);
 
                                list.add(new Story(getType(), id, title, author, date, categ,
@@ -822,7 +822,7 @@ public abstract class BasicSupport {
                        Date dat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX")
                                        .parse(date.trim());
                        return out.format(dat);
-               } catch (ParseException e) {
+               } catch (Exception e) {
                        return date;
                }
        }
index 6ae1a6c0ed611b1ec24d7816fcc00525b13f7bc8..3d27a466a265cd724e912b99b618ef03e9838c41 100644 (file)
@@ -15,6 +15,7 @@ import java.util.Map.Entry;
 import java.util.Map;
 import java.util.HashMap;
 import java.util.Date;
+import java.text.SimpleDateFormat;
 
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
@@ -45,7 +46,15 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected List<Element> getArticles(Document doc) {
-               return doc.getElementsByClass("thing");
+               List<Element> list = doc.getElementsByClass("thing");
+               if (list.isEmpty()) {
+                       list = doc.getElementsByClass("Post");
+               }
+               if (list.isEmpty()) {
+                       list = doc.getElementsByClass("scrollerItem");
+               }
+               
+               return list;
        }
 
        @Override
@@ -56,8 +65,13 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected String getArticleTitle(Document doc, Element article) {
-               return article.getElementsByAttributeValue(
-                       "data-event-action", "title").first().text().trim();
+               Elements els = article.getElementsByAttributeValue(
+                               "data-event-action", "title");
+               if (els == null || els.isEmpty()) {
+                       els = article.getElementsByTag("h2");
+               }
+               
+               return els.first().text().trim();
        }
        
        @Override
@@ -69,8 +83,32 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected String getArticleDate(Document doc, Element article) {
-               return article.getElementsByClass("live-timestamp")
-                       .attr("datetime").trim();
+               Element el = article.getElementsByClass("live-timestamp").first();
+               if (el == null) {
+                       el = article.getElementsByAttributeValue(
+                               "data-click-id", "timestamp").first();
+               }
+               
+               String dateAgo = el.text().trim();
+               int h = 0;
+               if (dateAgo.endsWith("hour ago")) {
+                       h = 1;
+               } else if (dateAgo.endsWith("hours ago")) {
+                       dateAgo = dateAgo.replace("hours ago", "").trim();
+                       h = Integer.parseInt(dateAgo);
+               } else if (dateAgo.endsWith("day ago")) {
+                       h = 24;
+               } else if (dateAgo.endsWith("days ago")) {
+                       dateAgo = dateAgo.replace("days ago", "").trim();
+                       h = Integer.parseInt(dateAgo) * 24;
+               }
+               
+               long now = new Date().getTime();   // in ms since 1970
+               now = now / (1000l * 60l * 60l);   // in hours
+               long then = now - h;               // in hours
+               then = then * (60l * 60l); // in seconds
+               
+               return Long.toString(then);
        }
 
        @Override
@@ -95,14 +133,25 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected String getArticleIntUrl(Document doc, Element article) {
-               return article.getElementsByClass("thing").first()
-                       .absUrl("data-permalink");
+               String url = article.absUrl("data-permalink");
+               if (url == null || url.isEmpty()) {
+                       url = article.getElementsByAttributeValue(
+                               "data-click-id", "timestamp").first().absUrl("href");
+               }
+               
+               return url;
        }
 
        @Override
        protected String getArticleExtUrl(Document doc, Element article) {
-               Element url = article.getElementsByAttributeValue(
-                       "data-event-action", "title").first();
+               Elements els = article.getElementsByAttributeValue(
+                       "data-event-action", "title");
+               if (els == null || els.isEmpty()) {
+                       els = article.getElementsByAttributeValue(
+                                       "data-click-id", "body");
+               }
+               
+               Element url = els.first();
                if (!url.attr("href").trim().startsWith("/")) {
                        return url.absUrl("href");
                }
@@ -112,6 +161,11 @@ public class Reddit extends BasicSupport {
 
        @Override
        protected String getArticleContent(Document doc, Element article) {
+               Elements els = article.getElementsByClass("md");
+               if (els != null && !els.isEmpty()) {
+                       return els.first().text().trim();
+               }
+               
                return "";
        }