X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FReddit.java;h=2732894a4fe77168b21ccaeda8cb92d562382a95;hb=7273fd5890478d6ec1f3c566e0c5e4640ab79f15;hp=6ae1a6c0ed611b1ec24d7816fcc00525b13f7bc8;hpb=b19b36322d5453c1b01761dd76190a67465e0d53;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/Reddit.java b/src/be/nikiroo/gofetch/support/Reddit.java index 6ae1a6c..2732894 100644 --- a/src/be/nikiroo/gofetch/support/Reddit.java +++ b/src/be/nikiroo/gofetch/support/Reddit.java @@ -15,6 +15,7 @@ import java.util.Map.Entry; import java.util.Map; import java.util.HashMap; import java.util.Date; +import java.text.SimpleDateFormat; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -45,19 +46,39 @@ public class Reddit extends BasicSupport { @Override protected List getArticles(Document doc) { - return doc.getElementsByClass("thing"); + List list = doc.getElementsByClass("thing"); + if (list.isEmpty()) { + list = doc.getElementsByClass("Post"); + } + if (list.isEmpty()) { + list = doc.getElementsByClass("scrollerItem"); + } + + return list; } @Override protected String getArticleId(Document doc, Element article) { - // Use the date, Luke - return ""; + String date = getArticleDate(doc, article); + String title = getArticleTitle(doc, article); + + String id = (date + "_" + title).replaceAll("[^a-zA-Z0-9_-]", "_"); + if (id.length() > 40) { + id = id.substring(0, 40); + } + + return id; } @Override protected String getArticleTitle(Document doc, Element article) { - return article.getElementsByAttributeValue( - "data-event-action", "title").first().text().trim(); + Elements els = article.getElementsByAttributeValue( + "data-event-action", "title"); + if (els == null || els.isEmpty()) { + els = article.getElementsByTag("h2"); + } + + return els.first().text().trim(); } @Override @@ -69,8 +90,14 @@ public class Reddit extends BasicSupport { @Override protected String getArticleDate(Document doc, Element article) { - return article.getElementsByClass("live-timestamp") - .attr("datetime").trim(); + Element el = article.getElementsByClass("live-timestamp").first(); + if (el == null) { + el = article.getElementsByAttributeValue( + "data-click-id", "timestamp").first(); + } + + String dateAgo = el.text().trim(); + return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo)); } @Override @@ -95,14 +122,25 @@ public class Reddit extends BasicSupport { @Override protected String getArticleIntUrl(Document doc, Element article) { - return article.getElementsByClass("thing").first() - .absUrl("data-permalink"); + String url = article.absUrl("data-permalink"); + if (url == null || url.isEmpty()) { + url = article.getElementsByAttributeValue( + "data-click-id", "timestamp").first().absUrl("href"); + } + + return url; } @Override protected String getArticleExtUrl(Document doc, Element article) { - Element url = article.getElementsByAttributeValue( - "data-event-action", "title").first(); + Elements els = article.getElementsByAttributeValue( + "data-event-action", "title"); + if (els == null || els.isEmpty()) { + els = article.getElementsByAttributeValue( + "data-click-id", "body"); + } + + Element url = els.first(); if (!url.attr("href").trim().startsWith("/")) { return url.absUrl("href"); } @@ -112,12 +150,23 @@ public class Reddit extends BasicSupport { @Override protected String getArticleContent(Document doc, Element article) { + Elements els = article.getElementsByClass("h2"); + if (els != null && !els.isEmpty()) { + return els.first().text().trim(); + } + return ""; } @Override protected Element getFullArticle(Document doc) { - return doc.getElementsByClass("ckueCN").first(); + Element element = doc.getElementsByAttributeValue( + "data-click-id", "body").first(); + if (element == null) { + element = doc.getElementsByClass("ckueCN").first(); + } + + return element; } @Override @@ -127,7 +176,12 @@ public class Reddit extends BasicSupport { @Override protected List getFullArticleCommentPosts(Document doc, URL intUrl) { - return doc.getElementsByClass("jHfOJm"); + Elements posts = doc.getElementsByClass("jHfOJm"); + if (posts.isEmpty()) { + posts = doc.getElementsByClass("eCeBkc"); + } + + return posts; } @Override @@ -181,8 +235,9 @@ public class Reddit extends BasicSupport { @Override protected String getCommentDate(Element post) { - return post.getElementsByClass("hJDlLH") + String dateAgo = post.getElementsByClass("hJDlLH") .first().text().trim(); + return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo)); } @Override @@ -222,4 +277,27 @@ public class Reddit extends BasicSupport { story.setComments(comments); } + + // 2 hours ago -> 18/10/2018 21:00 + private Date getDate(String dateAgo) { + int h = 0; + if (dateAgo.endsWith("hour ago")) { + h = 1; + } else if (dateAgo.endsWith("hours ago")) { + dateAgo = dateAgo.replace("hours ago", "").trim(); + h = Integer.parseInt(dateAgo); + } else if (dateAgo.endsWith("day ago")) { + h = 24; + } else if (dateAgo.endsWith("days ago")) { + dateAgo = dateAgo.replace("days ago", "").trim(); + h = Integer.parseInt(dateAgo) * 24; + } + + long now = new Date().getTime(); // in ms since 1970 + now = now / (1000l * 60l * 60l); // in hours since 1970 + long then = now - h; // in hours since 1970 + then = then * (1000l * 60l * 60l); // in ms since 1970 + + return new Date(then); + } }