From b19b36322d5453c1b01761dd76190a67465e0d53 Mon Sep 17 00:00:00 2001
From: Niki Roo
Date: Fri, 5 Oct 2018 14:24:31 +0200
Subject: [PATCH] New: reddit linux_gaming

---
 .../nikiroo/gofetch/support/BasicSupport.java |   3 +
 src/be/nikiroo/gofetch/support/Reddit.java    | 243 ++++++++++++++++++
 src/be/nikiroo/gofetch/support/Type.java      |   2 +
 src/be/nikiroo/gofetch/test/Test.java         |   1 +
 src/be/nikiroo/gofetch/test/TestReddit.java   |  30 +++
 5 files changed, 279 insertions(+)
 create mode 100644 src/be/nikiroo/gofetch/support/Reddit.java
 create mode 100644 src/be/nikiroo/gofetch/test/TestReddit.java

diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
index 42761c9..ec19c32 100644
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -601,6 +601,9 @@ public abstract class BasicSupport {
 			case SEPT_SUR_SEPT:
 				support = new SeptSurSept();
 				break;
+			case REDDIT:
+				support = new Reddit();
+				break;
 			}
 
 			if (support != null) {
diff --git a/src/be/nikiroo/gofetch/support/Reddit.java b/src/be/nikiroo/gofetch/support/Reddit.java
new file mode 100644
index 0000000..6ae1a6c
--- /dev/null
+++ b/src/be/nikiroo/gofetch/support/Reddit.java
@@ -0,0 +1,243 @@
+package be.nikiroo.gofetch.support;
+
+import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.gofetch.data.Comment;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.LinkedList;
+import java.util.Map.Entry;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Date;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.select.Elements;
+
+/**
+ * Support https://www.reddit.com/.
+ * <p>
+ * Scrapes the "new" listing of r/linux_gaming; the comment tree is rebuilt in
+ * {@link #fetch(Story)} from the "level N" marker found in each comment.
+ *
+ * @author niki
+ */
+public class Reddit extends BasicSupport {
+	@Override
+	public String getDescription() {
+		return "Reddit: The front page of the internet";
+	}
+
+	@Override
+	protected List<Entry<URL, String>> getUrls() throws IOException {
+		List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
+		String base = "https://www.reddit.com/r/";
+		urls.add(new AbstractMap.SimpleEntry<URL, String>(
+			new URL(base + "linux_gaming" + "/new/"), "linux_gaming"
+		));
+
+		return urls;
+	}
+
+	@Override
+	protected List<Element> getArticles(Document doc) {
+		return doc.getElementsByClass("thing");
+	}
+
+	@Override
+	protected String getArticleId(Document doc, Element article) {
+		// Use the date, Luke
+		return "";
+	}
+
+	@Override
+	protected String getArticleTitle(Document doc, Element article) {
+		Element title = article.getElementsByAttributeValue(
+			"data-event-action", "title").first();
+		// first() is null when the post has no title link
+		return title == null ? "" : title.text().trim();
+	}
+
+	@Override
+	protected String getArticleAuthor(Document doc, Element article) {
+		return article.getElementsByAttributeValueStarting(
+			"href", "/user/"
+		).text().trim();
+	}
+
+	@Override
+	protected String getArticleDate(Document doc, Element article) {
+		return article.getElementsByClass("live-timestamp")
+			.attr("datetime").trim();
+	}
+
+	@Override
+	protected String getArticleCategory(Document doc, Element article,
+			String currentCategory) {
+		// NOTE(review): "/search=?q=" looks like a typo for "/search?q=";
+		// confirm against the live markup before changing the selector
+		Elements categEls = article.getElementsByAttributeValueStarting(
+			"href", "/r/" + currentCategory + "/search=?q=flair_name"
+		);
+
+		if (categEls.size() > 0) {
+			return currentCategory + ", "
+				+ categEls.first().text().trim();
+		}
+
+		return currentCategory;
+	}
+
+	@Override
+	protected String getArticleDetails(Document doc, Element article) {
+		return "";
+	}
+
+	@Override
+	protected String getArticleIntUrl(Document doc, Element article) {
+		return article.getElementsByClass("thing").first()
+			.absUrl("data-permalink");
+	}
+
+	@Override
+	protected String getArticleExtUrl(Document doc, Element article) {
+		Element url = article.getElementsByAttributeValue(
+			"data-event-action", "title").first();
+		// No title link, or a site-relative href: no external URL to report
+		if (url != null && !url.attr("href").trim().startsWith("/")) {
+			return url.absUrl("href");
+		}
+
+		return "";
+	}
+
+	@Override
+	protected String getArticleContent(Document doc, Element article) {
+		return "";
+	}
+
+	@Override
+	protected Element getFullArticle(Document doc) {
+		return doc.getElementsByClass("ckueCN").first();
+	}
+
+	@Override
+	protected ElementProcessor getElementProcessorFullArticle() {
+		return new BasicElementProcessor();
+	}
+
+	@Override
+	protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
+		return doc.getElementsByClass("jHfOJm");
+	}
+
+	@Override
+	protected List<Element> getCommentCommentPosts(Document doc,
+			Element container) {
+		List<Element> elements = new LinkedList<Element>();
+		for (Element el : container.children()) {
+			elements.addAll(el.getElementsByClass("jHfOJm"));
+		}
+
+		return elements;
+	}
+
+	/**
+	 * The comment "ID" is its nesting level (1 when it cannot be read), which
+	 * {@link #fetch(Story)} later uses to rebuild the comment tree.
+	 */
+	@Override
+	protected String getCommentId(Element post) {
+		int level = 1;
+		Elements els = post.getElementsByClass("imyGpC");
+		if (els.size() > 0) {
+			String l = els.first().text().trim()
+				.replace("level ", "");
+			try {
+				level = Integer.parseInt(l);
+			} catch (NumberFormatException ignored) {
+				// Not a number: keep the default top level
+			}
+		}
+
+		return Integer.toString(level);
+	}
+
+	@Override
+	protected String getCommentAuthor(Element post) {
+		// Since we have no title, we switch with author
+		return "";
+	}
+
+	@Override
+	protected String getCommentTitle(Element post) {
+		// Since we have no title, we switch with author
+		Elements els = post.getElementsByClass("RVnoX");
+		if (els.size() > 0) {
+			return els.first().text().trim();
+		}
+
+		els = post.getElementsByClass("kzePTH");
+		if (els.size() > 0) {
+			return els.first().text().trim();
+		}
+
+		return "";
+	}
+
+	@Override
+	protected String getCommentDate(Element post) {
+		Element date = post.getElementsByClass("hJDlLH").first();
+		// first() is null when the comment carries no date element
+		return date == null ? "" : date.text().trim();
+	}
+
+	@Override
+	protected Element getCommentContentElement(Element post) {
+		return post.getElementsByClass("ckueCN")
+			.first();
+	}
+
+	@Override
+	protected ElementProcessor getElementProcessorComment() {
+		return new BasicElementProcessor();
+	}
+
+	/**
+	 * Fetch the story, then turn the flat comment list into a tree: each
+	 * comment is attached to the last seen comment of the level above it.
+	 */
+	@Override
+	public void fetch(Story story) throws IOException {
+		super.fetch(story);
+
+		List<Comment> comments = new LinkedList<Comment>();
+		Map<Integer, Comment> lastOfLevel =
+			new HashMap<Integer, Comment>();
+
+		for (Comment c : story.getComments()) {
+			int level = Integer.parseInt(c.getId());
+			lastOfLevel.put(level, c);
+			if (level <= 1) {
+				comments.add(c);
+			} else {
+				Comment parent = lastOfLevel.get(level - 1);
+				if (parent != null) {
+					parent.add(c);
+				} else {
+					// bad data
+					comments.add(c);
+				}
+			}
+		}
+
+		story.setComments(comments);
+	}
+}
diff --git a/src/be/nikiroo/gofetch/support/Type.java b/src/be/nikiroo/gofetch/support/Type.java
index 614022c..a90cb44 100644
--- a/src/be/nikiroo/gofetch/support/Type.java
+++ b/src/be/nikiroo/gofetch/support/Type.java
@@ -24,4 +24,6 @@ public enum Type {
 	PHORONIX,
 	/** FR: Any */
 	SEPT_SUR_SEPT,
+	/** EN: IT */
+	REDDIT,
 }
diff --git a/src/be/nikiroo/gofetch/test/Test.java b/src/be/nikiroo/gofetch/test/Test.java
index 0151ae2..b671483 100644
--- a/src/be/nikiroo/gofetch/test/Test.java
+++ b/src/be/nikiroo/gofetch/test/Test.java
@@ -14,6 +14,7 @@ public class Test extends TestLauncher {
 		addTest(new TestSlashdot(args));
 		addTest(new TestTooLinux(args));
 		addTest(new TestSeptSurSept(args));
+		addTest(new TestReddit(args));
 	}
 
 	public static void main(String[] args) {
diff --git a/src/be/nikiroo/gofetch/test/TestReddit.java b/src/be/nikiroo/gofetch/test/TestReddit.java
new file mode 100644
index 0000000..c79b8f5
--- /dev/null
+++ b/src/be/nikiroo/gofetch/test/TestReddit.java
@@ -0,0 +1,30 @@
+
+package be.nikiroo.gofetch.test;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+import be.nikiroo.gofetch.support.Reddit;
+import be.nikiroo.gofetch.support.Type;
+
+public class TestReddit extends TestBase {
+	static private Type type = Type.REDDIT;
+	static private TestBase base = null;
+
+	public TestReddit(String[] args) {
+		super(new Reddit() {
+			@Override
+			protected InputStream open(URL url) throws IOException {
+				return base.download(url);
+			}
+
+			@Override
+			public Type getType() {
+				return type;
+			}
+		}, args);
+
+		base = this;
+	}
+}
-- 
2.27.0