From 2d95a873f4dc858d9ed55fa1889b1cf29a78f5eb Mon Sep 17 00:00:00 2001
From: Niki Roo
Date: Sun, 6 Aug 2017 13:22:50 +0200
Subject: [PATCH] Add pipedot support

---
Review notes: Pipedot now closes the stream returned by open() once the
page has been parsed, and uses parameterized List types. The blob ids on
the "index" lines below are those of the original commit.

 .../nikiroo/gofetch/support/BasicSupport.java |   7 +-
 src/be/nikiroo/gofetch/support/Pipedot.java   | 244 ++++++++++++++++++
 2 files changed, 249 insertions(+), 2 deletions(-)
 create mode 100644 src/be/nikiroo/gofetch/support/Pipedot.java

diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
index 1105df5..daa0ed1 100644
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -12,7 +12,7 @@ import be.nikiroo.gofetch.data.Story;
 
 public abstract class BasicSupport {
 	public enum Type {
-		SLASHDOT,
+		SLASHDOT, PIPEDOT,
 	}
 
 	static private String preselector;
@@ -24,7 +24,7 @@ public abstract class BasicSupport {
 	abstract public List getComments(Story story) throws IOException;
 
 	abstract public String getDescription();
-	
+
 	public String getSelector() {
 		return getSelector(type);
 	}
@@ -53,6 +53,9 @@ public abstract class BasicSupport {
 		case SLASHDOT:
 			support = new Slashdot();
 			break;
+		case PIPEDOT:
+			support = new Pipedot();
+			break;
 		}
 
 		if (support != null) {
diff --git a/src/be/nikiroo/gofetch/support/Pipedot.java b/src/be/nikiroo/gofetch/support/Pipedot.java
new file mode 100644
index 0000000..4d68fe7
--- /dev/null
+++ b/src/be/nikiroo/gofetch/support/Pipedot.java
@@ -0,0 +1,244 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+
+/**
+ * Support https://pipedot.org/.
+ *
+ * @author niki
+ */
+public class Pipedot extends BasicSupport {
+	@Override
+	public String getDescription() {
+		return "Pipedot: News for nerds, without the corporate slant";
+	}
+
+	/**
+	 * List the stories found on the Pipedot front page: one {@link Story} per
+	 * element of class "story" that contains an h1 title.
+	 *
+	 * @return the stories, in document order
+	 *
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	@Override
+	public List<Story> list() throws IOException {
+		List<Story> list = new ArrayList<Story>();
+
+		Document doc = loadDocument(new URL("https://pipedot.org/"));
+		for (Element story : doc.getElementsByClass("story")) {
+			Elements titles = story.getElementsByTag("h1");
+			if (titles.size() == 0) {
+				continue;
+			}
+
+			Element title = titles.get(0);
+			Elements links = story.getElementsByTag("a");
+
+			// The story id is what follows "/pipe/" in the first such link
+			String id = "";
+			for (Element link : links) {
+				String href = link.attr("href");
+				if (href.startsWith("/pipe/")) {
+					id = href.substring("/pipe/".length());
+					break;
+				}
+			}
+
+			// The first link of the story is used as its internal URL
+			String intUrl = null;
+			if (links.size() > 0) {
+				intUrl = links.get(0).absUrl("href");
+			}
+
+			// Take first ext URL as original source
+			String extUrl = null;
+			for (Element link : links) {
+				String uuu = link.absUrl("href");
+				if (!uuu.isEmpty() && !uuu.contains("pipedot.org/")) {
+					extUrl = uuu;
+					break;
+				}
+			}
+
+			String details = "";
+			Elements detailsElements = story.getElementsByTag("div");
+			if (detailsElements.size() > 0) {
+				details = detailsElements.get(0).text();
+			}
+
+			// The body is the first child that is neither header nor footer
+			String body = "";
+			for (Element elem : story.children()) {
+				String tag = elem.tag().toString();
+				if (!tag.equals("header") && !tag.equals("footer")) {
+					body = elem.text();
+					break;
+				}
+			}
+
+			list.add(new Story(getType(), id, title.text(), details, intUrl,
+					extUrl, body));
+		}
+
+		return list;
+	}
+
+	/**
+	 * Fetch the comments of the given story from its internal URL; they are
+	 * looked for under the first "main" element of the page.
+	 *
+	 * @param story
+	 *            the story to get the comments of
+	 *
+	 * @return the top-level comments (replies are attached to their parent),
+	 *         or an empty list if the page has no "main" element
+	 *
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	@Override
+	public List<Comment> getComments(Story story) throws IOException {
+		List<Comment> comments = new ArrayList<Comment>();
+
+		Document doc = loadDocument(new URL(story.getUrlInternal()));
+		Elements listing = doc.getElementsByTag("main");
+		if (listing.size() > 0) {
+			comments.addAll(getComments(listing.get(0)));
+		}
+
+		return comments;
+	}
+
+	/**
+	 * Download the page at the given URL and parse it as UTF-8, closing the
+	 * stream once done.
+	 *
+	 * @param url
+	 *            the page to load
+	 *
+	 * @return the parsed document
+	 *
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	private Document loadDocument(URL url) throws IOException {
+		InputStream in = open(url);
+		try {
+			return DataUtil.load(in, "UTF-8", url.toString());
+		} finally {
+			in.close();
+		}
+	}
+
+	/**
+	 * Convert the direct children of class "comment" of the given element
+	 * into comments, skipping the empty ones.
+	 *
+	 * @param listing
+	 *            the element that contains the comments
+	 *
+	 * @return the comments
+	 */
+	private List<Comment> getComments(Element listing) {
+		List<Comment> comments = new ArrayList<Comment>();
+		for (Element commentElement : listing.children()) {
+			if (commentElement.hasClass("comment")) {
+				Comment comment = getComment(commentElement);
+				if (!comment.isEmpty()) {
+					comments.add(comment);
+				}
+			}
+		}
+		return comments;
+	}
+
+	/**
+	 * Convert a single comment element, replies included.
+	 *
+	 * @param commentElement
+	 *            the element to convert
+	 *
+	 * @return the comment
+	 */
+	private Comment getComment(Element commentElement) {
+		String title = firstOrEmptyTag(commentElement, "h3");
+		// NOTE(review): "h4" is looked up as a class name while "h3" is
+		// looked up as a tag name -- confirm against the page markup
+		String author = firstOrEmpty(commentElement, "h4");
+		String content = firstOrEmpty(commentElement, "comment-body");
+
+		// Split "<author> on <date>" at the last " on "
+		String date = "";
+		int pos = author.lastIndexOf(" on ");
+		if (pos >= 0) {
+			date = author.substring(pos + " on ".length()).trim();
+			author = author.substring(0, pos).trim();
+		}
+
+		Comment comment = new Comment(commentElement.id(), author, title, date,
+				content);
+
+		// Replies are read from the first "comment-outline" element
+		Elements commentOutline = commentElement
+				.getElementsByClass("comment-outline");
+		if (commentOutline.size() > 0) {
+			comment.addAll(getComments(commentOutline.get(0)));
+		}
+
+		return comment;
+	}
+
+	/**
+	 * Get the text of the first element of the given class, or an empty
+	 * {@link String} if none found.
+	 *
+	 * @param element
+	 *            the element to look in
+	 * @param className
+	 *            the class to look for
+	 *
+	 * @return the value or an empty {@link String}
+	 */
+	private String firstOrEmpty(Element element, String className) {
+		Elements subElements = element.getElementsByClass(className);
+		if (subElements.size() > 0) {
+			return subElements.get(0).text();
+		}
+
+		return "";
+	}
+
+	/**
+	 * Get the text of the first element of the given tag, or an empty
+	 * {@link String} if none found.
+	 *
+	 * @param element
+	 *            the element to look in
+	 * @param tagName
+	 *            the tag to look for
+	 *
+	 * @return the value or an empty {@link String}
+	 */
+	private String firstOrEmptyTag(Element element, String tagName) {
+		Elements subElements = element.getElementsByTag(tagName);
+		if (subElements.size() > 0) {
+			return subElements.get(0).text();
+		}
+
+		return "";
+	}
+}
-- 
2.27.0