package be.nikiroo.gofetch.support; import java.io.IOException; import java.net.URL; import java.util.AbstractMap; import java.util.ArrayList; import java.util.List; import java.util.Map.Entry; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.select.Elements; /** * Support https://pipedot.org/. * * @author niki */ public class Pipedot extends BasicSupport { @Override public String getDescription() { return "Pipedot: News for nerds, without the corporate slant"; } @Override protected List> getUrls() throws IOException { List> urls = new ArrayList>(); urls.add(new AbstractMap.SimpleEntry(new URL( "https://pipedot.org/"), "")); return urls; } @Override protected List getArticles(Document doc) { return doc.getElementsByClass("story"); } @Override protected String getArticleId(Document doc, Element article) { // Don't try on bad articles if (getArticleTitle(doc, article).isEmpty()) { return ""; } for (Element idElem : article.getElementsByTag("a")) { if (idElem.attr("href").startsWith("/pipe/")) { return idElem.attr("href").substring("/pipe/".length()); } } return ""; } @Override protected String getArticleTitle(Document doc, Element article) { Element title = article.getElementsByTag("h1").first(); if (title != null) { return title.text(); } return ""; } @Override protected String getArticleAuthor(Document doc, Element article) { String value = getArticleDetailsReal(article); int pos = value.indexOf("by "); if (pos >= 0) { value = value.substring(pos + "by ".length()).trim(); pos = value.indexOf(" in "); if (pos >= 0) { value = value.substring(0, pos).trim(); } return value; } return ""; } @Override protected String getArticleDate(Document doc, Element article) { Element dateElement = article.getElementsByTag("time").first(); if (dateElement != null) { return dateElement.attr("datetime"); } return ""; } @Override protected String getArticleCategory(Document doc, Element article, String currentCategory) { String value = getArticleDetailsReal(article); int pos = value.indexOf(" in "); if (pos >= 0) { value = value.substring(pos + " in ".length()).trim(); pos = value.indexOf(" on "); if (pos >= 0) { value = value.substring(0, pos).trim(); } return value; } return ""; } @Override protected String getArticleDetails(Document doc, Element article) { return ""; // We alrady extracted all the info } @Override protected String getArticleIntUrl(Document doc, Element article) { Element link = article.getElementsByTag("a").first(); if (link != null) { return link.absUrl("href"); } return ""; } @Override protected String getArticleExtUrl(Document doc, Element article) { Element link = article.getElementsByTag("a").first(); if (link != null) { String possibleExtLink = link.absUrl("href").trim(); if (!possibleExtLink.isEmpty() && !possibleExtLink.contains("pipedot.org/")) { return possibleExtLink; } } return ""; } @Override protected String getArticleContent(Document doc, Element article) { for (Element elem : article.children()) { String tag = elem.tagName(); if (!tag.equals("header") && !tag.equals("footer")) { return getArticleText(elem); } } return ""; } @Override protected Element getFullArticle(Document doc) { return null; } @Override protected List getFullArticleCommentPosts(Document doc, URL intUrl) { return getCommentElements(doc.getElementsByTag("main").first()); } @Override protected ElementProcessor getElementProcessorFullArticle() { return new BasicElementProcessor(); } @Override protected List getCommentCommentPosts(Document doc, Element container) { if (container != null) { container = container.getElementsByClass("comment-outline").first(); } return getCommentElements(container); } @Override protected String getCommentId(Element post) { return post.id(); } @Override protected String getCommentAuthor(Element post) { Element authorDateE = post.getElementsByTag("h3").first(); if (authorDateE != null) { String authorDate = authorDateE.text(); int pos = authorDate.lastIndexOf(" on "); if (pos >= 0) { return authorDate.substring(0, pos).trim(); } } return ""; } @Override protected String getCommentTitle(Element post) { Element title = post.getElementsByTag("h3").first(); if (title != null) { return title.text(); } return ""; } @Override protected String getCommentDate(Element post) { Element authorDateE = post.getElementsByTag("h3").first(); if (authorDateE != null) { String authorDate = authorDateE.text(); int pos = authorDate.lastIndexOf(" on "); if (pos >= 0) { return authorDate.substring(pos + " on ".length()).trim(); } } return ""; } @Override protected Element getCommentContentElement(Element post) { return post.getElementsByClass("comment-body").first(); } @Override protected ElementProcessor getElementProcessorComment() { return new BasicElementProcessor() { @Override public boolean detectQuote(Node node) { if (node instanceof Element) { Element elementNode = (Element) node; if (elementNode.tagName().equals("blockquote") || elementNode.hasClass("quote")) { return true; } } return false; } }; } private String getArticleDetailsReal(Element article) { Elements detailsElements = article.getElementsByTag("div"); if (detailsElements.size() > 0) { return detailsElements.get(0).text().trim(); } return ""; } private List getCommentElements(Element container) { List commentElements = new ArrayList(); if (container != null) { for (Element commentElement : container.children()) { if (commentElement.hasClass("comment")) { commentElements.add(commentElement); } } } return commentElements; } }