From 127e065f2ec5040ae8059cb3bca92c30ac2a0a47 Mon Sep 17 00:00:00 2001
From: Niki Roo
Date: Thu, 29 Mar 2018 14:54:28 +0200
Subject: [PATCH] New website supported: Phoronix

---
Review note: line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch, so the context lines may need
`git apply --ignore-whitespace`; the Phoronix.java blob id on the index
line predates the edits made to that file below.

 .../nikiroo/gofetch/support/BasicSupport.java |   3 +
 src/be/nikiroo/gofetch/support/Phoronix.java  | 274 ++++++++++++++++++
 src/be/nikiroo/gofetch/support/Type.java      |   2 +
 3 files changed, 279 insertions(+)
 create mode 100644 src/be/nikiroo/gofetch/support/Phoronix.java

diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
index a59ae31..80fe5d2 100644
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -526,6 +526,9 @@ public abstract class BasicSupport {
 			case ERE_NUMERIQUE:
 				support = new EreNumerique();
 				break;
+			case PHORONIX:
+				support = new Phoronix();
+				break;
 			}
 
 			if (support != null) {
diff --git a/src/be/nikiroo/gofetch/support/Phoronix.java b/src/be/nikiroo/gofetch/support/Phoronix.java
new file mode 100644
index 0000000..8f257fb
--- /dev/null
+++ b/src/be/nikiroo/gofetch/support/Phoronix.java
@@ -0,0 +1,274 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+
+/**
+ * Support class for the Phoronix website.
+ * <p>
+ * Articles are read from the {@code article} elements of the front page;
+ * comments are downloaded from the page linked by the {@code comments-label}
+ * element of the full article.
+ */
+class Phoronix extends BasicSupport {
+	@Override
+	public String getDescription() {
+		return "Phoronix: news regarding free and open-source software";
+	}
+
+	@Override
+	protected List<Entry<URL, String>> getUrls() throws IOException {
+		List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
+		urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
+				"https://www.phoronix.com/"), ""));
+		return urls;
+	}
+
+	@Override
+	protected List<Element> getArticles(Document doc) {
+		return doc.getElementsByTag("article");
+	}
+
+	/**
+	 * The ID is the last path segment of the forum link found in the
+	 * {@code comments} element of the article, or an empty {@link String} if
+	 * there is no such link.
+	 */
+	@Override
+	protected String getArticleId(Document doc, Element article) {
+		Element comments = article.getElementsByClass("comments").first();
+		if (comments != null) {
+			Element forumLink = comments.getElementsByTag("a").first();
+			if (forumLink != null) {
+				String id = forumLink.absUrl("href");
+				int pos = id.lastIndexOf("/");
+				if (pos >= 0) {
+					id = id.substring(pos + 1);
+				}
+
+				return id;
+			}
+		}
+
+		return "";
+	}
+
+	@Override
+	protected String getArticleTitle(Document doc, Element article) {
+		Element header = article.getElementsByTag("header").first();
+		if (header != null) {
+			return header.text();
+		}
+
+		return "";
+	}
+
+	@Override
+	protected String getArticleAuthor(Document doc, Element article) {
+		return "";
+	}
+
+	@Override
+	protected String getArticleDate(Document doc, Element article) {
+		return getArticleDetail(article, 0);
+	}
+
+	@Override
+	protected String getArticleCategory(Document doc, Element article,
+			String currentCategory) {
+		return getArticleDetail(article, 1);
+	}
+
+	@Override
+	protected String getArticleDetails(Document doc, Element article) {
+		return getArticleDetail(article, 2);
+	}
+
+	/**
+	 * Return the text of the child node at the given index of the
+	 * {@code details} element of the article, trimmed and stripped of one
+	 * leading and one trailing "-" separator.
+	 *
+	 * @param article
+	 *            the article to look into
+	 * @param index
+	 *            the index of the child node (this class uses 0 for the date,
+	 *            1 for the category and 2 for the details)
+	 *
+	 * @return the value, or an empty {@link String} if not found
+	 */
+	private String getArticleDetail(Element article, int index) {
+		Element details = article.getElementsByClass("details").first();
+		if (details != null && details.childNodes().size() > index) {
+			Node valueNode = details.childNodes().get(index);
+			String value = "";
+			if (valueNode instanceof TextNode) {
+				value = ((TextNode) valueNode).text().trim();
+			} else if (valueNode instanceof Element) {
+				value = ((Element) valueNode).text().trim();
+			}
+
+			if (value.startsWith("-")) {
+				value = value.substring(1).trim();
+			}
+			if (value.endsWith("-")) {
+				value = value.substring(0, value.length() - 1).trim();
+			}
+
+			return value;
+		}
+
+		return "";
+	}
+
+	@Override
+	protected String getArticleIntUrl(Document doc, Element article) {
+		Element a = article.getElementsByTag("a").first();
+		if (a != null) {
+			return a.absUrl("href");
+		}
+
+		return "";
+	}
+
+	@Override
+	protected String getArticleExtUrl(Document doc, Element article) {
+		return "";
+	}
+
+	@Override
+	protected String getArticleContent(Document doc, Element article) {
+		Element p = article.getElementsByTag("p").first();
+		if (p != null) {
+			return p.text();
+		}
+
+		return "";
+	}
+
+	@Override
+	protected Element getFullArticle(Document doc) {
+		return doc.getElementsByClass("content").first();
+	}
+
+	/**
+	 * Download the page linked by the {@code comments-label} element and
+	 * return its {@code b-post} elements.
+	 *
+	 * @return the comment posts, or NULL if there is no usable link or if the
+	 *         page cannot be downloaded
+	 */
+	@Override
+	protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
+		Element linkToComments = doc.getElementsByClass("comments-label")
+				.first();
+		try {
+			if (linkToComments != null) {
+				Element a = linkToComments.getElementsByTag("a").first();
+				if (a != null) {
+					String url = a.absUrl("href");
+					InputStream in = downloader.open(new URL(url));
+					try {
+						Document comments = DataUtil.load(in, "UTF-8", url);
+						return comments.getElementsByClass("b-post");
+					} finally {
+						in.close();
+					}
+				}
+			}
+		} catch (IOException e) {
+			// Best effort: if the comments page cannot be downloaded or
+			// parsed (this includes a malformed or empty link), fall
+			// through and return NULL
+		}
+
+		return null;
+	}
+
+	@Override
+	protected ElementProcessor getElementProcessorFullArticle() {
+		return new BasicElementProcessor();
+	}
+
+	@Override
+	protected List<Element> getCommentCommentPosts(Document doc,
+			Element container) {
+		return null;
+	}
+
+	@Override
+	protected String getCommentId(Element post) {
+		return post.id();
+	}
+
+	@Override
+	protected String getCommentAuthor(Element post) {
+		// We have an author, but no title, so, switch both:
+		return "";
+	}
+
+	@Override
+	protected String getCommentTitle(Element post) {
+		// We have an author, but no title, so, switch both:
+		Element author = post.getElementsByClass("author").first();
+		if (author != null) {
+			return author.text();
+		}
+
+		return "";
+	}
+
+	@Override
+	protected String getCommentDate(Element post) {
+		Element date = post.getElementsByTag("time").first();
+		if (date != null) {
+			return date.attr("datetime");
+		}
+
+		return "";
+	}
+
+	@Override
+	protected Element getCommentContentElement(Element post) {
+		return post.getElementsByClass("OLD__post-content-text").first();
+	}
+
+	@Override
+	protected ElementProcessor getElementProcessorComment() {
+		return new BasicElementProcessor() {
+			@Override
+			public boolean detectQuote(Node node) {
+				if (node instanceof Element) {
+					if (((Element) node).hasClass("quote_container")) {
+						return true;
+					}
+				}
+
+				return super.detectQuote(node);
+			}
+
+			@Override
+			public boolean ignoreNode(Node node) {
+				if (node instanceof Element) {
+					if (((Element) node).hasClass("b-icon")) {
+						return true;
+					}
+				}
+
+				return super.ignoreNode(node);
+			}
+		};
+	}
+
+}
diff --git a/src/be/nikiroo/gofetch/support/Type.java b/src/be/nikiroo/gofetch/support/Type.java
index dadbec1..ff758cb 100644
--- a/src/be/nikiroo/gofetch/support/Type.java
+++ b/src/be/nikiroo/gofetch/support/Type.java
@@ -20,4 +20,6 @@ public enum Type {
 	TOO_LINUX,
 	/** FR: IT */
 	ERE_NUMERIQUE,
+	/** EN: IT */
+	PHORONIX,
 }
-- 
2.27.0