New website supported: Phoronix
author: Niki Roo <niki@nikiroo.be>
Thu, 29 Mar 2018 12:54:28 +0000 (14:54 +0200)
committer: Niki Roo <niki@nikiroo.be>
Thu, 29 Mar 2018 12:54:28 +0000 (14:54 +0200)
src/be/nikiroo/gofetch/support/BasicSupport.java
src/be/nikiroo/gofetch/support/Phoronix.java [new file with mode: 0644]
src/be/nikiroo/gofetch/support/Type.java

index a59ae313fb1f1fda8979020d7e6315d81ba6592e..80fe5d236d31c2379856a8245f09e80b4cfeab79 100644 (file)
@@ -526,6 +526,9 @@ public abstract class BasicSupport {
                        case ERE_NUMERIQUE:
                                support = new EreNumerique();
                                break;
+                       case PHORONIX:
+                               support = new Phoronix();
+                               break;
                        }
 
                        if (support != null) {
diff --git a/src/be/nikiroo/gofetch/support/Phoronix.java b/src/be/nikiroo/gofetch/support/Phoronix.java
new file mode 100644 (file)
index 0000000..8f257fb
--- /dev/null
@@ -0,0 +1,239 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+
+class Phoronix extends BasicSupport {
+       @Override
+       public String getDescription() {
+               return "Phoronix: news regarding free and open-source software";
+       }
+
+       @Override
+       protected List<Entry<URL, String>> getUrls() throws IOException {
+               List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
+               urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
+                               "https://www.phoronix.com/"), ""));
+               return urls;
+       }
+
+       @Override
+       protected List<Element> getArticles(Document doc) {
+               return doc.getElementsByTag("article");
+       }
+
+       @Override
+       protected String getArticleId(Document doc, Element article) {
+               Element comments = article.getElementsByClass("comments").first();
+               if (comments != null) {
+                       Element forumLink = comments.getElementsByTag("a").first();
+                       if (forumLink != null) {
+                               String id = forumLink.absUrl("href");
+                               int pos = id.lastIndexOf("/");
+                               if (pos >= 0) {
+                                       id = id.substring(pos + 1);
+                               }
+
+                               return id;
+                       }
+               }
+
+               return "";
+       }
+
+       @Override
+       protected String getArticleTitle(Document doc, Element article) {
+               Element header = article.getElementsByTag("header").first();
+               if (header != null) {
+                       return header.text();
+               }
+
+               return "";
+       }
+
+       @Override
+       protected String getArticleAuthor(Document doc, Element article) {
+               return "";
+       }
+
+       @Override
+       protected String getArticleDate(Document doc, Element article) {
+               return getArticleDetail(article, 0);
+       }
+
+       @Override
+       protected String getArticleCategory(Document doc, Element article,
+                       String currentCategory) {
+               return getArticleDetail(article, 1);
+       }
+
+       @Override
+       protected String getArticleDetails(Document doc, Element article) {
+               return getArticleDetail(article, 2);
+       }
+
+       private String getArticleDetail(Element article, int index) {
+               Element details = article.getElementsByClass("details").first();
+               if (details != null && details.childNodes().size() > index) {
+                       Node valueNode = details.childNodes().get(index);
+                       String value = "";
+                       if (valueNode instanceof TextNode) {
+                               value = ((TextNode) valueNode).text().trim();
+                       } else if (valueNode instanceof Element) {
+                               value = ((Element) valueNode).text().trim();
+                       }
+
+                       if (value.startsWith("-")) {
+                               value = value.substring(1).trim();
+                       }
+                       if (value.endsWith("-")) {
+                               value = value.substring(0, value.length() - 1).trim();
+                       }
+
+                       return value;
+               }
+
+               return "";
+       }
+
+       @Override
+       protected String getArticleIntUrl(Document doc, Element article) {
+               Element a = article.getElementsByTag("a").first();
+               if (a != null) {
+                       return a.absUrl("href");
+               }
+
+               return "";
+       }
+
+       @Override
+       protected String getArticleExtUrl(Document doc, Element article) {
+               return "";
+       }
+
+       @Override
+       protected String getArticleContent(Document doc, Element article) {
+               Element p = article.getElementsByTag("p").first();
+               if (p != null) {
+                       return p.text();
+               }
+
+               return "";
+       }
+
+       @Override
+       protected Element getFullArticle(Document doc) {
+               return doc.getElementsByClass("content").first();
+       }
+
+       @Override
+       protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
+               Element linkToComments = doc.getElementsByClass("comments-label")
+                               .first();
+               try {
+                       if (linkToComments != null) {
+                               Element a = linkToComments.getElementsByTag("a").first();
+                               if (a != null) {
+                                       String url = a.absUrl("href");
+                                       InputStream in = downloader.open(new URL(url));
+                                       try {
+                                               doc = DataUtil.load(in, "UTF-8", url.toString());
+                                               return doc.getElementsByClass("b-post");
+                                       } finally {
+                                               in.close();
+                                       }
+                               }
+                       }
+               } catch (IOException e) {
+               }
+
+               return null;
+       }
+
+       @Override
+       protected ElementProcessor getElementProcessorFullArticle() {
+               return new BasicElementProcessor();
+       }
+
+       @Override
+       protected List<Element> getCommentCommentPosts(Document doc,
+                       Element container) {
+               return null;
+       }
+
+       @Override
+       protected String getCommentId(Element post) {
+               return post.id();
+       }
+
+       @Override
+       protected String getCommentAuthor(Element post) {
+               // We have an author, but no title, so, switch both:
+               return "";
+       }
+
+       @Override
+       protected String getCommentTitle(Element post) {
+               // We have an author, but no title, so, switch both:
+               Element author = post.getElementsByClass("author").first();
+               if (author != null) {
+                       return author.text();
+               }
+
+               return "";
+       }
+
+       @Override
+       protected String getCommentDate(Element post) {
+               Element date = post.getElementsByTag("time").first();
+               if (date != null) {
+                       return date.attr("datetime");
+               }
+
+               return "";
+       }
+
+       @Override
+       protected Element getCommentContentElement(Element post) {
+               return post.getElementsByClass("OLD__post-content-text").first();
+       }
+
+       @Override
+       protected ElementProcessor getElementProcessorComment() {
+               return new BasicElementProcessor() {
+                       @Override
+                       public boolean detectQuote(Node node) {
+                               if (node instanceof Element) {
+                                       if (((Element) node).hasClass("quote_container")) {
+                                               return true;
+                                       }
+                               }
+
+                               return super.detectQuote(node);
+                       }
+
+                       @Override
+                       public boolean ignoreNode(Node node) {
+                               if (node instanceof Element) {
+                                       if (((Element) node).hasClass("b-icon")) {
+                                               return true;
+                                       }
+                               }
+
+                               return super.ignoreNode(node);
+                       }
+               };
+       }
+
+}
index dadbec16dede6f052f145cd3cca6f5eaeee65265..ff758cb379bab6c910036ea31811295ba9b61dbb 100644 (file)
@@ -20,4 +20,6 @@ public enum Type {
        TOO_LINUX,
        /** FR: IT */
        ERE_NUMERIQUE,
+       /** EN: IT */
+       PHORONIX,
 }