8f257fbb0d7b3a36d17a8601a61fc3bfd3de6149
1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
6 import java
.util
.AbstractMap
;
7 import java
.util
.ArrayList
;
9 import java
.util
.Map
.Entry
;
11 import org
.jsoup
.helper
.DataUtil
;
12 import org
.jsoup
.nodes
.Document
;
13 import org
.jsoup
.nodes
.Element
;
14 import org
.jsoup
.nodes
.Node
;
15 import org
.jsoup
.nodes
.TextNode
;
17 class Phoronix
extends BasicSupport
{
19 public String
getDescription() {
20 return "Phoronix: news regarding free and open-source software";
24 protected List
<Entry
<URL
, String
>> getUrls() throws IOException
{
25 List
<Entry
<URL
, String
>> urls
= new ArrayList
<Entry
<URL
, String
>>();
26 urls
.add(new AbstractMap
.SimpleEntry
<URL
, String
>(new URL(
27 "https://www.phoronix.com/"), ""));
32 protected List
<Element
> getArticles(Document doc
) {
33 return doc
.getElementsByTag("article");
37 protected String
getArticleId(Document doc
, Element article
) {
38 Element comments
= article
.getElementsByClass("comments").first();
39 if (comments
!= null) {
40 Element forumLink
= comments
.getElementsByTag("a").first();
41 if (forumLink
!= null) {
42 String id
= forumLink
.absUrl("href");
43 int pos
= id
.lastIndexOf("/");
45 id
= id
.substring(pos
+ 1);
56 protected String
getArticleTitle(Document doc
, Element article
) {
57 Element header
= article
.getElementsByTag("header").first();
66 protected String
getArticleAuthor(Document doc
, Element article
) {
71 protected String
getArticleDate(Document doc
, Element article
) {
72 return getArticleDetail(article
, 0);
76 protected String
getArticleCategory(Document doc
, Element article
,
77 String currentCategory
) {
78 return getArticleDetail(article
, 1);
82 protected String
getArticleDetails(Document doc
, Element article
) {
83 return getArticleDetail(article
, 2);
86 private String
getArticleDetail(Element article
, int index
) {
87 Element details
= article
.getElementsByClass("details").first();
88 if (details
!= null && details
.childNodes().size() > index
) {
89 Node valueNode
= details
.childNodes().get(index
);
91 if (valueNode
instanceof TextNode
) {
92 value
= ((TextNode
) valueNode
).text().trim();
93 } else if (valueNode
instanceof Element
) {
94 value
= ((Element
) valueNode
).text().trim();
97 if (value
.startsWith("-")) {
98 value
= value
.substring(1).trim();
100 if (value
.endsWith("-")) {
101 value
= value
.substring(0, value
.length() - 1).trim();
111 protected String
getArticleIntUrl(Document doc
, Element article
) {
112 Element a
= article
.getElementsByTag("a").first();
114 return a
.absUrl("href");
121 protected String
getArticleExtUrl(Document doc
, Element article
) {
126 protected String
getArticleContent(Document doc
, Element article
) {
127 Element p
= article
.getElementsByTag("p").first();
136 protected Element
getFullArticle(Document doc
) {
137 return doc
.getElementsByClass("content").first();
141 protected List
<Element
> getFullArticleCommentPosts(Document doc
, URL intUrl
) {
142 Element linkToComments
= doc
.getElementsByClass("comments-label")
145 if (linkToComments
!= null) {
146 Element a
= linkToComments
.getElementsByTag("a").first();
148 String url
= a
.absUrl("href");
149 InputStream in
= downloader
.open(new URL(url
));
151 doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
152 return doc
.getElementsByClass("b-post");
158 } catch (IOException e
) {
165 protected ElementProcessor
getElementProcessorFullArticle() {
166 return new BasicElementProcessor();
170 protected List
<Element
> getCommentCommentPosts(Document doc
,
176 protected String
getCommentId(Element post
) {
181 protected String
getCommentAuthor(Element post
) {
182 // We have an author, but no title, so, switch both:
187 protected String
getCommentTitle(Element post
) {
188 // We have an author, but no title, so, switch both:
189 Element author
= post
.getElementsByClass("author").first();
190 if (author
!= null) {
191 return author
.text();
198 protected String
getCommentDate(Element post
) {
199 Element date
= post
.getElementsByTag("time").first();
201 return date
.attr("datetime");
208 protected Element
getCommentContentElement(Element post
) {
209 return post
.getElementsByClass("OLD__post-content-text").first();
213 protected ElementProcessor
getElementProcessorComment() {
214 return new BasicElementProcessor() {
216 public boolean detectQuote(Node node
) {
217 if (node
instanceof Element
) {
218 if (((Element
) node
).hasClass("quote_container")) {
223 return super.detectQuote(node
);
227 public boolean ignoreNode(Node node
) {
228 if (node
instanceof Element
) {
229 if (((Element
) node
).hasClass("b-icon")) {
234 return super.ignoreNode(node
);