1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
6 import java
.util
.AbstractMap
;
7 import java
.util
.ArrayList
;
9 import java
.util
.Map
.Entry
;
11 import org
.jsoup
.helper
.DataUtil
;
12 import org
.jsoup
.nodes
.Document
;
13 import org
.jsoup
.nodes
.Element
;
14 import org
.jsoup
.nodes
.Node
;
15 import org
.jsoup
.nodes
.TextNode
;
17 import be
.nikiroo
.gofetch
.data
.Comment
;
18 import be
.nikiroo
.gofetch
.data
.Story
;
20 class Phoronix
extends BasicSupport
{
22 public String
getDescription() {
23 return "Phoronix: news regarding free and open-source software";
27 protected List
<Entry
<URL
, String
>> getUrls() throws IOException
{
28 List
<Entry
<URL
, String
>> urls
= new ArrayList
<Entry
<URL
, String
>>();
29 urls
.add(new AbstractMap
.SimpleEntry
<URL
, String
>(new URL(
30 "https://www.phoronix.com/"), ""));
35 protected List
<Element
> getArticles(Document doc
) {
36 return doc
.getElementsByTag("article");
// Derives an article id from the article's forum link: the first element of
// class "comments" is looked up, then its first <a> tag, and the absolute
// "href" of that link is cut down to the part after its last "/".
//
// NOTE(review): the leading integers (40, 41, ...) look like line numbers left
// behind by an extraction tool. The numbering jumps from 46 to 48 and then to
// 59 for the next method, so statements and closing braces appear to be
// missing (including every return) -- restore them from the upstream file.
40 protected String
getArticleId(Document doc
, Element article
) {
41 Element comments
= article
.getElementsByClass("comments").first();
42 if (comments
!= null) {
43 Element forumLink
= comments
.getElementsByTag("a").first();
44 if (forumLink
!= null) {
// Absolute form of the link target; the id is taken from its tail.
45 String id
= forumLink
.absUrl("href");
46 int pos
= id
.lastIndexOf("/");
// NOTE(review): original line 47 is not present; presumably a guard on pos.
48 id
= id
.substring(pos
+ 1);
// Begins resolving the article title by fetching the first <header> tag found
// inside the article element.
//
// NOTE(review): only the first statement survives; original lines 61-67 (how
// the header is turned into a title, and what is returned) are missing and
// must be restored from the upstream file.
59 protected String
getArticleTitle(Document doc
, Element article
) {
60 Element header
= article
.getElementsByTag("header").first();
// Author accessor for an article.
//
// NOTE(review): only the signature is present; the body (original lines
// 70-72) is missing, so the returned value cannot be determined from here.
69 protected String
getArticleAuthor(Document doc
, Element article
) {
74 protected String
getArticleDate(Document doc
, Element article
) {
75 return getArticleDetail(article
, 0);
79 protected String
getArticleCategory(Document doc
, Element article
,
80 String currentCategory
) {
81 return getArticleDetail(article
, 1);
85 protected String
getArticleDetails(Document doc
, Element article
) {
86 return getArticleDetail(article
, 2);
// Reads one entry from the article's "details" element: the child node at
// position `index` of the first element of class "details" is taken, its text
// is extracted (TextNode or Element) and trimmed, and then a "-" at the start
// and a "-" at the end are removed, trimming again after each removal.
//
// NOTE(review): the leading integers look like leftover line numbers, and the
// numbering has gaps (93, 98-99, 102, 105-112). The declaration of `value`
// and every return statement fall in those gaps, so the initial value and the
// result for a missing/short "details" element cannot be confirmed from here.
89 private String
getArticleDetail(Element article
, int index
) {
90 Element details
= article
.getElementsByClass("details").first();
// Only proceed when the element exists and has a child at `index`.
91 if (details
!= null && details
.childNodes().size() > index
) {
92 Node valueNode
= details
.childNodes().get(index
);
// NOTE(review): original line 93 (presumably the declaration of `value`) is
// missing.
94 if (valueNode
instanceof TextNode
) {
95 value
= ((TextNode
) valueNode
).text().trim();
96 } else if (valueNode
instanceof Element
) {
97 value
= ((Element
) valueNode
).text().trim();
// Drop a "-" at the very start of the text, if there is one.
100 if (value
.startsWith("-")) {
101 value
= value
.substring(1).trim();
// Drop a "-" at the very end of the text, if there is one.
103 if (value
.endsWith("-")) {
104 value
= value
.substring(0, value
.length() - 1).trim();
// Resolves the article's internal URL as the absolute "href" of the first
// <a> tag found inside the article element.
//
// NOTE(review): original lines 116 and 118-122 are missing (presumably a null
// check on `a` and a fallback return) -- restore from the upstream file.
114 protected String
getArticleIntUrl(Document doc
, Element article
) {
115 Element a
= article
.getElementsByTag("a").first();
117 return a
.absUrl("href");
// External-URL accessor for an article.
//
// NOTE(review): only the signature is present; the body (original lines
// 125-127) is missing, so the returned value cannot be determined from here.
124 protected String
getArticleExtUrl(Document doc
, Element article
) {
// Produces the article's short content by passing the first <p> tag found
// inside the article element to getArticleText (defined outside this block).
//
// NOTE(review): original lines 131 and 133-137 are missing (presumably a null
// check on `p` and a fallback return) -- restore from the upstream file.
129 protected String
getArticleContent(Document doc
, Element article
) {
130 Element p
= article
.getElementsByTag("p").first();
132 return getArticleText(p
);
139 protected Element
getFullArticle(Document doc
) {
140 return doc
.getElementsByClass("content").first();
// Fetches the comment posts of an article: the page's "comments-label"
// element is used to find a link, the linked page is opened and parsed as
// UTF-8, and the elements of class "b-post" on that page are returned.
//
// NOTE(review): the leading integers look like leftover line numbers and the
// numbering has gaps. The first statement is cut off after
// getElementsByClass("comments-label") (line 146 is missing), the `try`
// matching the catch below is not visible, nor are the catch body and the
// fallback return. Whether `in` is ever closed cannot be seen from here --
// verify against the upstream file.
144 protected List
<Element
> getFullArticleCommentPosts(Document doc
, URL intUrl
) {
145 Element linkToComments
= doc
.getElementsByClass("comments-label")
148 if (linkToComments
!= null) {
149 Element a
= linkToComments
.getElementsByTag("a").first();
// Absolute address of the comments page.
151 String url
= a
.absUrl("href");
// open(...) is defined outside this block.
152 InputStream in
= open(new URL(url
));
// The `doc` parameter is reassigned to the freshly loaded comments page.
154 doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
155 return doc
.getElementsByClass("b-post");
161 } catch (IOException e
) {
168 protected ElementProcessor
getElementProcessorFullArticle() {
169 return new BasicElementProcessor();
// Accessor for the posts nested under a comment.
//
// NOTE(review): the signature is cut off after the first parameter and the
// body (original lines 174-177) is missing; nothing about its behaviour can
// be determined from here.
173 protected List
<Element
> getCommentCommentPosts(Document doc
,
// Id accessor for a comment post.
//
// NOTE(review): only the signature is present; the body (original lines
// 180-182) is missing, so the returned value cannot be determined from here.
179 protected String
getCommentId(Element post
) {
// Author accessor for a comment post.
//
// NOTE(review): only the signature and the comment below are present; the
// statements that follow (original lines 186-188) are missing, so the
// returned value cannot be determined from here.
184 protected String
getCommentAuthor(Element post
) {
185 // The posts have an author but no title, so the two are switched:
// Title accessor for a comment post: returns the text of the first element of
// class "author" inside the post when such an element exists.
//
// NOTE(review): original lines 195-199 are missing, so the value returned
// when no "author" element is found cannot be determined from here.
190 protected String
getCommentTitle(Element post
) {
191 // The posts have an author but no title, so the two are switched:
192 Element author
= post
.getElementsByClass("author").first();
193 if (author
!= null) {
194 return author
.text();
// Date accessor for a comment post: returns the "datetime" attribute of the
// first <time> tag found inside the post.
//
// NOTE(review): original lines 203 and 205-209 are missing (presumably a null
// check on `date` and a fallback return) -- restore from the upstream file.
201 protected String
getCommentDate(Element post
) {
202 Element date
= post
.getElementsByTag("time").first();
204 return date
.attr("datetime");
211 protected Element
getCommentContentElement(Element post
) {
212 return post
.getElementsByClass("OLD__post-content-text").first();
// Supplies the processor used for comments: an anonymous subclass of
// BasicElementProcessor that special-cases elements of class
// "quote_container" in detectQuote and elements of class "b-icon" in
// ignoreNode, deferring to the superclass for everything else.
//
// NOTE(review): the leading integers look like leftover line numbers and the
// numbering has gaps (218, 222-225, 227-229, 233-236, 238-241). The
// statements executed when the class checks match, and all closing braces,
// fall in those gaps -- restore them from the upstream file.
216 protected ElementProcessor
getElementProcessorComment() {
217 return new BasicElementProcessor() {
219 public boolean detectQuote(Node node
) {
220 if (node
instanceof Element
) {
221 if (((Element
) node
).hasClass("quote_container")) {
// Any node not handled above is left to the default detection.
226 return super.detectQuote(node
);
230 public boolean ignoreNode(Node node
) {
231 if (node
instanceof Element
) {
232 if (((Element
) node
).hasClass("b-icon")) {
// Any node not handled above is left to the default rule.
237 return super.ignoreNode(node
);
// Post-processes a story's comments: when the story has more than one
// comment, the list is replaced by a view that skips the first one, and the
// result is stored back on the story.
//
// NOTE(review): original lines 244-245 are missing (presumably the call that
// actually populates the story), as are the closing braces, so it cannot be
// confirmed from here whether setComments runs inside or after the `if`.
243 public void fetch(Story story
) throws IOException
{
246 // The first comment is a copy of the article, so discard it
247 List
<Comment
> comments
= story
.getComments();
248 if (comments
!= null && comments
.size() > 1) {
// subList returns a view backed by the original list, not a copy.
249 comments
= comments
.subList(1, comments
.size());
251 story
.setComments(comments
);