Phoronix: fix first comment is article copy
[gofetch.git] / src / be / nikiroo / gofetch / support / Phoronix.java
CommitLineData
127e065f
NR
1package be.nikiroo.gofetch.support;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.URL;
6import java.util.AbstractMap;
7import java.util.ArrayList;
8import java.util.List;
9import java.util.Map.Entry;
10
11import org.jsoup.helper.DataUtil;
12import org.jsoup.nodes.Document;
13import org.jsoup.nodes.Element;
14import org.jsoup.nodes.Node;
15import org.jsoup.nodes.TextNode;
16
183f2d47
NR
17import be.nikiroo.gofetch.data.Comment;
18import be.nikiroo.gofetch.data.Story;
19
127e065f
NR
20class Phoronix extends BasicSupport {
21 @Override
22 public String getDescription() {
23 return "Phoronix: news regarding free and open-source software";
24 }
25
26 @Override
27 protected List<Entry<URL, String>> getUrls() throws IOException {
28 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
29 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
30 "https://www.phoronix.com/"), ""));
31 return urls;
32 }
33
34 @Override
35 protected List<Element> getArticles(Document doc) {
36 return doc.getElementsByTag("article");
37 }
38
39 @Override
40 protected String getArticleId(Document doc, Element article) {
41 Element comments = article.getElementsByClass("comments").first();
42 if (comments != null) {
43 Element forumLink = comments.getElementsByTag("a").first();
44 if (forumLink != null) {
45 String id = forumLink.absUrl("href");
46 int pos = id.lastIndexOf("/");
47 if (pos >= 0) {
48 id = id.substring(pos + 1);
49 }
50
51 return id;
52 }
53 }
54
55 return "";
56 }
57
58 @Override
59 protected String getArticleTitle(Document doc, Element article) {
60 Element header = article.getElementsByTag("header").first();
61 if (header != null) {
62 return header.text();
63 }
64
65 return "";
66 }
67
68 @Override
69 protected String getArticleAuthor(Document doc, Element article) {
70 return "";
71 }
72
73 @Override
74 protected String getArticleDate(Document doc, Element article) {
75 return getArticleDetail(article, 0);
76 }
77
78 @Override
79 protected String getArticleCategory(Document doc, Element article,
80 String currentCategory) {
81 return getArticleDetail(article, 1);
82 }
83
84 @Override
85 protected String getArticleDetails(Document doc, Element article) {
86 return getArticleDetail(article, 2);
87 }
88
89 private String getArticleDetail(Element article, int index) {
90 Element details = article.getElementsByClass("details").first();
91 if (details != null && details.childNodes().size() > index) {
92 Node valueNode = details.childNodes().get(index);
93 String value = "";
94 if (valueNode instanceof TextNode) {
95 value = ((TextNode) valueNode).text().trim();
96 } else if (valueNode instanceof Element) {
97 value = ((Element) valueNode).text().trim();
98 }
99
100 if (value.startsWith("-")) {
101 value = value.substring(1).trim();
102 }
103 if (value.endsWith("-")) {
104 value = value.substring(0, value.length() - 1).trim();
105 }
106
107 return value;
108 }
109
110 return "";
111 }
112
113 @Override
114 protected String getArticleIntUrl(Document doc, Element article) {
115 Element a = article.getElementsByTag("a").first();
116 if (a != null) {
117 return a.absUrl("href");
118 }
119
120 return "";
121 }
122
123 @Override
124 protected String getArticleExtUrl(Document doc, Element article) {
125 return "";
126 }
127
128 @Override
129 protected String getArticleContent(Document doc, Element article) {
130 Element p = article.getElementsByTag("p").first();
131 if (p != null) {
132 return p.text();
133 }
134
135 return "";
136 }
137
138 @Override
139 protected Element getFullArticle(Document doc) {
140 return doc.getElementsByClass("content").first();
141 }
142
143 @Override
144 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
145 Element linkToComments = doc.getElementsByClass("comments-label")
146 .first();
147 try {
148 if (linkToComments != null) {
149 Element a = linkToComments.getElementsByTag("a").first();
150 if (a != null) {
151 String url = a.absUrl("href");
a71d4075 152 InputStream in = open(new URL(url));
127e065f
NR
153 try {
154 doc = DataUtil.load(in, "UTF-8", url.toString());
155 return doc.getElementsByClass("b-post");
156 } finally {
157 in.close();
158 }
159 }
160 }
161 } catch (IOException e) {
162 }
163
164 return null;
165 }
166
167 @Override
168 protected ElementProcessor getElementProcessorFullArticle() {
169 return new BasicElementProcessor();
170 }
171
172 @Override
173 protected List<Element> getCommentCommentPosts(Document doc,
174 Element container) {
175 return null;
176 }
177
178 @Override
179 protected String getCommentId(Element post) {
180 return post.id();
181 }
182
183 @Override
184 protected String getCommentAuthor(Element post) {
185 // We have an author, but no title, so, switch both:
186 return "";
187 }
188
189 @Override
190 protected String getCommentTitle(Element post) {
191 // We have an author, but no title, so, switch both:
192 Element author = post.getElementsByClass("author").first();
193 if (author != null) {
194 return author.text();
195 }
196
197 return "";
198 }
199
200 @Override
201 protected String getCommentDate(Element post) {
202 Element date = post.getElementsByTag("time").first();
203 if (date != null) {
204 return date.attr("datetime");
205 }
206
207 return "";
208 }
209
210 @Override
211 protected Element getCommentContentElement(Element post) {
212 return post.getElementsByClass("OLD__post-content-text").first();
213 }
214
215 @Override
216 protected ElementProcessor getElementProcessorComment() {
217 return new BasicElementProcessor() {
218 @Override
219 public boolean detectQuote(Node node) {
220 if (node instanceof Element) {
221 if (((Element) node).hasClass("quote_container")) {
222 return true;
223 }
224 }
225
226 return super.detectQuote(node);
227 }
228
229 @Override
230 public boolean ignoreNode(Node node) {
231 if (node instanceof Element) {
232 if (((Element) node).hasClass("b-icon")) {
233 return true;
234 }
235 }
236
237 return super.ignoreNode(node);
238 }
239 };
240 }
241
183f2d47
NR
242 @Override
243 public void fetch(Story story) throws IOException {
244 super.fetch(story);
245
246 // First comment is a copy of the article, discard it
247 List<Comment> comments = story.getComments();
248 if (comments != null && comments.size() > 1) {
249 comments = comments.subList(1, comments.size());
250 }
251 story.setComments(comments);
252 }
127e065f 253}