Commit | Line | Data |
---|---|---|
127e065f NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
3 | import java.io.IOException; | |
4 | import java.io.InputStream; | |
5 | import java.net.URL; | |
6 | import java.util.AbstractMap; | |
7 | import java.util.ArrayList; | |
8 | import java.util.List; | |
9 | import java.util.Map.Entry; | |
10 | ||
11 | import org.jsoup.helper.DataUtil; | |
12 | import org.jsoup.nodes.Document; | |
13 | import org.jsoup.nodes.Element; | |
14 | import org.jsoup.nodes.Node; | |
15 | import org.jsoup.nodes.TextNode; | |
16 | ||
183f2d47 NR |
17 | import be.nikiroo.gofetch.data.Comment; |
18 | import be.nikiroo.gofetch.data.Story; | |
19 | ||
127e065f NR |
20 | class Phoronix extends BasicSupport { |
21 | @Override | |
22 | public String getDescription() { | |
23 | return "Phoronix: news regarding free and open-source software"; | |
24 | } | |
25 | ||
26 | @Override | |
27 | protected List<Entry<URL, String>> getUrls() throws IOException { | |
28 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
29 | urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL( | |
30 | "https://www.phoronix.com/"), "")); | |
31 | return urls; | |
32 | } | |
33 | ||
34 | @Override | |
35 | protected List<Element> getArticles(Document doc) { | |
36 | return doc.getElementsByTag("article"); | |
37 | } | |
38 | ||
39 | @Override | |
40 | protected String getArticleId(Document doc, Element article) { | |
41 | Element comments = article.getElementsByClass("comments").first(); | |
42 | if (comments != null) { | |
43 | Element forumLink = comments.getElementsByTag("a").first(); | |
44 | if (forumLink != null) { | |
45 | String id = forumLink.absUrl("href"); | |
46 | int pos = id.lastIndexOf("/"); | |
47 | if (pos >= 0) { | |
48 | id = id.substring(pos + 1); | |
49 | } | |
50 | ||
51 | return id; | |
52 | } | |
53 | } | |
54 | ||
55 | return ""; | |
56 | } | |
57 | ||
58 | @Override | |
59 | protected String getArticleTitle(Document doc, Element article) { | |
60 | Element header = article.getElementsByTag("header").first(); | |
61 | if (header != null) { | |
62 | return header.text(); | |
63 | } | |
64 | ||
65 | return ""; | |
66 | } | |
67 | ||
68 | @Override | |
69 | protected String getArticleAuthor(Document doc, Element article) { | |
70 | return ""; | |
71 | } | |
72 | ||
73 | @Override | |
74 | protected String getArticleDate(Document doc, Element article) { | |
75 | return getArticleDetail(article, 0); | |
76 | } | |
77 | ||
78 | @Override | |
79 | protected String getArticleCategory(Document doc, Element article, | |
80 | String currentCategory) { | |
81 | return getArticleDetail(article, 1); | |
82 | } | |
83 | ||
84 | @Override | |
85 | protected String getArticleDetails(Document doc, Element article) { | |
86 | return getArticleDetail(article, 2); | |
87 | } | |
88 | ||
89 | private String getArticleDetail(Element article, int index) { | |
90 | Element details = article.getElementsByClass("details").first(); | |
91 | if (details != null && details.childNodes().size() > index) { | |
92 | Node valueNode = details.childNodes().get(index); | |
93 | String value = ""; | |
94 | if (valueNode instanceof TextNode) { | |
95 | value = ((TextNode) valueNode).text().trim(); | |
96 | } else if (valueNode instanceof Element) { | |
97 | value = ((Element) valueNode).text().trim(); | |
98 | } | |
99 | ||
100 | if (value.startsWith("-")) { | |
101 | value = value.substring(1).trim(); | |
102 | } | |
103 | if (value.endsWith("-")) { | |
104 | value = value.substring(0, value.length() - 1).trim(); | |
105 | } | |
106 | ||
107 | return value; | |
108 | } | |
109 | ||
110 | return ""; | |
111 | } | |
112 | ||
113 | @Override | |
114 | protected String getArticleIntUrl(Document doc, Element article) { | |
115 | Element a = article.getElementsByTag("a").first(); | |
116 | if (a != null) { | |
117 | return a.absUrl("href"); | |
118 | } | |
119 | ||
120 | return ""; | |
121 | } | |
122 | ||
123 | @Override | |
124 | protected String getArticleExtUrl(Document doc, Element article) { | |
125 | return ""; | |
126 | } | |
127 | ||
128 | @Override | |
129 | protected String getArticleContent(Document doc, Element article) { | |
130 | Element p = article.getElementsByTag("p").first(); | |
131 | if (p != null) { | |
132 | return p.text(); | |
133 | } | |
134 | ||
135 | return ""; | |
136 | } | |
137 | ||
138 | @Override | |
139 | protected Element getFullArticle(Document doc) { | |
140 | return doc.getElementsByClass("content").first(); | |
141 | } | |
142 | ||
143 | @Override | |
144 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
145 | Element linkToComments = doc.getElementsByClass("comments-label") | |
146 | .first(); | |
147 | try { | |
148 | if (linkToComments != null) { | |
149 | Element a = linkToComments.getElementsByTag("a").first(); | |
150 | if (a != null) { | |
151 | String url = a.absUrl("href"); | |
a71d4075 | 152 | InputStream in = open(new URL(url)); |
127e065f NR |
153 | try { |
154 | doc = DataUtil.load(in, "UTF-8", url.toString()); | |
155 | return doc.getElementsByClass("b-post"); | |
156 | } finally { | |
157 | in.close(); | |
158 | } | |
159 | } | |
160 | } | |
161 | } catch (IOException e) { | |
162 | } | |
163 | ||
164 | return null; | |
165 | } | |
166 | ||
167 | @Override | |
168 | protected ElementProcessor getElementProcessorFullArticle() { | |
169 | return new BasicElementProcessor(); | |
170 | } | |
171 | ||
172 | @Override | |
173 | protected List<Element> getCommentCommentPosts(Document doc, | |
174 | Element container) { | |
175 | return null; | |
176 | } | |
177 | ||
178 | @Override | |
179 | protected String getCommentId(Element post) { | |
180 | return post.id(); | |
181 | } | |
182 | ||
183 | @Override | |
184 | protected String getCommentAuthor(Element post) { | |
185 | // We have an author, but no title, so, switch both: | |
186 | return ""; | |
187 | } | |
188 | ||
189 | @Override | |
190 | protected String getCommentTitle(Element post) { | |
191 | // We have an author, but no title, so, switch both: | |
192 | Element author = post.getElementsByClass("author").first(); | |
193 | if (author != null) { | |
194 | return author.text(); | |
195 | } | |
196 | ||
197 | return ""; | |
198 | } | |
199 | ||
200 | @Override | |
201 | protected String getCommentDate(Element post) { | |
202 | Element date = post.getElementsByTag("time").first(); | |
203 | if (date != null) { | |
204 | return date.attr("datetime"); | |
205 | } | |
206 | ||
207 | return ""; | |
208 | } | |
209 | ||
210 | @Override | |
211 | protected Element getCommentContentElement(Element post) { | |
212 | return post.getElementsByClass("OLD__post-content-text").first(); | |
213 | } | |
214 | ||
215 | @Override | |
216 | protected ElementProcessor getElementProcessorComment() { | |
217 | return new BasicElementProcessor() { | |
218 | @Override | |
219 | public boolean detectQuote(Node node) { | |
220 | if (node instanceof Element) { | |
221 | if (((Element) node).hasClass("quote_container")) { | |
222 | return true; | |
223 | } | |
224 | } | |
225 | ||
226 | return super.detectQuote(node); | |
227 | } | |
228 | ||
229 | @Override | |
230 | public boolean ignoreNode(Node node) { | |
231 | if (node instanceof Element) { | |
232 | if (((Element) node).hasClass("b-icon")) { | |
233 | return true; | |
234 | } | |
235 | } | |
236 | ||
237 | return super.ignoreNode(node); | |
238 | } | |
239 | }; | |
240 | } | |
241 | ||
183f2d47 NR |
242 | @Override |
243 | public void fetch(Story story) throws IOException { | |
244 | super.fetch(story); | |
245 | ||
246 | // First comment is a copy of the article, discard it | |
247 | List<Comment> comments = story.getComments(); | |
248 | if (comments != null && comments.size() > 1) { | |
249 | comments = comments.subList(1, comments.size()); | |
250 | } | |
251 | story.setComments(comments); | |
252 | } | |
127e065f | 253 | } |