Commit | Line | Data |
---|---|---|
127e065f NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
3 | import java.io.IOException; | |
4 | import java.io.InputStream; | |
5 | import java.net.URL; | |
6 | import java.util.AbstractMap; | |
7 | import java.util.ArrayList; | |
8 | import java.util.List; | |
9 | import java.util.Map.Entry; | |
10 | ||
11 | import org.jsoup.helper.DataUtil; | |
12 | import org.jsoup.nodes.Document; | |
13 | import org.jsoup.nodes.Element; | |
14 | import org.jsoup.nodes.Node; | |
15 | import org.jsoup.nodes.TextNode; | |
16 | ||
17 | class Phoronix extends BasicSupport { | |
18 | @Override | |
19 | public String getDescription() { | |
20 | return "Phoronix: news regarding free and open-source software"; | |
21 | } | |
22 | ||
23 | @Override | |
24 | protected List<Entry<URL, String>> getUrls() throws IOException { | |
25 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
26 | urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL( | |
27 | "https://www.phoronix.com/"), "")); | |
28 | return urls; | |
29 | } | |
30 | ||
31 | @Override | |
32 | protected List<Element> getArticles(Document doc) { | |
33 | return doc.getElementsByTag("article"); | |
34 | } | |
35 | ||
36 | @Override | |
37 | protected String getArticleId(Document doc, Element article) { | |
38 | Element comments = article.getElementsByClass("comments").first(); | |
39 | if (comments != null) { | |
40 | Element forumLink = comments.getElementsByTag("a").first(); | |
41 | if (forumLink != null) { | |
42 | String id = forumLink.absUrl("href"); | |
43 | int pos = id.lastIndexOf("/"); | |
44 | if (pos >= 0) { | |
45 | id = id.substring(pos + 1); | |
46 | } | |
47 | ||
48 | return id; | |
49 | } | |
50 | } | |
51 | ||
52 | return ""; | |
53 | } | |
54 | ||
55 | @Override | |
56 | protected String getArticleTitle(Document doc, Element article) { | |
57 | Element header = article.getElementsByTag("header").first(); | |
58 | if (header != null) { | |
59 | return header.text(); | |
60 | } | |
61 | ||
62 | return ""; | |
63 | } | |
64 | ||
65 | @Override | |
66 | protected String getArticleAuthor(Document doc, Element article) { | |
67 | return ""; | |
68 | } | |
69 | ||
70 | @Override | |
71 | protected String getArticleDate(Document doc, Element article) { | |
72 | return getArticleDetail(article, 0); | |
73 | } | |
74 | ||
75 | @Override | |
76 | protected String getArticleCategory(Document doc, Element article, | |
77 | String currentCategory) { | |
78 | return getArticleDetail(article, 1); | |
79 | } | |
80 | ||
81 | @Override | |
82 | protected String getArticleDetails(Document doc, Element article) { | |
83 | return getArticleDetail(article, 2); | |
84 | } | |
85 | ||
86 | private String getArticleDetail(Element article, int index) { | |
87 | Element details = article.getElementsByClass("details").first(); | |
88 | if (details != null && details.childNodes().size() > index) { | |
89 | Node valueNode = details.childNodes().get(index); | |
90 | String value = ""; | |
91 | if (valueNode instanceof TextNode) { | |
92 | value = ((TextNode) valueNode).text().trim(); | |
93 | } else if (valueNode instanceof Element) { | |
94 | value = ((Element) valueNode).text().trim(); | |
95 | } | |
96 | ||
97 | if (value.startsWith("-")) { | |
98 | value = value.substring(1).trim(); | |
99 | } | |
100 | if (value.endsWith("-")) { | |
101 | value = value.substring(0, value.length() - 1).trim(); | |
102 | } | |
103 | ||
104 | return value; | |
105 | } | |
106 | ||
107 | return ""; | |
108 | } | |
109 | ||
110 | @Override | |
111 | protected String getArticleIntUrl(Document doc, Element article) { | |
112 | Element a = article.getElementsByTag("a").first(); | |
113 | if (a != null) { | |
114 | return a.absUrl("href"); | |
115 | } | |
116 | ||
117 | return ""; | |
118 | } | |
119 | ||
120 | @Override | |
121 | protected String getArticleExtUrl(Document doc, Element article) { | |
122 | return ""; | |
123 | } | |
124 | ||
125 | @Override | |
126 | protected String getArticleContent(Document doc, Element article) { | |
127 | Element p = article.getElementsByTag("p").first(); | |
128 | if (p != null) { | |
129 | return p.text(); | |
130 | } | |
131 | ||
132 | return ""; | |
133 | } | |
134 | ||
135 | @Override | |
136 | protected Element getFullArticle(Document doc) { | |
137 | return doc.getElementsByClass("content").first(); | |
138 | } | |
139 | ||
140 | @Override | |
141 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
142 | Element linkToComments = doc.getElementsByClass("comments-label") | |
143 | .first(); | |
144 | try { | |
145 | if (linkToComments != null) { | |
146 | Element a = linkToComments.getElementsByTag("a").first(); | |
147 | if (a != null) { | |
148 | String url = a.absUrl("href"); | |
149 | InputStream in = downloader.open(new URL(url)); | |
150 | try { | |
151 | doc = DataUtil.load(in, "UTF-8", url.toString()); | |
152 | return doc.getElementsByClass("b-post"); | |
153 | } finally { | |
154 | in.close(); | |
155 | } | |
156 | } | |
157 | } | |
158 | } catch (IOException e) { | |
159 | } | |
160 | ||
161 | return null; | |
162 | } | |
163 | ||
164 | @Override | |
165 | protected ElementProcessor getElementProcessorFullArticle() { | |
166 | return new BasicElementProcessor(); | |
167 | } | |
168 | ||
169 | @Override | |
170 | protected List<Element> getCommentCommentPosts(Document doc, | |
171 | Element container) { | |
172 | return null; | |
173 | } | |
174 | ||
175 | @Override | |
176 | protected String getCommentId(Element post) { | |
177 | return post.id(); | |
178 | } | |
179 | ||
180 | @Override | |
181 | protected String getCommentAuthor(Element post) { | |
182 | // We have an author, but no title, so, switch both: | |
183 | return ""; | |
184 | } | |
185 | ||
186 | @Override | |
187 | protected String getCommentTitle(Element post) { | |
188 | // We have an author, but no title, so, switch both: | |
189 | Element author = post.getElementsByClass("author").first(); | |
190 | if (author != null) { | |
191 | return author.text(); | |
192 | } | |
193 | ||
194 | return ""; | |
195 | } | |
196 | ||
197 | @Override | |
198 | protected String getCommentDate(Element post) { | |
199 | Element date = post.getElementsByTag("time").first(); | |
200 | if (date != null) { | |
201 | return date.attr("datetime"); | |
202 | } | |
203 | ||
204 | return ""; | |
205 | } | |
206 | ||
207 | @Override | |
208 | protected Element getCommentContentElement(Element post) { | |
209 | return post.getElementsByClass("OLD__post-content-text").first(); | |
210 | } | |
211 | ||
212 | @Override | |
213 | protected ElementProcessor getElementProcessorComment() { | |
214 | return new BasicElementProcessor() { | |
215 | @Override | |
216 | public boolean detectQuote(Node node) { | |
217 | if (node instanceof Element) { | |
218 | if (((Element) node).hasClass("quote_container")) { | |
219 | return true; | |
220 | } | |
221 | } | |
222 | ||
223 | return super.detectQuote(node); | |
224 | } | |
225 | ||
226 | @Override | |
227 | public boolean ignoreNode(Node node) { | |
228 | if (node instanceof Element) { | |
229 | if (((Element) node).hasClass("b-icon")) { | |
230 | return true; | |
231 | } | |
232 | } | |
233 | ||
234 | return super.ignoreNode(node); | |
235 | } | |
236 | }; | |
237 | } | |
238 | ||
239 | } |