Commit | Line | Data |
---|---|---|
2d95a873 NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
3 | import java.io.IOException; | |
2d95a873 | 4 | import java.net.URL; |
3e62b034 | 5 | import java.util.AbstractMap; |
2d95a873 NR |
6 | import java.util.ArrayList; |
7 | import java.util.List; | |
3e62b034 | 8 | import java.util.Map.Entry; |
2d95a873 | 9 | |
2d95a873 NR |
10 | import org.jsoup.nodes.Document; |
11 | import org.jsoup.nodes.Element; | |
27008a87 | 12 | import org.jsoup.nodes.Node; |
2d95a873 NR |
13 | import org.jsoup.select.Elements; |
14 | ||
2d95a873 NR |
15 | /** |
16 | * Support <a href='https://pipedot.org/'>https://pipedot.org/</a>. | |
17 | * | |
18 | * @author niki | |
19 | */ | |
20 | public class Pipedot extends BasicSupport { | |
21 | @Override | |
22 | public String getDescription() { | |
23 | return "Pipedot: News for nerds, without the corporate slant"; | |
24 | } | |
25 | ||
26 | @Override | |
3e62b034 NR |
27 | protected List<Entry<URL, String>> getUrls() throws IOException { |
28 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
29 | urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL( | |
30 | "https://pipedot.org/"), "")); | |
31 | return urls; | |
32 | } | |
2d95a873 | 33 | |
3e62b034 NR |
34 | @Override |
35 | protected List<Element> getArticles(Document doc) { | |
36 | return doc.getElementsByClass("story"); | |
37 | } | |
2d95a873 | 38 | |
3e62b034 NR |
39 | @Override |
40 | protected String getArticleId(Document doc, Element article) { | |
41 | // Don't try on bad articles | |
42 | if (getArticleTitle(doc, article).isEmpty()) { | |
43 | return ""; | |
44 | } | |
2d95a873 | 45 | |
3e62b034 NR |
46 | for (Element idElem : article.getElementsByTag("a")) { |
47 | if (idElem.attr("href").startsWith("/pipe/")) { | |
48 | return idElem.attr("href").substring("/pipe/".length()); | |
2d95a873 | 49 | } |
3e62b034 | 50 | } |
2d95a873 | 51 | |
3e62b034 NR |
52 | return ""; |
53 | } | |
2d95a873 | 54 | |
3e62b034 NR |
55 | @Override |
56 | protected String getArticleTitle(Document doc, Element article) { | |
57 | Element title = article.getElementsByTag("h1").first(); | |
58 | if (title != null) { | |
59 | return title.text(); | |
60 | } | |
2d95a873 | 61 | |
3e62b034 NR |
62 | return ""; |
63 | } | |
2d95a873 | 64 | |
3e62b034 NR |
65 | @Override |
66 | protected String getArticleAuthor(Document doc, Element article) { | |
67 | String value = getArticleDetailsReal(article); | |
68 | int pos = value.indexOf("by "); | |
69 | if (pos >= 0) { | |
70 | value = value.substring(pos + "by ".length()).trim(); | |
71 | pos = value.indexOf(" in "); | |
c9cffa91 | 72 | if (pos >= 0) { |
3e62b034 | 73 | value = value.substring(0, pos).trim(); |
c9cffa91 NR |
74 | } |
75 | ||
3e62b034 NR |
76 | return value; |
77 | } | |
78 | ||
79 | return ""; | |
80 | } | |
81 | ||
82 | @Override | |
83 | protected String getArticleDate(Document doc, Element article) { | |
84 | Element dateElement = article.getElementsByTag("time").first(); | |
85 | if (dateElement != null) { | |
86 | return dateElement.attr("datetime"); | |
87 | } | |
88 | ||
89 | return ""; | |
90 | } | |
91 | ||
92 | @Override | |
93 | protected String getArticleCategory(Document doc, Element article, | |
94 | String currentCategory) { | |
95 | String value = getArticleDetailsReal(article); | |
96 | int pos = value.indexOf(" in "); | |
97 | if (pos >= 0) { | |
98 | value = value.substring(pos + " in ".length()).trim(); | |
99 | pos = value.indexOf(" on "); | |
c9cffa91 | 100 | if (pos >= 0) { |
3e62b034 | 101 | value = value.substring(0, pos).trim(); |
c9cffa91 NR |
102 | } |
103 | ||
3e62b034 NR |
104 | return value; |
105 | } | |
c9cffa91 | 106 | |
3e62b034 NR |
107 | return ""; |
108 | } | |
c9cffa91 | 109 | |
3e62b034 NR |
110 | @Override |
111 | protected String getArticleDetails(Document doc, Element article) { | |
112 | return ""; // We alrady extracted all the info | |
113 | } | |
114 | ||
115 | @Override | |
116 | protected String getArticleIntUrl(Document doc, Element article) { | |
117 | Element link = article.getElementsByTag("a").first(); | |
118 | if (link != null) { | |
119 | return link.absUrl("href"); | |
120 | } | |
121 | ||
122 | return ""; | |
123 | } | |
124 | ||
125 | @Override | |
126 | protected String getArticleExtUrl(Document doc, Element article) { | |
127 | Element link = article.getElementsByTag("a").first(); | |
128 | if (link != null) { | |
129 | String possibleExtLink = link.absUrl("href").trim(); | |
130 | if (!possibleExtLink.isEmpty() | |
131 | && !possibleExtLink.contains("pipedot.org/")) { | |
132 | return possibleExtLink; | |
2d95a873 | 133 | } |
3e62b034 | 134 | } |
2d95a873 | 135 | |
3e62b034 NR |
136 | return ""; |
137 | } | |
138 | ||
139 | @Override | |
140 | protected String getArticleContent(Document doc, Element article) { | |
141 | for (Element elem : article.children()) { | |
142 | String tag = elem.tagName(); | |
143 | if (!tag.equals("header") && !tag.equals("footer")) { | |
e818d449 | 144 | return getArticleText(elem); |
3e62b034 | 145 | } |
2d95a873 NR |
146 | } |
147 | ||
3e62b034 NR |
148 | return ""; |
149 | } | |
150 | ||
151 | @Override | |
152 | protected Element getFullArticle(Document doc) { | |
153 | return null; | |
154 | } | |
155 | ||
156 | @Override | |
157 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
158 | return getCommentElements(doc.getElementsByTag("main").first()); | |
159 | } | |
160 | ||
161 | @Override | |
162 | protected ElementProcessor getElementProcessorFullArticle() { | |
163 | return new BasicElementProcessor(); | |
2d95a873 NR |
164 | } |
165 | ||
166 | @Override | |
3e62b034 NR |
167 | protected List<Element> getCommentCommentPosts(Document doc, |
168 | Element container) { | |
2d95a873 | 169 | |
3e62b034 NR |
170 | if (container != null) { |
171 | container = container.getElementsByClass("comment-outline").first(); | |
2d95a873 NR |
172 | } |
173 | ||
3e62b034 | 174 | return getCommentElements(container); |
2d95a873 NR |
175 | } |
176 | ||
3e62b034 NR |
177 | @Override |
178 | protected String getCommentId(Element post) { | |
179 | return post.id(); | |
180 | } | |
181 | ||
182 | @Override | |
183 | protected String getCommentAuthor(Element post) { | |
184 | Element authorDateE = post.getElementsByTag("h3").first(); | |
185 | if (authorDateE != null) { | |
186 | String authorDate = authorDateE.text(); | |
187 | int pos = authorDate.lastIndexOf(" on "); | |
188 | if (pos >= 0) { | |
189 | return authorDate.substring(0, pos).trim(); | |
2d95a873 NR |
190 | } |
191 | } | |
2d95a873 | 192 | |
3e62b034 NR |
193 | return ""; |
194 | } | |
2d95a873 | 195 | |
3e62b034 NR |
196 | @Override |
197 | protected String getCommentTitle(Element post) { | |
198 | Element title = post.getElementsByTag("h3").first(); | |
199 | if (title != null) { | |
200 | return title.text(); | |
2d95a873 NR |
201 | } |
202 | ||
3e62b034 NR |
203 | return ""; |
204 | } | |
2d95a873 | 205 | |
3e62b034 NR |
206 | @Override |
207 | protected String getCommentDate(Element post) { | |
208 | Element authorDateE = post.getElementsByTag("h3").first(); | |
209 | if (authorDateE != null) { | |
210 | String authorDate = authorDateE.text(); | |
211 | int pos = authorDate.lastIndexOf(" on "); | |
212 | if (pos >= 0) { | |
213 | return authorDate.substring(pos + " on ".length()).trim(); | |
214 | } | |
2d95a873 NR |
215 | } |
216 | ||
3e62b034 NR |
217 | return ""; |
218 | } | |
219 | ||
220 | @Override | |
221 | protected Element getCommentContentElement(Element post) { | |
222 | return post.getElementsByClass("comment-body").first(); | |
2d95a873 NR |
223 | } |
224 | ||
3e62b034 NR |
225 | @Override |
226 | protected ElementProcessor getElementProcessorComment() { | |
227 | return new BasicElementProcessor() { | |
27008a87 NR |
228 | @Override |
229 | public boolean detectQuote(Node node) { | |
230 | if (node instanceof Element) { | |
231 | Element elementNode = (Element) node; | |
232 | if (elementNode.tagName().equals("blockquote") | |
233 | || elementNode.hasClass("quote")) { | |
234 | return true; | |
235 | } | |
236 | } | |
2d95a873 | 237 | |
27008a87 NR |
238 | return false; |
239 | } | |
3e62b034 NR |
240 | }; |
241 | } | |
242 | ||
243 | private String getArticleDetailsReal(Element article) { | |
244 | Elements detailsElements = article.getElementsByTag("div"); | |
245 | if (detailsElements.size() > 0) { | |
246 | return detailsElements.get(0).text().trim(); | |
247 | } | |
248 | ||
249 | return ""; | |
250 | } | |
251 | ||
252 | private List<Element> getCommentElements(Element container) { | |
253 | List<Element> commentElements = new ArrayList<Element>(); | |
254 | if (container != null) { | |
255 | for (Element commentElement : container.children()) { | |
256 | if (commentElement.hasClass("comment")) { | |
257 | commentElements.add(commentElement); | |
258 | } | |
259 | } | |
260 | } | |
261 | return commentElements; | |
2d95a873 NR |
262 | } |
263 | } |