Reddit test: add expected files
[gofetch.git] / src / be / nikiroo / gofetch / support / Pipedot.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.net.URL;
5 import java.util.AbstractMap;
6 import java.util.ArrayList;
7 import java.util.List;
8 import java.util.Map.Entry;
9
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 /**
16 * Support <a href='https://pipedot.org/'>https://pipedot.org/</a>.
17 *
18 * @author niki
19 */
20 public class Pipedot extends BasicSupport {
21 @Override
22 public String getDescription() {
23 return "Pipedot: News for nerds, without the corporate slant";
24 }
25
26 @Override
27 protected List<Entry<URL, String>> getUrls() throws IOException {
28 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
29 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
30 "https://pipedot.org/"), ""));
31 return urls;
32 }
33
34 @Override
35 protected List<Element> getArticles(Document doc) {
36 return doc.getElementsByClass("story");
37 }
38
39 @Override
40 protected String getArticleId(Document doc, Element article) {
41 // Don't try on bad articles
42 if (getArticleTitle(doc, article).isEmpty()) {
43 return "";
44 }
45
46 for (Element idElem : article.getElementsByTag("a")) {
47 if (idElem.attr("href").startsWith("/pipe/")) {
48 return idElem.attr("href").substring("/pipe/".length());
49 }
50 }
51
52 return "";
53 }
54
55 @Override
56 protected String getArticleTitle(Document doc, Element article) {
57 Element title = article.getElementsByTag("h1").first();
58 if (title != null) {
59 return title.text();
60 }
61
62 return "";
63 }
64
65 @Override
66 protected String getArticleAuthor(Document doc, Element article) {
67 String value = getArticleDetailsReal(article);
68 int pos = value.indexOf("by ");
69 if (pos >= 0) {
70 value = value.substring(pos + "by ".length()).trim();
71 pos = value.indexOf(" in ");
72 if (pos >= 0) {
73 value = value.substring(0, pos).trim();
74 }
75
76 return value;
77 }
78
79 return "";
80 }
81
82 @Override
83 protected String getArticleDate(Document doc, Element article) {
84 Element dateElement = article.getElementsByTag("time").first();
85 if (dateElement != null) {
86 return dateElement.attr("datetime");
87 }
88
89 return "";
90 }
91
92 @Override
93 protected String getArticleCategory(Document doc, Element article,
94 String currentCategory) {
95 String value = getArticleDetailsReal(article);
96 int pos = value.indexOf(" in ");
97 if (pos >= 0) {
98 value = value.substring(pos + " in ".length()).trim();
99 pos = value.indexOf(" on ");
100 if (pos >= 0) {
101 value = value.substring(0, pos).trim();
102 }
103
104 return value;
105 }
106
107 return "";
108 }
109
110 @Override
111 protected String getArticleDetails(Document doc, Element article) {
112 return ""; // We alrady extracted all the info
113 }
114
115 @Override
116 protected String getArticleIntUrl(Document doc, Element article) {
117 Element link = article.getElementsByTag("a").first();
118 if (link != null) {
119 return link.absUrl("href");
120 }
121
122 return "";
123 }
124
125 @Override
126 protected String getArticleExtUrl(Document doc, Element article) {
127 Element link = article.getElementsByTag("a").first();
128 if (link != null) {
129 String possibleExtLink = link.absUrl("href").trim();
130 if (!possibleExtLink.isEmpty()
131 && !possibleExtLink.contains("pipedot.org/")) {
132 return possibleExtLink;
133 }
134 }
135
136 return "";
137 }
138
139 @Override
140 protected String getArticleContent(Document doc, Element article) {
141 for (Element elem : article.children()) {
142 String tag = elem.tagName();
143 if (!tag.equals("header") && !tag.equals("footer")) {
144 return getArticleText(elem);
145 }
146 }
147
148 return "";
149 }
150
151 @Override
152 protected Element getFullArticle(Document doc) {
153 return null;
154 }
155
156 @Override
157 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
158 return getCommentElements(doc.getElementsByTag("main").first());
159 }
160
161 @Override
162 protected ElementProcessor getElementProcessorFullArticle() {
163 return new BasicElementProcessor();
164 }
165
166 @Override
167 protected List<Element> getCommentCommentPosts(Document doc,
168 Element container) {
169
170 if (container != null) {
171 container = container.getElementsByClass("comment-outline").first();
172 }
173
174 return getCommentElements(container);
175 }
176
177 @Override
178 protected String getCommentId(Element post) {
179 return post.id();
180 }
181
182 @Override
183 protected String getCommentAuthor(Element post) {
184 Element authorDateE = post.getElementsByTag("h3").first();
185 if (authorDateE != null) {
186 String authorDate = authorDateE.text();
187 int pos = authorDate.lastIndexOf(" on ");
188 if (pos >= 0) {
189 return authorDate.substring(0, pos).trim();
190 }
191 }
192
193 return "";
194 }
195
196 @Override
197 protected String getCommentTitle(Element post) {
198 Element title = post.getElementsByTag("h3").first();
199 if (title != null) {
200 return title.text();
201 }
202
203 return "";
204 }
205
206 @Override
207 protected String getCommentDate(Element post) {
208 Element authorDateE = post.getElementsByTag("h3").first();
209 if (authorDateE != null) {
210 String authorDate = authorDateE.text();
211 int pos = authorDate.lastIndexOf(" on ");
212 if (pos >= 0) {
213 return authorDate.substring(pos + " on ".length()).trim();
214 }
215 }
216
217 return "";
218 }
219
220 @Override
221 protected Element getCommentContentElement(Element post) {
222 return post.getElementsByClass("comment-body").first();
223 }
224
225 @Override
226 protected ElementProcessor getElementProcessorComment() {
227 return new BasicElementProcessor() {
228 @Override
229 public boolean detectQuote(Node node) {
230 if (node instanceof Element) {
231 Element elementNode = (Element) node;
232 if (elementNode.tagName().equals("blockquote")
233 || elementNode.hasClass("quote")) {
234 return true;
235 }
236 }
237
238 return false;
239 }
240 };
241 }
242
243 private String getArticleDetailsReal(Element article) {
244 Elements detailsElements = article.getElementsByTag("div");
245 if (detailsElements.size() > 0) {
246 return detailsElements.get(0).text().trim();
247 }
248
249 return "";
250 }
251
252 private List<Element> getCommentElements(Element container) {
253 List<Element> commentElements = new ArrayList<Element>();
254 if (container != null) {
255 for (Element commentElement : container.children()) {
256 if (commentElement.hasClass("comment")) {
257 commentElements.add(commentElement);
258 }
259 }
260 }
261 return commentElements;
262 }
263 }