Fix layout issues in getContent() text
[gofetch.git] / src / be / nikiroo / gofetch / support / Pipedot.java
CommitLineData
2d95a873
NR
1package be.nikiroo.gofetch.support;
2
3import java.io.IOException;
2d95a873 4import java.net.URL;
3e62b034 5import java.util.AbstractMap;
2d95a873
NR
6import java.util.ArrayList;
7import java.util.List;
3e62b034 8import java.util.Map.Entry;
2d95a873 9
2d95a873
NR
10import org.jsoup.nodes.Document;
11import org.jsoup.nodes.Element;
27008a87 12import org.jsoup.nodes.Node;
2d95a873
NR
13import org.jsoup.select.Elements;
14
2d95a873
NR
15/**
16 * Support <a href='https://pipedot.org/'>https://pipedot.org/</a>.
17 *
18 * @author niki
19 */
20public class Pipedot extends BasicSupport {
21 @Override
22 public String getDescription() {
23 return "Pipedot: News for nerds, without the corporate slant";
24 }
25
26 @Override
3e62b034
NR
27 protected List<Entry<URL, String>> getUrls() throws IOException {
28 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
29 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
30 "https://pipedot.org/"), ""));
31 return urls;
32 }
2d95a873 33
3e62b034
NR
34 @Override
35 protected List<Element> getArticles(Document doc) {
36 return doc.getElementsByClass("story");
37 }
2d95a873 38
3e62b034
NR
39 @Override
40 protected String getArticleId(Document doc, Element article) {
41 // Don't try on bad articles
42 if (getArticleTitle(doc, article).isEmpty()) {
43 return "";
44 }
2d95a873 45
3e62b034
NR
46 for (Element idElem : article.getElementsByTag("a")) {
47 if (idElem.attr("href").startsWith("/pipe/")) {
48 return idElem.attr("href").substring("/pipe/".length());
2d95a873 49 }
3e62b034 50 }
2d95a873 51
3e62b034
NR
52 return "";
53 }
2d95a873 54
3e62b034
NR
55 @Override
56 protected String getArticleTitle(Document doc, Element article) {
57 Element title = article.getElementsByTag("h1").first();
58 if (title != null) {
59 return title.text();
60 }
2d95a873 61
3e62b034
NR
62 return "";
63 }
2d95a873 64
3e62b034
NR
65 @Override
66 protected String getArticleAuthor(Document doc, Element article) {
67 String value = getArticleDetailsReal(article);
68 int pos = value.indexOf("by ");
69 if (pos >= 0) {
70 value = value.substring(pos + "by ".length()).trim();
71 pos = value.indexOf(" in ");
c9cffa91 72 if (pos >= 0) {
3e62b034 73 value = value.substring(0, pos).trim();
c9cffa91
NR
74 }
75
3e62b034
NR
76 return value;
77 }
78
79 return "";
80 }
81
82 @Override
83 protected String getArticleDate(Document doc, Element article) {
84 Element dateElement = article.getElementsByTag("time").first();
85 if (dateElement != null) {
86 return dateElement.attr("datetime");
87 }
88
89 return "";
90 }
91
92 @Override
93 protected String getArticleCategory(Document doc, Element article,
94 String currentCategory) {
95 String value = getArticleDetailsReal(article);
96 int pos = value.indexOf(" in ");
97 if (pos >= 0) {
98 value = value.substring(pos + " in ".length()).trim();
99 pos = value.indexOf(" on ");
c9cffa91 100 if (pos >= 0) {
3e62b034 101 value = value.substring(0, pos).trim();
c9cffa91
NR
102 }
103
3e62b034
NR
104 return value;
105 }
c9cffa91 106
3e62b034
NR
107 return "";
108 }
c9cffa91 109
3e62b034
NR
110 @Override
111 protected String getArticleDetails(Document doc, Element article) {
112 return ""; // We alrady extracted all the info
113 }
114
115 @Override
116 protected String getArticleIntUrl(Document doc, Element article) {
117 Element link = article.getElementsByTag("a").first();
118 if (link != null) {
119 return link.absUrl("href");
120 }
121
122 return "";
123 }
124
125 @Override
126 protected String getArticleExtUrl(Document doc, Element article) {
127 Element link = article.getElementsByTag("a").first();
128 if (link != null) {
129 String possibleExtLink = link.absUrl("href").trim();
130 if (!possibleExtLink.isEmpty()
131 && !possibleExtLink.contains("pipedot.org/")) {
132 return possibleExtLink;
2d95a873 133 }
3e62b034 134 }
2d95a873 135
3e62b034
NR
136 return "";
137 }
138
139 @Override
140 protected String getArticleContent(Document doc, Element article) {
141 for (Element elem : article.children()) {
142 String tag = elem.tagName();
143 if (!tag.equals("header") && !tag.equals("footer")) {
e818d449 144 return getArticleText(elem);
3e62b034 145 }
2d95a873
NR
146 }
147
3e62b034
NR
148 return "";
149 }
150
151 @Override
152 protected Element getFullArticle(Document doc) {
153 return null;
154 }
155
156 @Override
157 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
158 return getCommentElements(doc.getElementsByTag("main").first());
159 }
160
161 @Override
162 protected ElementProcessor getElementProcessorFullArticle() {
163 return new BasicElementProcessor();
2d95a873
NR
164 }
165
166 @Override
3e62b034
NR
167 protected List<Element> getCommentCommentPosts(Document doc,
168 Element container) {
2d95a873 169
3e62b034
NR
170 if (container != null) {
171 container = container.getElementsByClass("comment-outline").first();
2d95a873
NR
172 }
173
3e62b034 174 return getCommentElements(container);
2d95a873
NR
175 }
176
3e62b034
NR
177 @Override
178 protected String getCommentId(Element post) {
179 return post.id();
180 }
181
182 @Override
183 protected String getCommentAuthor(Element post) {
184 Element authorDateE = post.getElementsByTag("h3").first();
185 if (authorDateE != null) {
186 String authorDate = authorDateE.text();
187 int pos = authorDate.lastIndexOf(" on ");
188 if (pos >= 0) {
189 return authorDate.substring(0, pos).trim();
2d95a873
NR
190 }
191 }
2d95a873 192
3e62b034
NR
193 return "";
194 }
2d95a873 195
3e62b034
NR
196 @Override
197 protected String getCommentTitle(Element post) {
198 Element title = post.getElementsByTag("h3").first();
199 if (title != null) {
200 return title.text();
2d95a873
NR
201 }
202
3e62b034
NR
203 return "";
204 }
2d95a873 205
3e62b034
NR
206 @Override
207 protected String getCommentDate(Element post) {
208 Element authorDateE = post.getElementsByTag("h3").first();
209 if (authorDateE != null) {
210 String authorDate = authorDateE.text();
211 int pos = authorDate.lastIndexOf(" on ");
212 if (pos >= 0) {
213 return authorDate.substring(pos + " on ".length()).trim();
214 }
2d95a873
NR
215 }
216
3e62b034
NR
217 return "";
218 }
219
220 @Override
221 protected Element getCommentContentElement(Element post) {
222 return post.getElementsByClass("comment-body").first();
2d95a873
NR
223 }
224
3e62b034
NR
225 @Override
226 protected ElementProcessor getElementProcessorComment() {
227 return new BasicElementProcessor() {
27008a87
NR
228 @Override
229 public boolean detectQuote(Node node) {
230 if (node instanceof Element) {
231 Element elementNode = (Element) node;
232 if (elementNode.tagName().equals("blockquote")
233 || elementNode.hasClass("quote")) {
234 return true;
235 }
236 }
2d95a873 237
27008a87
NR
238 return false;
239 }
3e62b034
NR
240 };
241 }
242
243 private String getArticleDetailsReal(Element article) {
244 Elements detailsElements = article.getElementsByTag("div");
245 if (detailsElements.size() > 0) {
246 return detailsElements.get(0).text().trim();
247 }
248
249 return "";
250 }
251
252 private List<Element> getCommentElements(Element container) {
253 List<Element> commentElements = new ArrayList<Element>();
254 if (container != null) {
255 for (Element commentElement : container.children()) {
256 if (commentElement.hasClass("comment")) {
257 commentElements.add(commentElement);
258 }
259 }
260 }
261 return commentElements;
2d95a873
NR
262 }
263}