1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
5 import java
.util
.AbstractMap
;
6 import java
.util
.ArrayList
;
8 import java
.util
.Map
.Entry
;
10 import org
.jsoup
.nodes
.Document
;
11 import org
.jsoup
.nodes
.Element
;
12 import org
.jsoup
.nodes
.Node
;
13 import org
.jsoup
.select
.Elements
;
16 * Support <a href='https://pipedot.org/'>https://pipedot.org/</a>.
20 public class Pipedot
extends BasicSupport
{
22 public String
getDescription() {
23 return "Pipedot: News for nerds, without the corporate slant";
27 protected List
<Entry
<URL
, String
>> getUrls() throws IOException
{
28 List
<Entry
<URL
, String
>> urls
= new ArrayList
<Entry
<URL
, String
>>();
29 urls
.add(new AbstractMap
.SimpleEntry
<URL
, String
>(new URL(
30 "https://pipedot.org/"), ""));
35 protected List
<Element
> getArticles(Document doc
) {
36 return doc
.getElementsByClass("story");
/**
 * Get the ID of the given article.
 * <p>
 * The ID is read from the first link (<tt>a</tt> tag) of the article whose
 * <tt>href</tt> starts with <tt>/pipe/</tt>: it is whatever follows that
 * prefix.
 * <p>
 * Articles whose title (see getArticleTitle) is empty are treated as bad
 * articles and are handled before any link is inspected.
 * <p>
 * NOTE(review): the value returned for bad articles, and the value returned
 * when no link matches, are not visible in this view -- confirm.
 */
40 protected String
getArticleId(Document doc
, Element article
) {
41 // Don't try on bad articles
42 if (getArticleTitle(doc
, article
).isEmpty()) {
// Scan the links in document order and stop at the first "/pipe/" one
46 for (Element idElem
: article
.getElementsByTag("a")) {
47 if (idElem
.attr("href").startsWith("/pipe/")) {
48 return idElem
.attr("href").substring("/pipe/".length());
/**
 * Get the title of the given article.
 * <p>
 * The lookup starts from the first <tt>h1</tt> element of the article.
 * <p>
 * NOTE(review): how the title is derived from that element, and what is
 * returned when the article has no <tt>h1</tt>, is not visible in this view
 * -- confirm (getArticleId calls <tt>isEmpty()</tt> on the result, so it
 * presumably never returns <tt>null</tt>).
 */
56 protected String
getArticleTitle(Document doc
, Element article
) {
57 Element title
= article
.getElementsByTag("h1").first();
/**
 * Get the author of the given article.
 * <p>
 * Works on the details text returned by getArticleDetailsReal: the author
 * is the part located after the first <tt>"by "</tt> and before the
 * <tt>" in "</tt> that follows it.
 * <p>
 * NOTE(review): any guard on <tt>pos</tt> (marker not found) and the final
 * return statement are not visible in this view -- confirm the behaviour
 * when a marker is missing.
 */
66 protected String
getArticleAuthor(Document doc
, Element article
) {
67 String value
= getArticleDetailsReal(article
);
68 int pos
= value
.indexOf("by ");
// Drop everything up to and including "by "
70 value
= value
.substring(pos
+ "by ".length()).trim();
71 pos
= value
.indexOf(" in ");
// Keep only what precedes " in "
73 value
= value
.substring(0, pos
).trim();
/**
 * Get the date of the given article.
 * <p>
 * When the article contains a <tt>time</tt> element, the value of the
 * <tt>datetime</tt> attribute of the first one is returned as-is.
 * <p>
 * NOTE(review): the value returned when there is no <tt>time</tt> element is
 * not visible in this view -- confirm.
 */
83 protected String
getArticleDate(Document doc
, Element article
) {
84 Element dateElement
= article
.getElementsByTag("time").first();
85 if (dateElement
!= null) {
86 return dateElement
.attr("datetime");
/**
 * Get the category of the given article.
 * <p>
 * Works on the details text returned by getArticleDetailsReal: the category
 * is the part located after the first <tt>" in "</tt> and before the
 * <tt>" on "</tt> that follows it.
 * <p>
 * The <tt>currentCategory</tt> parameter is not used by the statements
 * visible here.
 * <p>
 * NOTE(review): any guard on <tt>pos</tt> (marker not found) and the final
 * return statement are not visible in this view -- confirm the behaviour
 * when a marker is missing.
 */
93 protected String
getArticleCategory(Document doc
, Element article
,
94 String currentCategory
) {
95 String value
= getArticleDetailsReal(article
);
96 int pos
= value
.indexOf(" in ");
// Drop everything up to and including " in "
98 value
= value
.substring(pos
+ " in ".length()).trim();
99 pos
= value
.indexOf(" on ");
// Keep only what precedes " on "
101 value
= value
.substring(0, pos
).trim();
111 protected String
getArticleDetails(Document doc
, Element article
) {
112 return ""; // We alrady extracted all the info
/**
 * Get the internal URL of the given article.
 * <p>
 * The URL is the absolute form of the <tt>href</tt> of the first link
 * (<tt>a</tt> tag) found in the article.
 * <p>
 * NOTE(review): a line between the lookup and the return is not visible in
 * this view (presumably a <tt>null</tt> check on <tt>link</tt>), nor is the
 * fallback value -- confirm.
 */
116 protected String
getArticleIntUrl(Document doc
, Element article
) {
117 Element link
= article
.getElementsByTag("a").first();
119 return link
.absUrl("href");
/**
 * Get the external URL of the given article.
 * <p>
 * The candidate is the trimmed absolute <tt>href</tt> of the first link
 * (<tt>a</tt> tag) of the article; it is returned only when it is not empty
 * and does not contain <tt>pipedot.org/</tt>.
 * <p>
 * NOTE(review): a line between the lookup and its use is not visible in
 * this view (presumably a <tt>null</tt> check on <tt>link</tt>), nor is the
 * value returned when the candidate is rejected -- confirm.
 */
126 protected String
getArticleExtUrl(Document doc
, Element article
) {
127 Element link
= article
.getElementsByTag("a").first();
129 String possibleExtLink
= link
.absUrl("href").trim();
// A link that stays on pipedot.org is not an external link
130 if (!possibleExtLink
.isEmpty()
131 && !possibleExtLink
.contains("pipedot.org/")) {
132 return possibleExtLink
;
/**
 * Get the content of the given article.
 * <p>
 * The direct children of the article are scanned in order; the first one
 * whose tag is neither <tt>header</tt> nor <tt>footer</tt> is passed to
 * getArticleText and that result is returned.
 * <p>
 * NOTE(review): the value returned when every child is a header or footer
 * is not visible in this view -- confirm.
 */
140 protected String
getArticleContent(Document doc
, Element article
) {
141 for (Element elem
: article
.children()) {
142 String tag
= elem
.tagName();
143 if (!tag
.equals("header") && !tag
.equals("footer")) {
144 return getArticleText(elem
);
/**
 * Get the element holding the full article in the given document.
 * <p>
 * NOTE(review): the body of this method is not visible in this view -- the
 * summary above is inferred from the name and signature only; confirm.
 */
152 protected Element
getFullArticle(Document doc
) {
157 protected List
<Element
> getFullArticleCommentPosts(Document doc
, URL intUrl
) {
158 return getCommentElements(doc
.getElementsByTag("main").first());
162 protected ElementProcessor
getElementProcessorFullArticle() {
163 return new BasicElementProcessor();
/**
 * Lists the comment posts nested under a comment container.
 * <p>
 * When <tt>container</tt> is not <tt>null</tt>, it is first narrowed to the
 * first element with the class <tt>comment-outline</tt> found under it; the
 * result (possibly <tt>null</tt>) is then handed to getCommentElements.
 * <p>
 * NOTE(review): the end of the parameter list (where <tt>container</tt> is
 * presumably declared) is not visible in this view -- confirm.
 */
167 protected List
<Element
> getCommentCommentPosts(Document doc
,
170 if (container
!= null) {
171 container
= container
.getElementsByClass("comment-outline").first();
174 return getCommentElements(container
);
/**
 * Get the ID of the given comment post.
 * <p>
 * NOTE(review): the body of this method is not visible in this view -- the
 * summary above is inferred from the name and signature only; confirm.
 */
178 protected String
getCommentId(Element post
) {
/**
 * Get the author of the given comment post.
 * <p>
 * The author is read from the text of the first <tt>h3</tt> element of the
 * post: it is the part that precedes the last <tt>" on "</tt>.
 * <p>
 * NOTE(review): any guard on <tt>pos</tt> (no <tt>" on "</tt> in the text)
 * and the value returned when there is no <tt>h3</tt> are not visible in
 * this view -- confirm.
 */
183 protected String
getCommentAuthor(Element post
) {
184 Element authorDateE
= post
.getElementsByTag("h3").first();
185 if (authorDateE
!= null) {
186 String authorDate
= authorDateE
.text();
// lastIndexOf: only the final " on " is used as the separator
187 int pos
= authorDate
.lastIndexOf(" on ");
189 return authorDate
.substring(0, pos
).trim();
/**
 * Get the title of the given comment post.
 * <p>
 * The lookup starts from the first <tt>h3</tt> element of the post (the
 * same element getCommentAuthor and getCommentDate read).
 * <p>
 * NOTE(review): how the title is derived from that element, and the
 * fallback when there is no <tt>h3</tt>, is not visible in this view --
 * confirm.
 */
197 protected String
getCommentTitle(Element post
) {
198 Element title
= post
.getElementsByTag("h3").first();
/**
 * Get the date of the given comment post.
 * <p>
 * The date is read from the text of the first <tt>h3</tt> element of the
 * post: it is the part that follows the last <tt>" on "</tt>.
 * <p>
 * NOTE(review): any guard on <tt>pos</tt> (no <tt>" on "</tt> in the text)
 * and the value returned when there is no <tt>h3</tt> are not visible in
 * this view -- confirm.
 */
207 protected String
getCommentDate(Element post
) {
208 Element authorDateE
= post
.getElementsByTag("h3").first();
209 if (authorDateE
!= null) {
210 String authorDate
= authorDateE
.text();
// lastIndexOf: only the final " on " is used as the separator
211 int pos
= authorDate
.lastIndexOf(" on ");
213 return authorDate
.substring(pos
+ " on ".length()).trim();
221 protected Element
getCommentContentElement(Element post
) {
222 return post
.getElementsByClass("comment-body").first();
/**
 * Get the processor to use on the content of a comment.
 * <p>
 * It is a BasicElementProcessor whose <tt>detectQuote</tt> singles out
 * element nodes that are either a <tt>blockquote</tt> tag or carry the
 * class <tt>quote</tt>.
 * <p>
 * NOTE(review): what <tt>detectQuote</tt> returns for matching and
 * non-matching nodes is not visible in this view -- presumably
 * <tt>true</tt> for the matching ones; confirm.
 */
226 protected ElementProcessor
getElementProcessorComment() {
227 return new BasicElementProcessor() {
229 public boolean detectQuote(Node node
) {
// Only element nodes have a tag name and classes to test
230 if (node
instanceof Element
) {
231 Element elementNode
= (Element
) node
;
232 if (elementNode
.tagName().equals("blockquote")
233 || elementNode
.hasClass("quote")) {
/**
 * Get the raw details text of the given article: the trimmed text of the
 * first <tt>div</tt> element found in it.
 * <p>
 * getArticleAuthor and getArticleCategory parse this text.
 * <p>
 * NOTE(review): the value returned when the article contains no
 * <tt>div</tt> is not visible in this view -- the callers invoke
 * <tt>indexOf</tt> on the result without a <tt>null</tt> check, so it is
 * presumably a non-<tt>null</tt> String; confirm.
 */
243 private String
getArticleDetailsReal(Element article
) {
244 Elements detailsElements
= article
.getElementsByTag("div");
245 if (detailsElements
.size() > 0) {
246 return detailsElements
.get(0).text().trim();
252 private List
<Element
> getCommentElements(Element container
) {
253 List
<Element
> commentElements
= new ArrayList
<Element
>();
254 if (container
!= null) {
255 for (Element commentElement
: container
.children()) {
256 if (commentElement
.hasClass("comment")) {
257 commentElements
.add(commentElement
);
261 return commentElements
;