Small fixes in different places
[gofetch.git] / src / be / nikiroo / gofetch / support / Pipedot.java
CommitLineData
2d95a873
NR
1package be.nikiroo.gofetch.support;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.URL;
6import java.util.ArrayList;
7import java.util.List;
8
9import org.jsoup.helper.DataUtil;
10import org.jsoup.nodes.Document;
11import org.jsoup.nodes.Element;
27008a87 12import org.jsoup.nodes.Node;
2d95a873
NR
13import org.jsoup.select.Elements;
14
15import be.nikiroo.gofetch.data.Comment;
16import be.nikiroo.gofetch.data.Story;
17
18/**
19 * Support <a href='https://pipedot.org/'>https://pipedot.org/</a>.
20 *
21 * @author niki
22 */
23public class Pipedot extends BasicSupport {
24 @Override
25 public String getDescription() {
26 return "Pipedot: News for nerds, without the corporate slant";
27 }
28
29 @Override
30 public List<Story> list() throws IOException {
31 List<Story> list = new ArrayList<Story>();
32
33 URL url = new URL("https://pipedot.org/");
136ab801 34 InputStream in = downloader.open(url);
2d95a873 35 Document doc = DataUtil.load(in, "UTF-8", url.toString());
100a8395
NR
36 Elements articles = doc.getElementsByClass("story");
37 for (Element article : articles) {
38 Elements titles = article.getElementsByTag("h1");
2d95a873
NR
39 if (titles.size() == 0) {
40 continue;
41 }
42
43 Element title = titles.get(0);
44
45 String id = "";
100a8395 46 for (Element idElem : article.getElementsByTag("a")) {
2d95a873
NR
47 if (idElem.attr("href").startsWith("/pipe/")) {
48 id = idElem.attr("href").substring("/pipe/".length());
49 break;
50 }
51 }
52
53 String intUrl = null;
54 String extUrl = null;
55
100a8395 56 Elements links = article.getElementsByTag("a");
2d95a873
NR
57 if (links.size() > 0) {
58 intUrl = links.get(0).absUrl("href");
59 }
60
61 // Take first ext URL as original source
62 for (Element link : links) {
63 String uuu = link.absUrl("href");
64 if (!uuu.isEmpty() && !uuu.contains("pipedot.org/")) {
65 extUrl = uuu;
66 break;
67 }
68 }
69
70 String details = "";
100a8395 71 Elements detailsElements = article.getElementsByTag("div");
2d95a873 72 if (detailsElements.size() > 0) {
c9cffa91 73 details = detailsElements.get(0).text().trim();
2d95a873
NR
74 }
75
c9cffa91
NR
76 String author = "";
77 int pos = details.indexOf("by ");
78 if (pos >= 0) {
79 author = details.substring(pos + "by ".length()).trim();
80 pos = author.indexOf(" in ");
81 if (pos >= 0) {
82 author = author.substring(0, pos).trim();
83 }
84 }
85
86 String categ = "";
87 pos = details.indexOf(" in ");
88 if (pos >= 0) {
89 categ = details.substring(pos + " in ".length()).trim();
90 pos = categ.indexOf(" on ");
91 if (pos >= 0) {
92 categ = categ.substring(0, pos).trim();
93 }
94 }
95
96 String date = "";
97 Element dateElement = article.getElementsByTag("time").first();
98 if (dateElement != null) {
99 date = date(dateElement.attr("datetime"));
100 }
101
102 // We already have all the details (date, author, id, categ)
103 details = "";
104
2d95a873 105 String body = "";
100a8395 106 for (Element elem : article.children()) {
2d95a873
NR
107 String tag = elem.tag().toString();
108 if (!tag.equals("header") && !tag.equals("footer")) {
109 body = elem.text();
110 break;
111 }
112 }
113
c9cffa91
NR
114 list.add(new Story(getType(), id, title.text(), author, date,
115 categ, details, intUrl, extUrl, body));
2d95a873
NR
116 }
117
118 return list;
119 }
120
121 @Override
5c056aad 122 public void fetch(Story story) throws IOException {
2d95a873
NR
123 List<Comment> comments = new ArrayList<Comment>();
124
125 URL url = new URL(story.getUrlInternal());
136ab801 126 InputStream in = downloader.open(url);
2d95a873
NR
127 Document doc = DataUtil.load(in, "UTF-8", url.toString());
128 Elements listing = doc.getElementsByTag("main");
129 if (listing.size() > 0) {
130 comments.addAll(getComments(listing.get(0)));
131 }
132
5c056aad 133 story.setComments(comments);
2d95a873
NR
134 }
135
136 private List<Comment> getComments(Element listing) {
137 List<Comment> comments = new ArrayList<Comment>();
138 for (Element commentElement : listing.children()) {
139 if (commentElement.hasClass("comment")) {
140 Comment comment = getComment(commentElement);
141 if (!comment.isEmpty()) {
142 comments.add(comment);
143 }
144 }
145 }
146 return comments;
147 }
148
149 private Comment getComment(Element commentElement) {
27008a87
NR
150 String title = firstOrEmptyTag(commentElement, "h3").text();
151 String author = firstOrEmpty(commentElement, "h4").text();
152 Element content = firstOrEmpty(commentElement, "comment-body");
2d95a873
NR
153
154 String date = "";
155 int pos = author.lastIndexOf(" on ");
156 if (pos >= 0) {
157 date = author.substring(pos + " on ".length()).trim();
158 author = author.substring(0, pos).trim();
159 }
160
161 Comment comment = new Comment(commentElement.id(), author, title, date,
27008a87 162 toLines(content));
2d95a873
NR
163
164 Elements commentOutline = commentElement
165 .getElementsByClass("comment-outline");
166 if (commentOutline.size() > 0) {
167 comment.addAll(getComments(commentOutline.get(0)));
168 }
169
170 return comment;
171 }
172
27008a87 173 private List<String> toLines(Element element) {
20217360 174 return toLines(element, new BasicElementProcessor() {
27008a87
NR
175 @Override
176 public boolean detectQuote(Node node) {
177 if (node instanceof Element) {
178 Element elementNode = (Element) node;
179 if (elementNode.tagName().equals("blockquote")
180 || elementNode.hasClass("quote")) {
181 return true;
182 }
183 }
2d95a873 184
27008a87
NR
185 return false;
186 }
27008a87 187 });
2d95a873
NR
188 }
189}