Small fixes in different places
[gofetch.git] / src / be / nikiroo / gofetch / support / Pipedot.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.List;
8
9 import org.jsoup.helper.DataUtil;
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 import be.nikiroo.gofetch.data.Comment;
16 import be.nikiroo.gofetch.data.Story;
17
18 /**
19 * Support <a href='https://pipedot.org/'>https://pipedot.org/</a>.
20 *
21 * @author niki
22 */
23 public class Pipedot extends BasicSupport {
24 @Override
25 public String getDescription() {
26 return "Pipedot: News for nerds, without the corporate slant";
27 }
28
29 @Override
30 public List<Story> list() throws IOException {
31 List<Story> list = new ArrayList<Story>();
32
33 URL url = new URL("https://pipedot.org/");
34 InputStream in = downloader.open(url);
35 Document doc = DataUtil.load(in, "UTF-8", url.toString());
36 Elements articles = doc.getElementsByClass("story");
37 for (Element article : articles) {
38 Elements titles = article.getElementsByTag("h1");
39 if (titles.size() == 0) {
40 continue;
41 }
42
43 Element title = titles.get(0);
44
45 String id = "";
46 for (Element idElem : article.getElementsByTag("a")) {
47 if (idElem.attr("href").startsWith("/pipe/")) {
48 id = idElem.attr("href").substring("/pipe/".length());
49 break;
50 }
51 }
52
53 String intUrl = null;
54 String extUrl = null;
55
56 Elements links = article.getElementsByTag("a");
57 if (links.size() > 0) {
58 intUrl = links.get(0).absUrl("href");
59 }
60
61 // Take first ext URL as original source
62 for (Element link : links) {
63 String uuu = link.absUrl("href");
64 if (!uuu.isEmpty() && !uuu.contains("pipedot.org/")) {
65 extUrl = uuu;
66 break;
67 }
68 }
69
70 String details = "";
71 Elements detailsElements = article.getElementsByTag("div");
72 if (detailsElements.size() > 0) {
73 details = detailsElements.get(0).text().trim();
74 }
75
76 String author = "";
77 int pos = details.indexOf("by ");
78 if (pos >= 0) {
79 author = details.substring(pos + "by ".length()).trim();
80 pos = author.indexOf(" in ");
81 if (pos >= 0) {
82 author = author.substring(0, pos).trim();
83 }
84 }
85
86 String categ = "";
87 pos = details.indexOf(" in ");
88 if (pos >= 0) {
89 categ = details.substring(pos + " in ".length()).trim();
90 pos = categ.indexOf(" on ");
91 if (pos >= 0) {
92 categ = categ.substring(0, pos).trim();
93 }
94 }
95
96 String date = "";
97 Element dateElement = article.getElementsByTag("time").first();
98 if (dateElement != null) {
99 date = date(dateElement.attr("datetime"));
100 }
101
102 // We already have all the details (date, author, id, categ)
103 details = "";
104
105 String body = "";
106 for (Element elem : article.children()) {
107 String tag = elem.tag().toString();
108 if (!tag.equals("header") && !tag.equals("footer")) {
109 body = elem.text();
110 break;
111 }
112 }
113
114 list.add(new Story(getType(), id, title.text(), author, date,
115 categ, details, intUrl, extUrl, body));
116 }
117
118 return list;
119 }
120
121 @Override
122 public void fetch(Story story) throws IOException {
123 List<Comment> comments = new ArrayList<Comment>();
124
125 URL url = new URL(story.getUrlInternal());
126 InputStream in = downloader.open(url);
127 Document doc = DataUtil.load(in, "UTF-8", url.toString());
128 Elements listing = doc.getElementsByTag("main");
129 if (listing.size() > 0) {
130 comments.addAll(getComments(listing.get(0)));
131 }
132
133 story.setComments(comments);
134 }
135
136 private List<Comment> getComments(Element listing) {
137 List<Comment> comments = new ArrayList<Comment>();
138 for (Element commentElement : listing.children()) {
139 if (commentElement.hasClass("comment")) {
140 Comment comment = getComment(commentElement);
141 if (!comment.isEmpty()) {
142 comments.add(comment);
143 }
144 }
145 }
146 return comments;
147 }
148
149 private Comment getComment(Element commentElement) {
150 String title = firstOrEmptyTag(commentElement, "h3").text();
151 String author = firstOrEmpty(commentElement, "h4").text();
152 Element content = firstOrEmpty(commentElement, "comment-body");
153
154 String date = "";
155 int pos = author.lastIndexOf(" on ");
156 if (pos >= 0) {
157 date = author.substring(pos + " on ".length()).trim();
158 author = author.substring(0, pos).trim();
159 }
160
161 Comment comment = new Comment(commentElement.id(), author, title, date,
162 toLines(content));
163
164 Elements commentOutline = commentElement
165 .getElementsByClass("comment-outline");
166 if (commentOutline.size() > 0) {
167 comment.addAll(getComments(commentOutline.get(0)));
168 }
169
170 return comment;
171 }
172
173 private List<String> toLines(Element element) {
174 return toLines(element, new BasicElementProcessor() {
175 @Override
176 public boolean detectQuote(Node node) {
177 if (node instanceof Element) {
178 Element elementNode = (Element) node;
179 if (elementNode.tagName().equals("blockquote")
180 || elementNode.hasClass("quote")) {
181 return true;
182 }
183 }
184
185 return false;
186 }
187 });
188 }
189 }