Small fixes in different places
[gofetch.git] / src / be / nikiroo / gofetch / support / Slashdot.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.List;
8
9 import org.jsoup.helper.DataUtil;
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 import be.nikiroo.gofetch.data.Comment;
16 import be.nikiroo.gofetch.data.Story;
17 import be.nikiroo.utils.StringUtils;
18
19 /**
20 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
21 *
22 * @author niki
23 */
24 public class Slashdot extends BasicSupport {
25 @Override
26 public String getDescription() {
27 return "Slashdot: News for nerds, stuff that matters!";
28 }
29
30 @Override
31 public List<Story> list() throws IOException {
32 List<Story> list = new ArrayList<Story>();
33
34 URL url = new URL("https://slashdot.org/");
35 InputStream in = downloader.open(url);
36 Document doc = DataUtil.load(in, "UTF-8", url.toString());
37 Elements articles = doc.getElementsByTag("header");
38 for (Element article : articles) {
39 Elements titles = article.getElementsByClass("story-title");
40 if (titles.size() == 0) {
41 continue;
42 }
43
44 Element title = titles.get(0);
45
46 String id = "" + title.attr("id");
47 if (id.startsWith("title-")) {
48 id = id.substring("title-".length());
49 }
50
51 Elements links = title.getElementsByTag("a");
52 String intUrl = "";
53 String extUrl = "";
54 if (links.size() > 0) {
55 intUrl = links.get(0).absUrl("href");
56 }
57 if (links.size() > 1) {
58 extUrl = links.get(1).absUrl("href");
59 }
60
61 String details = "";
62 Elements detailsElements = article.getElementsByClass("details");
63 if (detailsElements.size() > 0) {
64 details = detailsElements.get(0).text();
65 }
66
67 // details:
68 // "Posted by AUTHOR on DATE from the further-crackdown dept."
69 String author = "";
70 int pos = details.indexOf(" on ");
71 if (details.startsWith("Posted by ") && pos >= 0) {
72 author = details.substring("Posted by ".length(), pos).trim();
73 }
74 pos = details.indexOf(" from the ");
75 if (pos >= 0) {
76 details = details.substring(pos).trim();
77 }
78
79 String body = "";
80 Element bodyElement = doc.getElementById("text-" + id);
81 if (bodyElement != null) {
82 body = bodyElement.text();
83 }
84
85 String categ = "";
86 Element categElement = doc.getElementsByClass("topic").first();
87 if (categElement != null) {
88 categ = StringUtils.unhtml(categElement.text()).trim();
89 }
90
91 String date = "";
92 Element dateElement = doc.getElementsByTag("time").first();
93 if (dateElement != null) {
94 date = StringUtils.unhtml(dateElement.text()).trim();
95 if (date.startsWith("on ")) {
96 date = date.substring("on ".length());
97 }
98 }
99
100 list.add(new Story(getType(), id, title.text(), author, date,
101 categ, details, intUrl, extUrl, body));
102 }
103
104 return list;
105 }
106
107 @Override
108 public void fetch(Story story) throws IOException {
109 List<Comment> comments = new ArrayList<Comment>();
110
111 URL url = new URL(story.getUrlInternal());
112 InputStream in = downloader.open(url);
113 Document doc = DataUtil.load(in, "UTF-8", url.toString());
114 Element listing = doc.getElementById("commentlisting");
115 if (listing != null) {
116 comments.addAll(getComments(listing));
117 }
118
119 story.setComments(comments);
120 }
121
122 private List<Comment> getComments(Element listing) {
123 List<Comment> comments = new ArrayList<Comment>();
124 Comment lastComment = null;
125 for (Element commentElement : listing.children()) {
126 if (commentElement.hasClass("comment")) {
127 if (!commentElement.hasClass("hidden")) {
128 lastComment = getComment(commentElement);
129 comments.add(lastComment);
130 }
131
132 List<Comment> subComments = new ArrayList<Comment>();
133 for (Element child : commentElement.children()) {
134 if (child.id().contains("commtree_")) {
135 subComments.addAll(getComments(child));
136 }
137 }
138
139 if (lastComment == null) {
140 comments.addAll(subComments);
141 } else {
142 lastComment.addAll(subComments);
143 }
144 }
145 }
146
147 return comments;
148 }
149
150 /**
151 * Get a comment from the given element.
152 *
153 * @param commentElement
154 * the element to get the comment of.
155 *
156 * @return the comment, <b>NOT</b> including sub-comments
157 */
158 private Comment getComment(Element commentElement) {
159 String title = firstOrEmpty(commentElement, "title").text();
160 String author = firstOrEmpty(commentElement, "by").text();
161 String date = firstOrEmpty(commentElement, "otherdetails").text();
162 Element content = firstOrEmpty(commentElement, "commentBody");
163
164 return new Comment(commentElement.id(), author, title, date,
165 toLines(content));
166 }
167
168 private List<String> toLines(Element element) {
169 return toLines(element, new BasicElementProcessor() {
170 @Override
171 public String processText(String text) {
172 while (text.startsWith(">")) { // comment in one-liners
173 text = text.substring(1).trim();
174 }
175
176 return text;
177 }
178
179 @Override
180 public boolean detectQuote(Node node) {
181 if (node instanceof Element) {
182 Element elementNode = (Element) node;
183 if (elementNode.tagName().equals("blockquote")
184 || elementNode.hasClass("quote")
185 || (elementNode.tagName().equals("p")
186 && elementNode.textNodes().size() == 1 && elementNode
187 .textNodes().get(0).getWholeText()
188 .startsWith(">"))) {
189 return true;
190 }
191 }
192
193 return false;
194 }
195 });
196 }
197 }