4746cc2eacfb28b41d3983ae6baddd34e5939dd0
[gofetch.git] / src / be / nikiroo / gofetch / support / Slashdot.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.List;
8
9 import org.jsoup.helper.DataUtil;
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 import be.nikiroo.gofetch.data.Comment;
16 import be.nikiroo.gofetch.data.Story;
17 import be.nikiroo.utils.StringUtils;
18
19 /**
20 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
21 *
22 * @author niki
23 */
24 public class Slashdot extends BasicSupport {
25 @Override
26 public String getDescription() {
27 return "Slashdot: News for nerds, stuff that matters!";
28 }
29
30 @Override
31 public List<Story> list() throws IOException {
32 List<Story> list = new ArrayList<Story>();
33
34 URL url = new URL("https://slashdot.org/");
35 InputStream in = downloader.open(url);
36 Document doc = DataUtil.load(in, "UTF-8", url.toString());
37 Elements articles = doc.getElementsByTag("header");
38 for (Element article : articles) {
39 Elements titles = article.getElementsByClass("story-title");
40 if (titles.size() == 0) {
41 continue;
42 }
43
44 Element title = titles.get(0);
45
46 String id = "" + title.attr("id");
47 if (id.startsWith("title-")) {
48 id = id.substring("title-".length());
49 }
50
51 Elements links = title.getElementsByTag("a");
52 String intUrl = null;
53 String extUrl = null;
54 if (links.size() > 0) {
55 intUrl = links.get(0).absUrl("href");
56 }
57 if (links.size() > 1) {
58 extUrl = links.get(1).absUrl("href");
59 }
60
61 String details = "";
62 Elements detailsElements = article.getElementsByClass("details");
63 if (detailsElements.size() > 0) {
64 details = detailsElements.get(0).text();
65 }
66
67 // details:
68 // "Posted by AUTHOR on DATE from the further-crackdown dept."
69 String author = "";
70 int pos = details.indexOf(" on ");
71 if (details.startsWith("Posted by ") && pos >= 0) {
72 author = details.substring("Posted by ".length(), pos).trim();
73 }
74 pos = details.indexOf(" from the ");
75 if (pos >= 0) {
76 details = details.substring(pos).trim();
77 }
78
79 String body = "";
80 Element bodyElement = doc.getElementById("text-" + id);
81 if (bodyElement != null) {
82 body = bodyElement.text();
83 }
84
85 String categ = "";
86 Element categElement = doc.getElementsByClass("topic").first();
87 if (categElement != null) {
88 categ = StringUtils.unhtml(categElement.text()).trim();
89 }
90
91 String date = "";
92 Element dateElement = doc.getElementsByTag("time").first();
93 if (dateElement != null) {
94 date = StringUtils.unhtml(dateElement.text()).trim();
95 }
96
97 list.add(new Story(getType(), id, title.text(), author, date,
98 categ, details, intUrl, extUrl, body));
99 }
100
101 return list;
102 }
103
104 @Override
105 public void fetch(Story story) throws IOException {
106 List<Comment> comments = new ArrayList<Comment>();
107
108 URL url = new URL(story.getUrlInternal());
109 InputStream in = downloader.open(url);
110 Document doc = DataUtil.load(in, "UTF-8", url.toString());
111 Element listing = doc.getElementById("commentlisting");
112 if (listing != null) {
113 comments.addAll(getComments(listing));
114 }
115
116 story.setComments(comments);
117 }
118
119 private List<Comment> getComments(Element listing) {
120 List<Comment> comments = new ArrayList<Comment>();
121 Comment lastComment = null;
122 for (Element commentElement : listing.children()) {
123 if (commentElement.hasClass("comment")) {
124 if (!commentElement.hasClass("hidden")) {
125 lastComment = getComment(commentElement);
126 comments.add(lastComment);
127 }
128
129 List<Comment> subComments = new ArrayList<Comment>();
130 for (Element child : commentElement.children()) {
131 if (child.id().contains("commtree_")) {
132 subComments.addAll(getComments(child));
133 }
134 }
135
136 if (lastComment == null) {
137 comments.addAll(subComments);
138 } else {
139 lastComment.addAll(subComments);
140 }
141 }
142 }
143
144 return comments;
145 }
146
147 /**
148 * Get a comment from the given element.
149 *
150 * @param commentElement
151 * the element to get the comment of.
152 *
153 * @return the comment, <b>NOT</b> including sub-comments
154 */
155 private Comment getComment(Element commentElement) {
156 String title = firstOrEmpty(commentElement, "title").text();
157 String author = firstOrEmpty(commentElement, "by").text();
158 String date = firstOrEmpty(commentElement, "otherdetails").text();
159 Element content = firstOrEmpty(commentElement, "commentBody");
160
161 return new Comment(commentElement.id(), author, title, date,
162 toLines(content));
163 }
164
165 private List<String> toLines(Element element) {
166 return toLines(element, new BasicElementProcessor() {
167 @Override
168 public String processText(String text) {
169 while (text.startsWith(">")) { // comment in one-liners
170 text = text.substring(1).trim();
171 }
172
173 return text;
174 }
175
176 @Override
177 public boolean detectQuote(Node node) {
178 if (node instanceof Element) {
179 Element elementNode = (Element) node;
180 if (elementNode.tagName().equals("blockquote")
181 || elementNode.hasClass("quote")
182 || (elementNode.tagName().equals("p")
183 && elementNode.textNodes().size() == 1 && elementNode
184 .textNodes().get(0).getWholeText()
185 .startsWith(">"))) {
186 return true;
187 }
188 }
189
190 return false;
191 }
192 });
193 }
194 }