Version 0.2.0: supports LWN, quotes, <br>s
[gofetch.git] / src / be / nikiroo / gofetch / support / Slashdot.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.List;
8
9 import org.jsoup.helper.DataUtil;
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 import be.nikiroo.gofetch.data.Comment;
16 import be.nikiroo.gofetch.data.Story;
17
18 /**
19 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
20 *
21 * @author niki
22 */
23 public class Slashdot extends BasicSupport {
24 @Override
25 public String getDescription() {
26 return "Slashdot: News for nerds, stuff that matters!";
27 }
28
29 @Override
30 public List<Story> list() throws IOException {
31 List<Story> list = new ArrayList<Story>();
32
33 URL url = new URL("https://slashdot.org/");
34 InputStream in = open(url);
35 Document doc = DataUtil.load(in, "UTF-8", url.toString());
36 Elements stories = doc.getElementsByTag("header");
37 for (Element story : stories) {
38 Elements titles = story.getElementsByClass("story-title");
39 if (titles.size() == 0) {
40 continue;
41 }
42
43 Element title = titles.get(0);
44
45 String id = "" + title.attr("id");
46 if (id.startsWith("title-")) {
47 id = id.substring("title-".length());
48 }
49
50 Elements links = title.getElementsByTag("a");
51 String intUrl = null;
52 String extUrl = null;
53 if (links.size() > 0) {
54 intUrl = links.get(0).absUrl("href");
55 }
56 if (links.size() > 1) {
57 extUrl = links.get(1).absUrl("href");
58 }
59
60 String details = "";
61 Elements detailsElements = story.getElementsByClass("details");
62 if (detailsElements.size() > 0) {
63 details = detailsElements.get(0).text();
64 }
65
66 String body = "";
67 Element bodyElement = doc.getElementById("text-" + id);
68 if (bodyElement != null) {
69 body = bodyElement.text();
70 }
71
72 list.add(new Story(getType(), id, title.text(), details, intUrl,
73 extUrl, body));
74 }
75
76 return list;
77 }
78
79 @Override
80 public void fetch(Story story) throws IOException {
81 List<Comment> comments = new ArrayList<Comment>();
82
83 URL url = new URL(story.getUrlInternal());
84 InputStream in = open(url);
85 Document doc = DataUtil.load(in, "UTF-8", url.toString());
86 Element listing = doc.getElementById("commentlisting");
87 if (listing != null) {
88 comments.addAll(getComments(listing));
89 }
90
91 story.setComments(comments);
92 }
93
94 private List<Comment> getComments(Element listing) {
95 List<Comment> comments = new ArrayList<Comment>();
96 Comment lastComment = null;
97 for (Element commentElement : listing.children()) {
98 if (commentElement.hasClass("comment")) {
99 if (!commentElement.hasClass("hidden")) {
100 lastComment = getComment(commentElement);
101 comments.add(lastComment);
102 }
103
104 List<Comment> subComments = new ArrayList<Comment>();
105 for (Element child : commentElement.children()) {
106 if (child.id().contains("commtree_")) {
107 subComments.addAll(getComments(child));
108 }
109 }
110
111 if (lastComment == null) {
112 comments.addAll(subComments);
113 } else {
114 lastComment.addAll(subComments);
115 }
116 }
117 }
118
119 return comments;
120 }
121
122 /**
123 * Get a comment from the given element.
124 *
125 * @param commentElement
126 * the element to get the comment of.
127 *
128 * @return the comment, <b>NOT</b> including sub-comments
129 */
130 private Comment getComment(Element commentElement) {
131 String title = firstOrEmpty(commentElement, "title").text();
132 String author = firstOrEmpty(commentElement, "by").text();
133 String date = firstOrEmpty(commentElement, "otherdetails").text();
134 Element content = firstOrEmpty(commentElement, "commentBody");
135
136 return new Comment(commentElement.id(), author, title, date,
137 toLines(content));
138 }
139
140 private List<String> toLines(Element element) {
141 return toLines(element, new QuoteProcessor() {
142 @Override
143 public String processText(String text) {
144 while (text.startsWith(">")) { // comment in one-liners
145 text = text.substring(1).trim();
146 }
147
148 return text;
149 }
150
151 @Override
152 public boolean detectQuote(Node node) {
153 if (node instanceof Element) {
154 Element elementNode = (Element) node;
155 if (elementNode.tagName().equals("blockquote")
156 || elementNode.hasClass("quote")
157 || (elementNode.tagName().equals("p")
158 && elementNode.textNodes().size() == 1 && elementNode
159 .textNodes().get(0).getWholeText()
160 .startsWith(">"))) {
161 return true;
162 }
163 }
164
165 return false;
166 }
167
168 @Override
169 public boolean ignoreNode(Node node) {
170 return false;
171 }
172 });
173 }
174 }