Small fixes in different places
[gofetch.git] / src / be / nikiroo / gofetch / support / LWN.java
CommitLineData
eaaeae39
NR
1package be.nikiroo.gofetch.support;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.URL;
6import java.util.ArrayList;
7import java.util.List;
8
9import org.jsoup.helper.DataUtil;
10import org.jsoup.nodes.Document;
11import org.jsoup.nodes.Element;
bb0d9eb2 12import org.jsoup.nodes.Node;
eaaeae39
NR
13import org.jsoup.select.Elements;
14
15import be.nikiroo.gofetch.data.Comment;
16import be.nikiroo.gofetch.data.Story;
17
18/**
19 * Support <a href='https://lwn.net/'>https://lwn.net/</a>.
20 *
21 * @author niki
22 */
23public class LWN extends BasicSupport {
24 @Override
25 public String getDescription() {
26 return "LWN: Linux Weekly Newsletter";
27 }
28
29 @Override
30 public List<Story> list() throws IOException {
eaaeae39
NR
31 List<Story> list = new ArrayList<Story>();
32
33 URL url = new URL("https://lwn.net/");
136ab801 34 InputStream in = downloader.open(url);
eaaeae39 35 Document doc = DataUtil.load(in, "UTF-8", url.toString());
100a8395
NR
36 Elements articles = doc.getElementsByClass("pure-u-1");
37 for (Element article : articles) {
38 Elements titles = article.getElementsByClass("Headline");
39 Elements listings = article.getElementsByClass("BlurbListing");
eaaeae39
NR
40 if (titles.size() == 0) {
41 continue;
42 }
43 if (listings.size() == 0) {
44 continue;
45 }
5c056aad 46
eaaeae39
NR
47 Element listing = listings.get(0);
48 if (listing.children().size() < 2) {
49 continue;
50 }
eaaeae39
NR
51
52 String title = titles.get(0).text();
53 String details = listing.children().get(0).text();
25271075
NR
54 String body = "";
55 // All but the first and two last children
5c056aad 56 for (int i = 1; i < listing.children().size() - 2; i++) {
25271075
NR
57 Element e = listing.children().get(i);
58 body = body.trim() + " " + e.text().trim();
59 }
60 body = body.trim();
5c056aad 61
b34d1f35
NR
62 int pos;
63
64 String categ = "";
65 pos = details.indexOf("]");
66 if (pos >= 0) {
c9cffa91 67 categ = details.substring(1, pos).trim();
b34d1f35
NR
68 }
69
eaaeae39 70 String author = "";
b34d1f35 71 pos = details.indexOf(" by ");
eaaeae39
NR
72 if (pos >= 0) {
73 author = details.substring(pos + " by ".length()).trim();
74 }
5c056aad 75
eaaeae39
NR
76 String date = "";
77 pos = details.indexOf(" Posted ");
78 if (pos >= 0) {
79 date = details.substring(pos + " Posted ".length()).trim();
c9cffa91 80 pos = date.indexOf(" by ");
b34d1f35 81 if (pos >= 0) {
c9cffa91 82 date = date.substring(0, pos).trim();
b34d1f35 83 }
eaaeae39 84 }
eaaeae39 85
b34d1f35
NR
86 // We extracted everything from details so...
87 details = "";
88
eaaeae39
NR
89 String id = "";
90 String intUrl = "";
91 String extUrl = "";
100a8395 92 for (Element idElem : article.getElementsByTag("a")) {
eaaeae39
NR
93 // Last link is the story link
94 intUrl = idElem.absUrl("href");
95 pos = intUrl.indexOf("#Comments");
96 if (pos >= 0) {
5c056aad 97 intUrl = intUrl.substring(0, pos - 1);
eaaeae39
NR
98 }
99 id = intUrl.replaceAll("[^0-9]", "");
100 }
101
b34d1f35
NR
102 list.add(new Story(getType(), id, title, author, date, categ,
103 details, intUrl, extUrl, body));
eaaeae39
NR
104 }
105
106 return list;
107 }
108
109 @Override
5c056aad 110 public void fetch(Story story) throws IOException {
bb0d9eb2
NR
111 List<Comment> comments = new ArrayList<Comment>();
112 String fullContent = story.getContent();
113
114 // Do not try the paid-for stories...
115 if (!story.getTitle().startsWith("[$]")) {
116 URL url = new URL(story.getUrlInternal());
136ab801 117 InputStream in = downloader.open(url);
bb0d9eb2
NR
118 Document doc = DataUtil.load(in, "UTF-8", url.toString());
119 Elements fullContentElements = doc
120 .getElementsByClass("ArticleText");
121 if (fullContentElements.size() > 0) {
122 // comments.addAll(getComments(listing.get(0)));
123 fullContent = fullContentElements.get(0).text();
124 }
125
126 Elements listing = doc.getElementsByClass("lwn-u-1");
127 if (listing.size() > 0) {
128 comments.addAll(getComments(listing.get(0)));
129 }
130 } else {
131 fullContent = "[$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].";
132 }
133
134 story.setFullContent(fullContent);
135 story.setComments(comments);
eaaeae39
NR
136 }
137
138 private List<Comment> getComments(Element listing) {
139 List<Comment> comments = new ArrayList<Comment>();
140 for (Element commentElement : listing.children()) {
bb0d9eb2 141 if (commentElement.hasClass("CommentBox")) {
eaaeae39
NR
142 Comment comment = getComment(commentElement);
143 if (!comment.isEmpty()) {
144 comments.add(comment);
145 }
bb0d9eb2
NR
146 } else if (commentElement.hasClass("Comment")) {
147 if (comments.size() > 0) {
148 comments.get(comments.size() - 1).addAll(
149 getComments(commentElement));
150 }
eaaeae39
NR
151 }
152 }
153 return comments;
154 }
155
156 private Comment getComment(Element commentElement) {
27008a87
NR
157 String title = firstOrEmpty(commentElement, "CommentTitle").text();
158 String author = firstOrEmpty(commentElement, "CommentPoster").text();
eaaeae39
NR
159
160 String date = "";
bb0d9eb2 161 int pos = author.lastIndexOf(" by ");
eaaeae39 162 if (pos >= 0) {
bb0d9eb2
NR
163 date = author.substring(0, pos).trim();
164 author = author.substring(pos + " by ".length()).trim();
165
166 if (author.startsWith("Posted ")) {
167 author = author.substring("Posted ".length()).trim();
168 }
eaaeae39
NR
169 }
170
27008a87 171 Element content = null;
bb0d9eb2
NR
172 Elements commentBodyElements = commentElement
173 .getElementsByClass("CommentBody");
174 if (commentBodyElements.size() > 0) {
27008a87 175 content = commentBodyElements.get(0);
eaaeae39
NR
176 }
177
bb0d9eb2 178 Comment comment = new Comment(commentElement.id(), author, title, date,
27008a87 179 toLines(content));
bb0d9eb2 180
eaaeae39
NR
181 return comment;
182 }
183
27008a87 184 private List<String> toLines(Element element) {
20217360 185 return toLines(element, new BasicElementProcessor() {
27008a87
NR
186 @Override
187 public String processText(String text) {
188 while (text.startsWith(">")) { // comments
189 text = text.substring(1).trim();
190 }
eaaeae39 191
27008a87
NR
192 return text;
193 }
eaaeae39 194
27008a87
NR
195 @Override
196 public boolean detectQuote(Node node) {
197 if (node instanceof Element) {
198 Element elementNode = (Element) node;
199 if (elementNode.tagName().equals("blockquote")
200 || elementNode.hasClass("QuotedText")) {
201 return true;
202 }
203 }
204
205 return false;
206 }
eaaeae39 207
27008a87
NR
208 @Override
209 public boolean ignoreNode(Node node) {
210 if (node instanceof Element) {
211 Element elementNode = (Element) node;
212 if (elementNode.hasClass("CommentPoster")) {
213 return true;
214 }
215 }
216
217 return false;
218 }
219 });
eaaeae39
NR
220 }
221}