Fix download order and comments/content storing
[gofetch.git] / src / be / nikiroo / gofetch / support / LWN.java
CommitLineData
eaaeae39
NR
1package be.nikiroo.gofetch.support;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.URL;
6import java.util.ArrayList;
7import java.util.List;
8
9import org.jsoup.helper.DataUtil;
10import org.jsoup.nodes.Document;
11import org.jsoup.nodes.Element;
12import org.jsoup.select.Elements;
13
14import be.nikiroo.gofetch.data.Comment;
15import be.nikiroo.gofetch.data.Story;
16
17/**
18 * Support <a href='https://lwn.net/'>https://lwn.net/</a>.
19 *
20 * @author niki
21 */
22public class LWN extends BasicSupport {
23 @Override
24 public String getDescription() {
25 return "LWN: Linux Weekly Newsletter";
26 }
27
28 @Override
29 public List<Story> list() throws IOException {
30 // TODO: comments + do not get comment for [$] stories
5c056aad 31
eaaeae39
NR
32 List<Story> list = new ArrayList<Story>();
33
34 URL url = new URL("https://lwn.net/");
35 InputStream in = open(url);
36 Document doc = DataUtil.load(in, "UTF-8", url.toString());
37 Elements stories = doc.getElementsByClass("pure-u-1");
38 for (Element story : stories) {
39 Elements titles = story.getElementsByClass("Headline");
40 Elements listings = story.getElementsByClass("BlurbListing");
41 if (titles.size() == 0) {
42 continue;
43 }
44 if (listings.size() == 0) {
45 continue;
46 }
5c056aad 47
eaaeae39
NR
48 Element listing = listings.get(0);
49 if (listing.children().size() < 2) {
50 continue;
51 }
eaaeae39
NR
52
53 String title = titles.get(0).text();
54 String details = listing.children().get(0).text();
25271075
NR
55 String body = "";
56 // All but the first and two last children
5c056aad 57 for (int i = 1; i < listing.children().size() - 2; i++) {
25271075
NR
58 Element e = listing.children().get(i);
59 body = body.trim() + " " + e.text().trim();
60 }
61 body = body.trim();
5c056aad 62
eaaeae39
NR
63 String author = "";
64 int pos = details.indexOf(" by ");
65 if (pos >= 0) {
66 author = details.substring(pos + " by ".length()).trim();
67 }
5c056aad 68
eaaeae39
NR
69 String date = "";
70 pos = details.indexOf(" Posted ");
71 if (pos >= 0) {
72 date = details.substring(pos + " Posted ".length()).trim();
73 }
eaaeae39
NR
74
75 String id = "";
76 String intUrl = "";
77 String extUrl = "";
78 for (Element idElem : story.getElementsByTag("a")) {
79 // Last link is the story link
80 intUrl = idElem.absUrl("href");
81 pos = intUrl.indexOf("#Comments");
82 if (pos >= 0) {
5c056aad 83 intUrl = intUrl.substring(0, pos - 1);
eaaeae39
NR
84 }
85 id = intUrl.replaceAll("[^0-9]", "");
86 }
87
5c056aad
NR
88 list.add(new Story(getType(), id, title, details, intUrl, extUrl,
89 body));
eaaeae39
NR
90 }
91
92 return list;
93 }
94
95 @Override
5c056aad 96 public void fetch(Story story) throws IOException {
eaaeae39 97 /*
5c056aad
NR
98 * URL url = new URL(story.getUrlInternal()); InputStream in =
99 * open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString());
100 * Elements listing = doc.getElementsByTag("main"); if (listing.size() >
101 * 0) { comments.addAll(getComments(listing.get(0))); }
102 */
eaaeae39
NR
103 }
104
105 private List<Comment> getComments(Element listing) {
106 List<Comment> comments = new ArrayList<Comment>();
107 for (Element commentElement : listing.children()) {
108 if (commentElement.hasClass("comment")) {
109 Comment comment = getComment(commentElement);
110 if (!comment.isEmpty()) {
111 comments.add(comment);
112 }
113 }
114 }
115 return comments;
116 }
117
118 private Comment getComment(Element commentElement) {
119 String title = firstOrEmptyTag(commentElement, "h3");
120 String author = firstOrEmpty(commentElement, "h4");
121 String content = firstOrEmpty(commentElement, "comment-body");
122
123 String date = "";
124 int pos = author.lastIndexOf(" on ");
125 if (pos >= 0) {
126 date = author.substring(pos + " on ".length()).trim();
127 author = author.substring(0, pos).trim();
128 }
129
130 Comment comment = new Comment(commentElement.id(), author, title, date,
131 content);
132
133 Elements commentOutline = commentElement
134 .getElementsByClass("comment-outline");
135 if (commentOutline.size() > 0) {
136 comment.addAll(getComments(commentOutline.get(0)));
137 }
138
139 return comment;
140 }
141
142 /**
143 * Get the first element of the given class, or an empty {@link String} if
144 * none found.
145 *
146 * @param element
147 * the element to look in
148 * @param className
149 * the class to look for
150 *
151 * @return the value or an empty {@link String}
152 */
153 private String firstOrEmpty(Element element, String className) {
154 Elements subElements = element.getElementsByClass(className);
155 if (subElements.size() > 0) {
156 return subElements.get(0).text();
157 }
158
159 return "";
160 }
161
162 /**
163 * Get the first element of the given tag, or an empty {@link String} if
164 * none found.
165 *
166 * @param element
167 * the element to look in
168 * @param tagName
169 * the tag to look for
170 *
171 * @return the value or an empty {@link String}
172 */
173 private String firstOrEmptyTag(Element element, String tagName) {
174 Elements subElements = element.getElementsByTag(tagName);
175 if (subElements.size() > 0) {
176 return subElements.get(0).text();
177 }
178
179 return "";
180 }
181}