1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
6 import java
.util
.ArrayList
;
9 import org
.jsoup
.helper
.DataUtil
;
10 import org
.jsoup
.nodes
.Document
;
11 import org
.jsoup
.nodes
.Element
;
12 import org
.jsoup
.nodes
.Node
;
13 import org
.jsoup
.select
.Elements
;
15 import be
.nikiroo
.gofetch
.data
.Comment
;
16 import be
.nikiroo
.gofetch
.data
.Story
;
/**
 * Support <a href='https://lwn.net/'>https://lwn.net/</a>.
 */
23 public class LWN
extends BasicSupport
{
25 public String
getDescription() {
26 return "LWN: Linux Weekly Newsletter";
// List the stories found on the LWN front page.
//
// The page at https://lwn.net/ is opened with open(URL), parsed as UTF-8 with
// jsoup's DataUtil.load, and each element of class "pure-u-1" is inspected for
// a "Headline" and a "BlurbListing" element; the values extracted from them
// are passed to the Story constructor and the result is appended to the list.
//
// Throws IOException (declared in the signature; presumably propagated from
// open(URL) and DataUtil.load -- confirm).
//
// NOTE(review): no close() call on `in` is visible in this method -- confirm
// whether open(URL) or DataUtil.load takes care of releasing the stream.
30 public List
<Story
> list() throws IOException
{
// Accumulator for the stories that are built below.
31 List
<Story
> list
= new ArrayList
<Story
>();
33 URL url
= new URL("https://lwn.net/");
34 InputStream in
= open(url
);
// The URL is also given as base URI, which absUrl("href") relies on further
// down to produce absolute links.
35 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
// Each "pure-u-1" element is treated as one candidate story.
36 Elements stories
= doc
.getElementsByClass("pure-u-1");
37 for (Element story
: stories
) {
38 Elements titles
= story
.getElementsByClass("Headline");
39 Elements listings
= story
.getElementsByClass("BlurbListing");
// Candidates without a headline or without a blurb listing are tested for
// here; presumably they are skipped (the bodies of these checks are not
// visible -- confirm).
40 if (titles
.size() == 0) {
43 if (listings
.size() == 0) {
47 Element listing
= listings
.get(0);
// At least two children are needed: get(0) is read as the details line just
// below.
48 if (listing
.children().size() < 2) {
52 String title
= titles
.get(0).text();
// The first child of the listing carries the " by <author>" and
// " Posted <date>" markers parsed below.
53 String details
= listing
.children().get(0).text();
// The body is the space-separated, trimmed text of the children at indexes
// 1 .. size - 3.
55 // All but the first and two last children
56 for (int i
= 1; i
< listing
.children().size() - 2; i
++) {
57 Element e
= listing
.children().get(i
);
58 body
= body
.trim() + " " + e
.text().trim();
// Author: everything that follows the first " by " in the details line.
// NOTE(review): a check for pos < 0 is presumably made on a line that is not
// visible here -- confirm, substring would otherwise start at a wrong offset.
63 int pos
= details
.indexOf(" by ");
65 author
= details
.substring(pos
+ " by ".length()).trim();
// Date: everything that follows the first " Posted " in the details line
// (same remark about the pos < 0 case).
69 pos
= details
.indexOf(" Posted ");
71 date
= details
.substring(pos
+ " Posted ".length()).trim();
// intUrl and id are overwritten for every <a> element, so the values of the
// last link are the ones that are kept.
77 for (Element idElem
: story
.getElementsByTag("a")) {
78 // Last link is the story link
79 intUrl
= idElem
.absUrl("href");
80 pos
= intUrl
.indexOf("#Comments");
// Cuts the URL at "#Comments" and also drops the character right before it.
// NOTE(review): presumably that character is a trailing '/' and a pos >= 0
// guard sits on a line not visible here -- confirm both.
82 intUrl
= intUrl
.substring(0, pos
- 1);
// The id is made of the digits of the internal URL, everything else removed.
84 id
= intUrl
.replaceAll("[^0-9]", "");
87 list
.add(new Story(getType(), id
, title
, details
, intUrl
, extUrl
,
95 public void fetch(Story story
) throws IOException
{
96 List
<Comment
> comments
= new ArrayList
<Comment
>();
97 String fullContent
= story
.getContent();
99 // Do not try the paid-for stories...
100 if (!story
.getTitle().startsWith("[$]")) {
101 URL url
= new URL(story
.getUrlInternal());
102 InputStream in
= open(url
);
103 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
104 Elements fullContentElements
= doc
105 .getElementsByClass("ArticleText");
106 if (fullContentElements
.size() > 0) {
107 // comments.addAll(getComments(listing.get(0)));
108 fullContent
= fullContentElements
.get(0).text();
111 Elements listing
= doc
.getElementsByClass("lwn-u-1");
112 if (listing
.size() > 0) {
113 comments
.addAll(getComments(listing
.get(0)));
116 fullContent
= "[$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].";
119 story
.setFullContent(fullContent
);
120 story
.setComments(comments
);
123 private List
<Comment
> getComments(Element listing
) {
124 List
<Comment
> comments
= new ArrayList
<Comment
>();
125 for (Element commentElement
: listing
.children()) {
126 if (commentElement
.hasClass("CommentBox")) {
127 Comment comment
= getComment(commentElement
);
128 if (!comment
.isEmpty()) {
129 comments
.add(comment
);
131 } else if (commentElement
.hasClass("Comment")) {
132 if (comments
.size() > 0) {
133 comments
.get(comments
.size() - 1).addAll(
134 getComments(commentElement
));
// Build a Comment from the given comment element.
//
// The title is the text of the first "CommentTitle" element, the poster line
// is the text of the first "CommentPoster" element (both read through
// firstOrEmpty), and the content is assembled from the child nodes of the
// first "CommentBody" element. The element id, author, title and date are
// then passed to the Comment constructor.
141 private Comment
getComment(Element commentElement
) {
142 String title
= firstOrEmpty(commentElement
, "CommentTitle");
// At this point `author` holds the whole poster line; it is split below.
143 String author
= firstOrEmpty(commentElement
, "CommentPoster");
// The poster line is split on its LAST " by ": the left part becomes the
// date, the right part the author.
// NOTE(review): substring(0, pos) needs pos >= 0; the guard is presumably on
// a line not visible here -- confirm.
146 int pos
= author
.lastIndexOf(" by ");
148 date
= author
.substring(0, pos
).trim();
149 author
= author
.substring(pos
+ " by ".length()).trim();
// NOTE(review): `author` now holds the text AFTER " by ", while a "Posted "
// prefix would be expected at the start of the part stored in `date` --
// confirm which variable this check was meant for.
151 if (author
.startsWith("Posted ")) {
152 author
= author
.substring("Posted ".length()).trim();
// Only the first "CommentBody" element is used for the content.
157 Elements commentBodyElements
= commentElement
158 .getElementsByClass("CommentBody");
159 if (commentBodyElements
.size() > 0) {
160 for (Node contentNode
: commentBodyElements
.get(0).childNodes()) {
// Element children contribute their text, except those of class
// "CommentPoster".
161 if (contentNode
instanceof Element
) {
162 Element contentElement
= (Element
) contentNode
;
163 if (!contentElement
.hasClass("CommentPoster")) {
164 content
= content
.trim() + " "
165 + contentElement
.text().trim();
// Presumably the branch for nodes that are not Elements (the `else` line is
// not visible here -- confirm): their outer HTML is appended instead.
168 content
= content
.trim() + " "
169 + contentNode
.outerHtml().trim();
// Removes the leading space left by the concatenations above.
173 content
= content
.trim();
176 Comment comment
= new Comment(commentElement
.id(), author
, title
, date
,
183 * Get the first element of the given class, or an empty {@link String} if
187 * the element to look in
189 * the class to look for
191 * @return the value or an empty {@link String}
193 private String
firstOrEmpty(Element element
, String className
) {
194 Elements subElements
= element
.getElementsByClass(className
);
195 if (subElements
.size() > 0) {
196 return subElements
.get(0).text();
/**
 * Get the first element of the given tag, or an empty {@link String} if
 * there is none.
 *
 * @param element
 *            the element to look in
 * @param tagName
 *            the tag to look for
 *
 * @return the value or an empty {@link String}
 */
// Return the text of the first element with the given tag name found in the
// given element.
213 private String
firstOrEmptyTag(Element element
, String tagName
) {
// All the elements of that tag, as returned by getElementsByTag.
214 Elements subElements
= element
.getElementsByTag(tagName
);
// Only the first match is used.
// NOTE(review): the fallback for the no-match case is not visible here;
// presumably an empty String is returned -- confirm.
215 if (subElements
.size() > 0) {
216 return subElements
.get(0).text();