1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
6 import java
.util
.ArrayList
;
9 import org
.jsoup
.helper
.DataUtil
;
10 import org
.jsoup
.nodes
.Document
;
11 import org
.jsoup
.nodes
.Element
;
12 import org
.jsoup
.nodes
.Node
;
13 import org
.jsoup
.select
.Elements
;
15 import be
.nikiroo
.gofetch
.data
.Comment
;
16 import be
.nikiroo
.gofetch
.data
.Story
;
19 * Support <a href='https://lwn.net/'>https://lwn.net/</a>.
23 public class LWN
extends BasicSupport
{
25 public String
getDescription() {
26 return "LWN: Linux Weekly Newsletter";
30 public List
<Story
> list() throws IOException
{
31 List
<Story
> list
= new ArrayList
<Story
>();
33 URL url
= new URL("https://lwn.net/");
34 InputStream in
= downloader
.open(url
);
35 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
36 Elements articles
= doc
.getElementsByClass("pure-u-1");
37 for (Element article
: articles
) {
38 Elements titles
= article
.getElementsByClass("Headline");
39 Elements listings
= article
.getElementsByClass("BlurbListing");
40 if (titles
.size() == 0) {
43 if (listings
.size() == 0) {
47 Element listing
= listings
.get(0);
48 if (listing
.children().size() < 2) {
52 String title
= titles
.get(0).text();
53 String details
= listing
.children().get(0).text();
55 // All but the first and two last children
56 for (int i
= 1; i
< listing
.children().size() - 2; i
++) {
57 Element e
= listing
.children().get(i
);
58 body
= body
.trim() + " " + e
.text().trim();
65 pos
= details
.indexOf("]");
67 categ
= details
.substring(1, pos
).trim();
71 pos
= details
.indexOf(" by ");
73 author
= details
.substring(pos
+ " by ".length()).trim();
77 pos
= details
.indexOf(" Posted ");
79 date
= details
.substring(pos
+ " Posted ".length()).trim();
80 pos
= date
.indexOf(" by ");
82 date
= date
.substring(0, pos
).trim();
86 // We extracted everything from details so...
92 for (Element idElem
: article
.getElementsByTag("a")) {
93 // Last link is the story link
94 intUrl
= idElem
.absUrl("href");
95 pos
= intUrl
.indexOf("#Comments");
97 intUrl
= intUrl
.substring(0, pos
- 1);
99 id
= intUrl
.replaceAll("[^0-9]", "");
102 list
.add(new Story(getType(), id
, title
, author
, date
, categ
,
103 details
, intUrl
, extUrl
, body
));
110 public void fetch(Story story
) throws IOException
{
111 List
<Comment
> comments
= new ArrayList
<Comment
>();
112 String fullContent
= story
.getContent();
114 // Do not try the paid-for stories...
115 if (!story
.getTitle().startsWith("[$]")) {
116 URL url
= new URL(story
.getUrlInternal());
117 InputStream in
= downloader
.open(url
);
118 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
119 Elements fullContentElements
= doc
120 .getElementsByClass("ArticleText");
121 if (fullContentElements
.size() > 0) {
122 // comments.addAll(getComments(listing.get(0)));
123 fullContent
= fullContentElements
.get(0).text();
126 Elements listing
= doc
.getElementsByClass("lwn-u-1");
127 if (listing
.size() > 0) {
128 comments
.addAll(getComments(listing
.get(0)));
131 fullContent
= "[$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].";
134 story
.setFullContent(fullContent
);
135 story
.setComments(comments
);
138 private List
<Comment
> getComments(Element listing
) {
139 List
<Comment
> comments
= new ArrayList
<Comment
>();
140 for (Element commentElement
: listing
.children()) {
141 if (commentElement
.hasClass("CommentBox")) {
142 Comment comment
= getComment(commentElement
);
143 if (!comment
.isEmpty()) {
144 comments
.add(comment
);
146 } else if (commentElement
.hasClass("Comment")) {
147 if (comments
.size() > 0) {
148 comments
.get(comments
.size() - 1).addAll(
149 getComments(commentElement
));
156 private Comment
getComment(Element commentElement
) {
157 String title
= firstOrEmpty(commentElement
, "CommentTitle").text();
158 String author
= firstOrEmpty(commentElement
, "CommentPoster").text();
161 int pos
= author
.lastIndexOf(" by ");
163 date
= author
.substring(0, pos
).trim();
164 author
= author
.substring(pos
+ " by ".length()).trim();
166 if (author
.startsWith("Posted ")) {
167 author
= author
.substring("Posted ".length()).trim();
171 Element content
= null;
172 Elements commentBodyElements
= commentElement
173 .getElementsByClass("CommentBody");
174 if (commentBodyElements
.size() > 0) {
175 content
= commentBodyElements
.get(0);
178 Comment comment
= new Comment(commentElement
.id(), author
, title
, date
,
184 private List
<String
> toLines(Element element
) {
185 return toLines(element
, new BasicElementProcessor() {
187 public String
processText(String text
) {
188 while (text
.startsWith(">")) { // comments
189 text
= text
.substring(1).trim();
196 public boolean detectQuote(Node node
) {
197 if (node
instanceof Element
) {
198 Element elementNode
= (Element
) node
;
199 if (elementNode
.tagName().equals("blockquote")
200 || elementNode
.hasClass("QuotedText")) {
209 public boolean ignoreNode(Node node
) {
210 if (node
instanceof Element
) {
211 Element elementNode
= (Element
) node
;
212 if (elementNode
.hasClass("CommentPoster")) {