1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
6 import java
.util
.ArrayList
;
9 import org
.jsoup
.helper
.DataUtil
;
10 import org
.jsoup
.nodes
.Document
;
11 import org
.jsoup
.nodes
.Element
;
12 import org
.jsoup
.nodes
.Node
;
13 import org
.jsoup
.select
.Elements
;
15 import be
.nikiroo
.gofetch
.data
.Comment
;
16 import be
.nikiroo
.gofetch
.data
.Story
;
19 * Support <a href='https://lwn.net/'>https://lwn.net/</a>.
23 public class LWN
extends BasicSupport
{
/**
 * Gives the human-readable description of this support.
 *
 * @return the constant text "LWN: Linux Weekly Newsletter"
 */
25 public String
getDescription() {
26 return "LWN: Linux Weekly Newsletter";
/**
 * Builds the list of stories found on the LWN front page
 * (https://lwn.net/).
 * <p>
 * The page is fetched through open(URL), parsed as UTF-8 with jsoup, and
 * every element of class "pure-u-1" is inspected for a "Headline" and a
 * "BlurbListing" element from which title, details, body, author, date,
 * id and internal URL are derived.
 * <p>
 * NOTE(review): several statements of this method (guard bodies, the
 * declarations of body/author/date/id/intUrl/extUrl, the end of the Story
 * constructor call and the return statement) are not visible in this
 * excerpt; the comments below only describe what is shown.
 *
 * @return presumably the list populated below (return statement not
 *         visible here -- confirm)
 *
 * @throws IOException
 *             declared by the signature; open(...) and DataUtil.load(...)
 *             are the I/O calls made here
 */
30 public List
<Story
> list() throws IOException
{
31 List
<Story
> list
= new ArrayList
<Story
>();
// Download the front page and parse it as UTF-8; the URL text is handed
// to jsoup as third argument of DataUtil.load (absUrl is used further down).
33 URL url
= new URL("https://lwn.net/");
34 InputStream in
= open(url
);
35 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
// Each "pure-u-1" element is treated as one candidate article.
36 Elements articles
= doc
.getElementsByClass("pure-u-1");
37 for (Element article
: articles
) {
38 Elements titles
= article
.getElementsByClass("Headline");
39 Elements listings
= article
.getElementsByClass("BlurbListing");
// NOTE(review): the bodies of the three guards below (no headline, no
// blurb listing, fewer than 2 blurb children) are not visible here;
// presumably they skip the candidate -- confirm.
40 if (titles
.size() == 0) {
43 if (listings
.size() == 0) {
47 Element listing
= listings
.get(0);
48 if (listing
.children().size() < 2) {
// Title = text of the first headline; details = text of the first child
// of the blurb listing.
52 String title
= titles
.get(0).text();
53 String details
= listing
.children().get(0).text();
// The body is the space-joined, trimmed text of the middle children
// (the declaration of 'body' is not visible in this excerpt).
55 // All but the first and two last children
56 for (int i
= 1; i
< listing
.children().size() - 2; i
++) {
57 Element e
= listing
.children().get(i
);
58 body
= body
.trim() + " " + e
.text().trim();
// Author = whatever follows the first " by " in the details.
// NOTE(review): indexOf yields -1 when the marker is absent; any guard
// on 'pos' is not visible here -- confirm one exists.
63 int pos
= details
.indexOf(" by ");
65 author
= details
.substring(pos
+ " by ".length()).trim();
// Date = whatever follows the first " Posted " in the details (same
// caveat about the guard on 'pos').
69 pos
= details
.indexOf(" Posted ");
71 date
= details
.substring(pos
+ " Posted ".length()).trim();
// Every <a> of the article overwrites intUrl (and id), so the values of
// the last link are the ones left after the loop.
77 for (Element idElem
: article
.getElementsByTag("a")) {
78 // Last link is the story link
79 intUrl
= idElem
.absUrl("href");
// Cut the URL at "#Comments".
// NOTE(review): substring(0, pos - 1) also removes the character just
// before the '#' -- confirm this is intended.
80 pos
= intUrl
.indexOf("#Comments");
82 intUrl
= intUrl
.substring(0, pos
- 1);
// The id is made of all the digits contained in the internal URL.
84 id
= intUrl
.replaceAll("[^0-9]", "");
// The remaining constructor arguments are not visible in this excerpt.
87 list
.add(new Story(getType(), id
, title
, details
, intUrl
, extUrl
,
/**
 * Completes the given story with its full content and its comments.
 * <p>
 * The full content starts as story.getContent() and the comment list
 * starts empty. For a story whose title does not start with "[$]", the
 * page at story.getUrlInternal() is downloaded and parsed as UTF-8: the
 * text of the first "ArticleText" element (if any) becomes the full
 * content, and the comments are read from the first "lwn-u-1" element (if
 * any). Both results are then stored on the story.
 * <p>
 * NOTE(review): closing braces and the branch that owns the "[$] Sorry"
 * assignment are not visible in this excerpt; presumably it is the else
 * branch taken for "[$]" titles -- confirm.
 *
 * @param story
 *            the story to complete; its title, content and internal URL
 *            are read, its full content and comments are set
 *
 * @throws IOException
 *             declared by the signature; open(...) and DataUtil.load(...)
 *             are the I/O calls made here
 */
95 public void fetch(Story story
) throws IOException
{
96 List
<Comment
> comments
= new ArrayList
<Comment
>();
// Default: keep the content already stored on the story.
97 String fullContent
= story
.getContent();
99 // Do not try the paid-for stories...
100 if (!story
.getTitle().startsWith("[$]")) {
// NOTE(review): no close() of 'in' is visible in this excerpt -- confirm
// whether open(...) / DataUtil.load(...) release the stream.
101 URL url
= new URL(story
.getUrlInternal());
102 InputStream in
= open(url
);
103 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
// Article text: first element of class "ArticleText", when present.
104 Elements fullContentElements
= doc
105 .getElementsByClass("ArticleText");
106 if (fullContentElements
.size() > 0) {
107 // comments.addAll(getComments(listing.get(0)));
108 fullContent
= fullContentElements
.get(0).text();
// Comments: parsed from the first element of class "lwn-u-1", when
// present.
111 Elements listing
= doc
.getElementsByClass("lwn-u-1");
112 if (listing
.size() > 0) {
113 comments
.addAll(getComments(listing
.get(0)));
// NOTE(review): "suscribers" in the message below is a typo for
// "subscribers"; it is runtime text and is left untouched here.
116 fullContent
= "[$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].";
// Store the results on the story.
119 story
.setFullContent(fullContent
);
120 story
.setComments(comments
);
/**
 * Converts the direct children of the given element into comments.
 * <p>
 * A child of class "CommentBox" is converted with getComment(Element) and
 * appended when the result is not empty. A child of class "Comment" is
 * processed recursively and its comments are added (addAll) to the most
 * recently appended comment, provided at least one has been appended.
 * Children with neither class are not handled by the visible code.
 *
 * @param listing
 *            the element whose direct children are inspected
 *
 * @return presumably the list built below (return statement not visible in
 *         this excerpt -- confirm)
 */
123 private List
<Comment
> getComments(Element listing
) {
124 List
<Comment
> comments
= new ArrayList
<Comment
>();
125 for (Element commentElement
: listing
.children()) {
// "CommentBox": one actual comment; kept only when not empty.
126 if (commentElement
.hasClass("CommentBox")) {
127 Comment comment
= getComment(commentElement
);
128 if (!comment
.isEmpty()) {
129 comments
.add(comment
);
// "Comment": a nested container; its comments are attached to the last
// comment collected so far, and nothing is done when there is none yet.
131 } else if (commentElement
.hasClass("Comment")) {
132 if (comments
.size() > 0) {
133 comments
.get(comments
.size() - 1).addAll(
134 getComments(commentElement
));
/**
 * Builds a single Comment from a comment element.
 * <p>
 * The title comes from the "CommentTitle" element and the poster line from
 * the "CommentPoster" element (both through firstOrEmpty). The poster line
 * is split on its last " by ": the part before goes to the date, the part
 * after to the author. The first "CommentBody" element, when present, is
 * kept as the content element.
 * <p>
 * NOTE(review): the declaration of 'date', any guard on 'pos', the end of
 * the Comment constructor call and the return statement are not visible in
 * this excerpt.
 *
 * @param commentElement
 *            the element describing one comment; its id() is passed as the
 *            first constructor argument of the Comment
 *
 * @return presumably the Comment created below (return statement not
 *         visible here -- confirm)
 */
141 private Comment
getComment(Element commentElement
) {
142 String title
= firstOrEmpty(commentElement
, "CommentTitle").text();
143 String author
= firstOrEmpty(commentElement
, "CommentPoster").text();
// Split the poster line on its LAST " by ": left part -> date, right
// part -> author. lastIndexOf yields -1 when the marker is absent.
146 int pos
= author
.lastIndexOf(" by ");
148 date
= author
.substring(0, pos
).trim();
149 author
= author
.substring(pos
+ " by ".length()).trim();
// NOTE(review): the "Posted " prefix is stripped from 'author' here, not
// from the text placed in 'date'. If the poster line reads
// "Posted <date> by <author>", the prefix would sit in 'date' instead --
// confirm the intent.
151 if (author
.startsWith("Posted ")) {
152 author
= author
.substring("Posted ".length()).trim();
// Content element: first "CommentBody" descendant, or null when none.
156 Element content
= null;
157 Elements commentBodyElements
= commentElement
158 .getElementsByClass("CommentBody");
159 if (commentBodyElements
.size() > 0) {
160 content
= commentBodyElements
.get(0);
// The remaining constructor arguments are not visible in this excerpt.
163 Comment comment
= new Comment(commentElement
.id(), author
, title
, date
,
169 private List
<String
> toLines(Element element
) {
170 return toLines(element
, new QuoteProcessor() {
172 public String
processText(String text
) {
173 while (text
.startsWith(">")) { // comments
174 text
= text
.substring(1).trim();
181 public boolean detectQuote(Node node
) {
182 if (node
instanceof Element
) {
183 Element elementNode
= (Element
) node
;
184 if (elementNode
.tagName().equals("blockquote")
185 || elementNode
.hasClass("QuotedText")) {
194 public boolean ignoreNode(Node node
) {
195 if (node
instanceof Element
) {
196 Element elementNode
= (Element
) node
;
197 if (elementNode
.hasClass("CommentPoster")) {
206 public String
manualProcessing(Node node
) {