1 package be
.nikiroo
.gofetch
.support
;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;

18 * Support <a href='https://lwn.net/'>https://lwn.net/</a>.
22 public class LWN
extends BasicSupport
{
24 public String
getDescription() {
25 return "LWN: Linux Weekly Newsletter";
29 public List
<Story
> list() throws IOException
{
30 // TODO: comments + do not get comment for [$] stories
32 List
<Story
> list
= new ArrayList
<Story
>();
34 URL url
= new URL("https://lwn.net/");
35 InputStream in
= open(url
);
36 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
37 Elements stories
= doc
.getElementsByClass("pure-u-1");
38 for (Element story
: stories
) {
39 Elements titles
= story
.getElementsByClass("Headline");
40 Elements listings
= story
.getElementsByClass("BlurbListing");
41 if (titles
.size() == 0) {
44 if (listings
.size() == 0) {
48 Element listing
= listings
.get(0);
49 if (listing
.children().size() < 2) {
53 String title
= titles
.get(0).text();
54 String details
= listing
.children().get(0).text();
56 // All but the first and two last children
57 for (int i
= 1; i
< listing
.children().size() - 2; i
++) {
58 Element e
= listing
.children().get(i
);
59 body
= body
.trim() + " " + e
.text().trim();
64 int pos
= details
.indexOf(" by ");
66 author
= details
.substring(pos
+ " by ".length()).trim();
70 pos
= details
.indexOf(" Posted ");
72 date
= details
.substring(pos
+ " Posted ".length()).trim();
78 for (Element idElem
: story
.getElementsByTag("a")) {
79 // Last link is the story link
80 intUrl
= idElem
.absUrl("href");
81 pos
= intUrl
.indexOf("#Comments");
83 intUrl
= intUrl
.substring(0, pos
- 1);
85 id
= intUrl
.replaceAll("[^0-9]", "");
88 list
.add(new Story(getType(), id
, title
, details
, intUrl
, extUrl
,
96 public void fetch(Story story
) throws IOException
{
98 * URL url = new URL(story.getUrlInternal()); InputStream in =
99 * open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString());
100 * Elements listing = doc.getElementsByTag("main"); if (listing.size() >
101 * 0) { comments.addAll(getComments(listing.get(0))); }
105 private List
<Comment
> getComments(Element listing
) {
106 List
<Comment
> comments
= new ArrayList
<Comment
>();
107 for (Element commentElement
: listing
.children()) {
108 if (commentElement
.hasClass("comment")) {
109 Comment comment
= getComment(commentElement
);
110 if (!comment
.isEmpty()) {
111 comments
.add(comment
);
118 private Comment
getComment(Element commentElement
) {
119 String title
= firstOrEmptyTag(commentElement
, "h3");
120 String author
= firstOrEmpty(commentElement
, "h4");
121 String content
= firstOrEmpty(commentElement
, "comment-body");
124 int pos
= author
.lastIndexOf(" on ");
126 date
= author
.substring(pos
+ " on ".length()).trim();
127 author
= author
.substring(0, pos
).trim();
130 Comment comment
= new Comment(commentElement
.id(), author
, title
, date
,
133 Elements commentOutline
= commentElement
134 .getElementsByClass("comment-outline");
135 if (commentOutline
.size() > 0) {
136 comment
.addAll(getComments(commentOutline
.get(0)));
143 * Get the first element of the given class, or an empty {@link String} if
147 * the element to look in
149 * the class to look for
151 * @return the value or an empty {@link String}
153 private String
firstOrEmpty(Element element
, String className
) {
154 Elements subElements
= element
.getElementsByClass(className
);
155 if (subElements
.size() > 0) {
156 return subElements
.get(0).text();
163 * Get the first element of the given tag, or an empty {@link String} if
167 * the element to look in
169 * the tag to look for
171 * @return the value or an empty {@link String}
173 private String
firstOrEmptyTag(Element element
, String tagName
) {
174 Elements subElements
= element
.getElementsByTag(tagName
);
175 if (subElements
.size() > 0) {
176 return subElements
.get(0).text();