From eaaeae39d61b9fa1a44619566c4ebf7f1e99e811 Mon Sep 17 00:00:00 2001
From: Niki Roo
Date: Mon, 7 Aug 2017 11:56:01 +0200
Subject: [PATCH 1/1] Add support for lwn.net (WIP)

---
Review notes (this area is ignored when the patch is applied):
- LWN.java was revised during review: typed collections instead of raw
  types, the input stream is now closed in list(), the unused author/date
  parsing was removed from list(), and the helpers are documented.
- The blob id on the LWN.java "index" line is the one of the original
  revision and does not match the revised content.
- Indentation was reconstructed; apply with --ignore-whitespace if the
  BasicSupport.java context lines do not match.

 changelog.md                                  |  10 +-
 .../nikiroo/gofetch/support/BasicSupport.java |   5 +-
 src/be/nikiroo/gofetch/support/LWN.java       | 219 ++++++++++++++++++
 3 files changed, 230 insertions(+), 4 deletions(-)
 create mode 100644 src/be/nikiroo/gofetch/support/LWN.java

diff --git a/changelog.md b/changelog.md
index 5e4ba8f..a3ec4a2 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,12 +1,16 @@
 # Gofetch
 
-## Version 0.0.1
+## Version WIP
 
-- First version
-- Slashdot supported
+- Add Linux Weekly News support
 
 ## Version 0.1.1
 
 - Fix a few bugs
 - Add Pipedot support
 
+## Version 0.0.1
+
+- First version
+- Slashdot supported
+
diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
index daa0ed1..d9e273a 100644
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -12,7 +12,7 @@ import be.nikiroo.gofetch.data.Story;
 
 public abstract class BasicSupport {
 	public enum Type {
-		SLASHDOT, PIPEDOT,
+		SLASHDOT, PIPEDOT, LWN,
 	}
 
 	static private String preselector;
@@ -56,6 +56,9 @@ public abstract class BasicSupport {
 			case PIPEDOT:
 				support = new Pipedot();
 				break;
+			case LWN:
+				support = new LWN();
+				break;
 			}
 
 			if (support != null) {
diff --git a/src/be/nikiroo/gofetch/support/LWN.java b/src/be/nikiroo/gofetch/support/LWN.java
new file mode 100644
index 0000000..869380d
--- /dev/null
+++ b/src/be/nikiroo/gofetch/support/LWN.java
@@ -0,0 +1,219 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+
+/**
+ * Support https://lwn.net/.
+ * 
+ * @author niki
+ */
+public class LWN extends BasicSupport {
+	@Override
+	public String getDescription() {
+		return "LWN: Linux Weekly Newsletter";
+	}
+
+	/**
+	 * List the stories found on the https://lwn.net/ page.
+	 * 
+	 * @return the stories (can be empty)
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	@Override
+	public List<Story> list() throws IOException {
+		// TODO: comments + do not get comment for [$] stories
+		// + update body on getComment (global change, also LinuxToday)
+
+		List<Story> list = new ArrayList<Story>();
+
+		URL url = new URL("https://lwn.net/");
+		InputStream in = open(url);
+		Document doc;
+		try {
+			doc = DataUtil.load(in, "UTF-8", url.toString());
+		} finally {
+			// Parsing is over (or failed): do not leak the stream
+			in.close();
+		}
+
+		Elements stories = doc.getElementsByClass("pure-u-1");
+		for (Element story : stories) {
+			Elements titles = story.getElementsByClass("Headline");
+			Elements listings = story.getElementsByClass("BlurbListing");
+			if (titles.isEmpty() || listings.isEmpty()) {
+				// Not a story block
+				continue;
+			}
+
+			// First child: the details line, second child: the body
+			Element listing = listings.get(0);
+			if (listing.children().size() < 2) {
+				continue;
+			}
+
+			String title = titles.get(0).text();
+			String details = listing.children().get(0).text();
+			String body = listing.children().get(1).text();
+
+			String id = "";
+			String intUrl = "";
+			String extUrl = "";
+			for (Element idElem : story.getElementsByTag("a")) {
+				// Last link is the story link
+				intUrl = idElem.absUrl("href");
+				int pos = intUrl.indexOf("#Comments");
+				if (pos > 0) {
+					// "pos - 1" also drops the character right before the
+					// anchor (presumably the final '/', TODO: confirm)
+					intUrl = intUrl.substring(0, pos - 1);
+				}
+				id = intUrl.replaceAll("[^0-9]", "");
+			}
+
+			list.add(new Story(getType(), id, title, details, intUrl, extUrl,
+					body));
+		}
+
+		return list;
+	}
+
+	/**
+	 * Get the comments of the given story.
+	 * <p>
+	 * WIP: the actual download is still disabled, so the list is always empty.
+	 * 
+	 * @param story
+	 *            the story to get the comments of
+	 * 
+	 * @return the comments (currently always empty)
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	@Override
+	public List<Comment> getComments(Story story) throws IOException {
+		List<Comment> comments = new ArrayList<Comment>();
+
+		/*
+		URL url = new URL(story.getUrlInternal());
+		InputStream in = open(url);
+		Document doc = DataUtil.load(in, "UTF-8", url.toString());
+		Elements listing = doc.getElementsByTag("main");
+		if (listing.size() > 0) {
+			comments.addAll(getComments(listing.get(0)));
+		}
+		*/
+
+		return comments;
+	}
+
+	/**
+	 * Convert the direct children of the given element that have the class
+	 * "comment" into {@link Comment}s, skipping the empty ones.
+	 * 
+	 * @param listing
+	 *            the element that contains the comments
+	 * 
+	 * @return the comments (can be empty)
+	 */
+	private List<Comment> getComments(Element listing) {
+		List<Comment> comments = new ArrayList<Comment>();
+		for (Element commentElement : listing.children()) {
+			if (commentElement.hasClass("comment")) {
+				Comment comment = getComment(commentElement);
+				if (!comment.isEmpty()) {
+					comments.add(comment);
+				}
+			}
+		}
+		return comments;
+	}
+
+	/**
+	 * Convert the given element into a {@link Comment}, including its
+	 * sub-comments (looked for in its first "comment-outline" element).
+	 * 
+	 * @param commentElement
+	 *            the element to convert
+	 * 
+	 * @return the comment
+	 */
+	private Comment getComment(Element commentElement) {
+		String title = firstOrEmptyTag(commentElement, "h3");
+		String author = firstOrEmpty(commentElement, "h4");
+		String content = firstOrEmpty(commentElement, "comment-body");
+
+		// Split the "h4" class text on its last " on ": author, then date
+		String date = "";
+		int pos = author.lastIndexOf(" on ");
+		if (pos >= 0) {
+			date = author.substring(pos + " on ".length()).trim();
+			author = author.substring(0, pos).trim();
+		}
+
+		Comment comment = new Comment(commentElement.id(), author, title, date,
+				content);
+
+		Elements commentOutline = commentElement
+				.getElementsByClass("comment-outline");
+		if (commentOutline.size() > 0) {
+			comment.addAll(getComments(commentOutline.get(0)));
+		}
+
+		return comment;
+	}
+
+	/**
+	 * Get the first element of the given class, or an empty {@link String} if
+	 * none found.
+	 * 
+	 * @param element
+	 *            the element to look in
+	 * @param className
+	 *            the class to look for
+	 * 
+	 * @return the value or an empty {@link String}
+	 */
+	private String firstOrEmpty(Element element, String className) {
+		Elements subElements = element.getElementsByClass(className);
+		if (subElements.size() > 0) {
+			return subElements.get(0).text();
+		}
+
+		return "";
+	}
+
+	/**
+	 * Get the first element of the given tag, or an empty {@link String} if
+	 * none found.
+	 * 
+	 * @param element
+	 *            the element to look in
+	 * @param tagName
+	 *            the tag to look for
+	 * 
+	 * @return the value or an empty {@link String}
+	 */
+	private String firstOrEmptyTag(Element element, String tagName) {
+		Elements subElements = element.getElementsByTag(tagName);
+		if (subElements.size() > 0) {
+			return subElements.get(0).text();
+		}
+
+		return "";
+	}
+}
-- 
2.27.0