From 100a839503d23e324d2db3f6d3e47892def3bf81 Mon Sep 17 00:00:00 2001 From: Niki Roo Date: Tue, 22 Aug 2017 20:00:32 +0200 Subject: [PATCH] Version 1.0.0: add Le Monde support --- VERSION | 2 +- changelog.md | 5 + src/be/nikiroo/gofetch/output/Gopher.java | 20 +-- src/be/nikiroo/gofetch/output/Html.java | 13 +- .../nikiroo/gofetch/support/BasicSupport.java | 40 +++++- src/be/nikiroo/gofetch/support/LWN.java | 15 ++- src/be/nikiroo/gofetch/support/LeMonde.java | 127 ++++++++++++++++++ src/be/nikiroo/gofetch/support/Pipedot.java | 19 ++- src/be/nikiroo/gofetch/support/Slashdot.java | 13 +- 9 files changed, 223 insertions(+), 31 deletions(-) create mode 100644 src/be/nikiroo/gofetch/support/LeMonde.java diff --git a/VERSION b/VERSION index 0ea3a94..3eefcb9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.2.0 +1.0.0 diff --git a/changelog.md b/changelog.md index 850dad2..0bd673f 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,10 @@ # Gofetch +## Version 1.0.0 + +- Add Le Monde support +- Fix some small textual issues + ## Version 0.2.0 - Add Linux Weekly News support diff --git a/src/be/nikiroo/gofetch/output/Gopher.java b/src/be/nikiroo/gofetch/output/Gopher.java index 6dcb4aa..2fa0c91 100644 --- a/src/be/nikiroo/gofetch/output/Gopher.java +++ b/src/be/nikiroo/gofetch/output/Gopher.java @@ -148,9 +148,11 @@ public class Gopher extends Output { // note: adds "i" private static void appendJustified(StringBuilder builder, String text, String space) { - for (String line : StringJustifier.full(text, - LINE_SIZE - space.length())) { - builder.append("i").append(line).append("\r\n"); + for (String line : text.split("\n")) { + for (String subline : StringJustifier.full(line, + LINE_SIZE - space.length())) { + builder.append("i").append(subline).append("\r\n"); + } } } @@ -164,11 +166,13 @@ public class Gopher extends Output { private static void appendLeft(StringBuilder builder, String text, String prependFirst, String prependOthers, String space) { String prepend = prependFirst; - for (String line : StringJustifier.left(text, - LINE_SIZE - space.length())) { - builder.append("i").append(space).append(prepend).append(line) - .append("\r\n"); - prepend = prependOthers; + for (String line : text.split("\n")) { + for (String subline : StringJustifier.left(line, + LINE_SIZE - space.length())) { + builder.append("i").append(space).append(prepend) + .append(subline).append("\r\n"); + prepend = prependOthers; + } } } } diff --git a/src/be/nikiroo/gofetch/output/Html.java b/src/be/nikiroo/gofetch/output/Html.java index 33c99c8..0f4c5a4 100644 --- a/src/be/nikiroo/gofetch/output/Html.java +++ b/src/be/nikiroo/gofetch/output/Html.java @@ -120,8 +120,12 @@ public class Html extends Output { } else { builder.append("

" + story.getTitle() + "

\n"); } - builder.append("
(" + story.getDetails() - + ")
\n"); + + builder.append("
"); + if (story.getDetails() != null && !story.getDetails().isEmpty()) { + builder.append("(").append(story.getDetails()).append(")"); + } + builder.append("
\n"); builder.append("
\n"); if (!resume) { @@ -140,7 +144,10 @@ public class Html extends Output { if (resume) { builder.append(" " + story.getContent() + "\n"); } else { - builder.append(" " + story.getFullContent() + "\n"); + builder.append(" " + + story.getFullContent().replace("\n", "
") + .replace("[ ", "

").replace(" ]", "

") + + "\n"); } builder.append(" \n"); diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java index 1db066b..102023e 100644 --- a/src/be/nikiroo/gofetch/support/BasicSupport.java +++ b/src/be/nikiroo/gofetch/support/BasicSupport.java @@ -20,7 +20,7 @@ import be.nikiroo.gofetch.data.Story; public abstract class BasicSupport { public enum Type { - SLASHDOT, PIPEDOT, LWN, + SLASHDOT, PIPEDOT, LWN, LEMONDE, } public interface QuoteProcessor { @@ -29,12 +29,33 @@ public abstract class BasicSupport { public String processText(String text); public boolean ignoreNode(Node node); + + /** + * Manually process this node if so desired. + * + * @param node + * the node to optionally process + * + * @return NULL if not processed, a {@link String} (may be empty) if we + * must not process it any further + */ + public String manualProcessing(Node node); } static private String preselector; private Type type; + /** + * List all the recent items, but only assure the ID and internal URL to + * fetch it later on (until it has been fetched, the rest of the + * {@link Story} is not confirmed). + * + * @return the list of new stories + * + * @throws IOException + * in case of I/O + */ abstract public List list() throws IOException; /** @@ -85,6 +106,9 @@ public abstract class BasicSupport { case LWN: support = new LWN(); break; + case LEMONDE: + support = new LeMonde(); + break; } if (support != null) { @@ -162,8 +186,18 @@ public abstract class BasicSupport { new NodeTraversor(new NodeVisitor() { @Override public void head(Node node, int depth) { - if (quoteProcessor.ignoreNode(node) - || ignoredNodes.contains(node.parentNode())) { + String manual = null; + boolean ignore = quoteProcessor.ignoreNode(node) + || ignoredNodes.contains(node.parentNode()); + if (!ignore) { + manual = quoteProcessor.manualProcessing(node); + if (manual != null) { + currentLine.append(manual); + ignore = true; + } + } + + if (ignore) { ignoredNodes.add(node); return; } diff --git a/src/be/nikiroo/gofetch/support/LWN.java b/src/be/nikiroo/gofetch/support/LWN.java index dba4c3b..c492d10 100644 --- a/src/be/nikiroo/gofetch/support/LWN.java +++ b/src/be/nikiroo/gofetch/support/LWN.java @@ -33,10 +33,10 @@ public class LWN extends BasicSupport { URL url = new URL("https://lwn.net/"); InputStream in = open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); - Elements stories = doc.getElementsByClass("pure-u-1"); - for (Element story : stories) { - Elements titles = story.getElementsByClass("Headline"); - Elements listings = story.getElementsByClass("BlurbListing"); + Elements articles = doc.getElementsByClass("pure-u-1"); + for (Element article : articles) { + Elements titles = article.getElementsByClass("Headline"); + Elements listings = article.getElementsByClass("BlurbListing"); if (titles.size() == 0) { continue; } @@ -74,7 +74,7 @@ public class LWN extends BasicSupport { String id = ""; String intUrl = ""; String extUrl = ""; - for (Element idElem : story.getElementsByTag("a")) { + for (Element idElem : article.getElementsByTag("a")) { // Last link is the story link intUrl = idElem.absUrl("href"); pos = intUrl.indexOf("#Comments"); @@ -201,6 +201,11 @@ public class LWN extends BasicSupport { return false; } + + @Override + public String manualProcessing(Node node) { + return null; + } }); } } diff --git a/src/be/nikiroo/gofetch/support/LeMonde.java b/src/be/nikiroo/gofetch/support/LeMonde.java new file mode 100644 index 0000000..4e22b4c --- /dev/null +++ b/src/be/nikiroo/gofetch/support/LeMonde.java @@ -0,0 +1,127 @@ +package be.nikiroo.gofetch.support; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; + +import org.jsoup.helper.DataUtil; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.jsoup.select.Elements; + +import be.nikiroo.gofetch.data.Comment; +import be.nikiroo.gofetch.data.Story; + +public class LeMonde extends BasicSupport { + @Override + public String getDescription() { + return "Le Monde: Actualités et Infos en France et dans le monde"; + } + + @Override + public List list() throws IOException { + List list = new ArrayList(); + + for (String topic : new String[] { "international", "politique", + "societe", "sciences" }) { + URL url = new URL("http://www.lemonde.fr/" + topic + "/1.html"); + InputStream in = open(url); + Document doc = DataUtil.load(in, "UTF-8", url.toString()); + Elements articles = doc.getElementsByTag("article"); + for (Element article : articles) { + Elements times = article.getElementsByTag("time"); + Elements titleElements = article.getElementsByTag("h3"); + Elements contentElements = article.getElementsByClass("txt3"); + if (times.size() > 0 && titleElements.size() > 0 + && contentElements.size() > 0) { + String id = times.get(0).attr("datetime").replace(":", "_"); + String title = "[" + topic + "] " + + titleElements.get(0).text(); + String content = contentElements.get(0).text(); + String intUrl = ""; + String extUrl = ""; + String details = ""; + + Elements detailsElements = article + .getElementsByClass("signature"); + if (detailsElements.size() > 0) { + details = detailsElements.get(0).text(); + } + + Elements links = titleElements.get(0).getElementsByTag("a"); + if (links.size() > 0) { + intUrl = links.get(0).absUrl("href"); + list.add(new Story(getType(), id, title, details, + intUrl, extUrl, content)); + } + } + } + } + + return list; + } + + @Override + public void fetch(Story story) throws IOException { + String fullContent = story.getContent(); + List comments = new ArrayList(); + + // Note: no comments on this site as far as I can see (or maybe with + // some javascript, I need to check...) + + URL url = new URL(story.getUrlInternal()); + InputStream in = open(url); + Document doc = DataUtil.load(in, "UTF-8", url.toString()); + Element article = doc.getElementById("articleBody"); + if (article != null) { + for (String line : toLines(article, new QuoteProcessor() { + @Override + public String processText(String text) { + return text; + } + + @Override + public boolean ignoreNode(Node node) { + if (node instanceof Element) { + Element element = (Element) node; + if (element.hasClass("lire")) { + return true; + } + } + + return false; + } + + @Override + public boolean detectQuote(Node node) { + return false; + } + + @Override + public String manualProcessing(Node node) { + if (node instanceof Element) { + Element element = (Element) node; + if (element.hasClass("intertitre")) { + return "\n[ " + element.text() + " ]\n"; + } + } + return null; + } + })) { + fullContent += line + "\n"; + } + + // Content is too tight with a single break per line: + fullContent = fullContent.replace("\n", "\n\n") // + .replace("\n\n\n\n", "\n\n") // + .replace("\n\n\n\n", "\n\n") // + .trim(); + } + + story.setFullContent(fullContent); + story.setComments(comments); + } +} diff --git a/src/be/nikiroo/gofetch/support/Pipedot.java b/src/be/nikiroo/gofetch/support/Pipedot.java index 1bd5173..89932f7 100644 --- a/src/be/nikiroo/gofetch/support/Pipedot.java +++ b/src/be/nikiroo/gofetch/support/Pipedot.java @@ -33,9 +33,9 @@ public class Pipedot extends BasicSupport { URL url = new URL("https://pipedot.org/"); InputStream in = open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); - Elements stories = doc.getElementsByClass("story"); - for (Element story : stories) { - Elements titles = story.getElementsByTag("h1"); + Elements articles = doc.getElementsByClass("story"); + for (Element article : articles) { + Elements titles = article.getElementsByTag("h1"); if (titles.size() == 0) { continue; } @@ -43,7 +43,7 @@ public class Pipedot extends BasicSupport { Element title = titles.get(0); String id = ""; - for (Element idElem : story.getElementsByTag("a")) { + for (Element idElem : article.getElementsByTag("a")) { if (idElem.attr("href").startsWith("/pipe/")) { id = idElem.attr("href").substring("/pipe/".length()); break; @@ -53,7 +53,7 @@ public class Pipedot extends BasicSupport { String intUrl = null; String extUrl = null; - Elements links = story.getElementsByTag("a"); + Elements links = article.getElementsByTag("a"); if (links.size() > 0) { intUrl = links.get(0).absUrl("href"); } @@ -68,13 +68,13 @@ public class Pipedot extends BasicSupport { } String details = ""; - Elements detailsElements = story.getElementsByTag("div"); + Elements detailsElements = article.getElementsByTag("div"); if (detailsElements.size() > 0) { details = detailsElements.get(0).text(); } String body = ""; - for (Element elem : story.children()) { + for (Element elem : article.children()) { String tag = elem.tag().toString(); if (!tag.equals("header") && !tag.equals("footer")) { body = elem.text(); @@ -165,6 +165,11 @@ public class Pipedot extends BasicSupport { public boolean ignoreNode(Node node) { return false; } + + @Override + public String manualProcessing(Node node) { + return null; + } }); } } diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java index 8776e35..378b3a4 100644 --- a/src/be/nikiroo/gofetch/support/Slashdot.java +++ b/src/be/nikiroo/gofetch/support/Slashdot.java @@ -33,9 +33,9 @@ public class Slashdot extends BasicSupport { URL url = new URL("https://slashdot.org/"); InputStream in = open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); - Elements stories = doc.getElementsByTag("header"); - for (Element story : stories) { - Elements titles = story.getElementsByClass("story-title"); + Elements articles = doc.getElementsByTag("header"); + for (Element article : articles) { + Elements titles = article.getElementsByClass("story-title"); if (titles.size() == 0) { continue; } @@ -58,7 +58,7 @@ public class Slashdot extends BasicSupport { } String details = ""; - Elements detailsElements = story.getElementsByClass("details"); + Elements detailsElements = article.getElementsByClass("details"); if (detailsElements.size() > 0) { details = detailsElements.get(0).text(); } @@ -169,6 +169,11 @@ public class Slashdot extends BasicSupport { public boolean ignoreNode(Node node) { return false; } + + @Override + public String manualProcessing(Node node) { + return null; + } }); } } -- 2.27.0