Version 1.0.0: add Le Monde support
author Niki Roo <niki@nikiroo.be>
Tue, 22 Aug 2017 18:00:32 +0000 (20:00 +0200)
committer Niki Roo <niki@nikiroo.be>
Tue, 22 Aug 2017 18:00:32 +0000 (20:00 +0200)
VERSION
changelog.md
src/be/nikiroo/gofetch/output/Gopher.java
src/be/nikiroo/gofetch/output/Html.java
src/be/nikiroo/gofetch/support/BasicSupport.java
src/be/nikiroo/gofetch/support/LWN.java
src/be/nikiroo/gofetch/support/LeMonde.java [new file with mode: 0644]
src/be/nikiroo/gofetch/support/Pipedot.java
src/be/nikiroo/gofetch/support/Slashdot.java

diff --git a/VERSION b/VERSION
index 0ea3a944b399d25f7e1b8fe684d754eb8da9fe7f..3eefcb9dd5b38e2c1dc061052455dd97bcd51e6c 100644 (file)
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.2.0
+1.0.0
diff --git a/changelog.md b/changelog.md
index 850dad2bb417201928714869d1097b6197751cd2..0bd673f1d1c0c775d0cac11e1c64b387a8bbd87a 100644 (file)
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,10 @@
 # Gofetch
 
+## Version 1.0.0
+
+- Add Le Monde support
+- Fix some small textual issues
+
 ## Version 0.2.0
 
 - Add Linux Weekly News support
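diff --git a/src/be/nikiroo/gofetch/output/Gopher.java b/src/be/nikiroo/gofetch/output/Gopher.java
index 6dcb4aa8b284777bff8338c530e83d225e8b0ba5..2fa0c913b31d9951a1007b0269acd50324b2416a 100644 (file)
--- a/src/be/nikiroo/gofetch/output/Gopher.java
+++ b/src/be/nikiroo/gofetch/output/Gopher.java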
@@ -148,9 +148,11 @@ public class Gopher extends Output {
        // note: adds "i"
        private static void appendJustified(StringBuilder builder, String text,
                        String space) {
-               for (String line : StringJustifier.full(text,
-                               LINE_SIZE - space.length())) {
-                       builder.append("i").append(line).append("\r\n");
+               for (String line : text.split("\n")) {
+                       for (String subline : StringJustifier.full(line,
+                                       LINE_SIZE - space.length())) {
+                               builder.append("i").append(subline).append("\r\n");
+                       }
                }
        }
 
@@ -164,11 +166,13 @@ public class Gopher extends Output {
        private static void appendLeft(StringBuilder builder, String text,
                        String prependFirst, String prependOthers, String space) {
                String prepend = prependFirst;
-               for (String line : StringJustifier.left(text,
-                               LINE_SIZE - space.length())) {
-                       builder.append("i").append(space).append(prepend).append(line)
-                                       .append("\r\n");
-                       prepend = prependOthers;
+               for (String line : text.split("\n")) {
+                       for (String subline : StringJustifier.left(line,
+                                       LINE_SIZE - space.length())) {
+                               builder.append("i").append(space).append(prepend)
+                                               .append(subline).append("\r\n");
+                               prepend = prependOthers;
+                       }
                }
        }
 }
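diff --git a/src/be/nikiroo/gofetch/output/Html.java b/src/be/nikiroo/gofetch/output/Html.java
index 33c99c81f8874d4a8b249dddee43e0f56ff69a0c..0f4c5a436b602ce6cf8ef744fea8c9ce52c67350 100644 (file)
--- a/src/be/nikiroo/gofetch/output/Html.java
+++ b/src/be/nikiroo/gofetch/output/Html.java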
@@ -120,8 +120,12 @@ public class Html extends Output {
                } else {
                        builder.append("        <h1>" + story.getTitle() + "</h1>\n");
                }
-               builder.append("        <div class='details'>(" + story.getDetails()
-                               + ")</div>\n");
+
+               builder.append("        <div class='details'>");
+               if (story.getDetails() != null && !story.getDetails().isEmpty()) {
+                       builder.append("(").append(story.getDetails()).append(")");
+               }
+               builder.append("</div>\n");
                builder.append("        <br/>\n");
 
                if (!resume) {
@@ -140,7 +144,10 @@ public class Html extends Output {
                if (resume) {
                        builder.append("                " + story.getContent() + "\n");
                } else {
-                       builder.append("                " + story.getFullContent() + "\n");
+                       builder.append("                "
+                                       + story.getFullContent().replace("\n", "<br/>")
+                                                       .replace("[ ", "<h2>").replace(" ]", "</h2>")
+                                       + "\n");
                }
                builder.append("        </div>\n");
 
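diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
index 1db066b3d107c8fefb26218675fa8c5c0767b815..102023eb051a02b6c13a41e5ddc0d64e98743156 100644 (file)
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java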
@@ -20,7 +20,7 @@ import be.nikiroo.gofetch.data.Story;
 
 public abstract class BasicSupport {
        public enum Type {
-               SLASHDOT, PIPEDOT, LWN,
+               SLASHDOT, PIPEDOT, LWN, LEMONDE,
        }
 
        public interface QuoteProcessor {
@@ -29,12 +29,33 @@ public abstract class BasicSupport {
                public String processText(String text);
 
                public boolean ignoreNode(Node node);
+
+               /**
+                * Manually process this node if so desired.
+                * 
+                * @param node
+                *            the node to optionally process
+                * 
+                * @return NULL if not processed, a {@link String} (may be empty) if we
+                *         must not process it any further
+                */
+               public String manualProcessing(Node node);
        }
 
        static private String preselector;
 
        private Type type;
 
+       /**
+        * List all the recent items, but only assure the ID and internal URL to
+        * fetch it later on (until it has been fetched, the rest of the
+        * {@link Story} is not confirmed).
+        * 
+        * @return the list of new stories
+        * 
+        * @throws IOException
+        *             in case of I/O
+        */
        abstract public List<Story> list() throws IOException;
 
        /**
@@ -85,6 +106,9 @@ public abstract class BasicSupport {
                        case LWN:
                                support = new LWN();
                                break;
+                       case LEMONDE:
+                               support = new LeMonde();
+                               break;
                        }
 
                        if (support != null) {
@@ -162,8 +186,18 @@ public abstract class BasicSupport {
                        new NodeTraversor(new NodeVisitor() {
                                @Override
                                public void head(Node node, int depth) {
-                                       if (quoteProcessor.ignoreNode(node)
-                                                       || ignoredNodes.contains(node.parentNode())) {
+                                       String manual = null;
+                                       boolean ignore = quoteProcessor.ignoreNode(node)
+                                                       || ignoredNodes.contains(node.parentNode());
+                                       if (!ignore) {
+                                               manual = quoteProcessor.manualProcessing(node);
+                                               if (manual != null) {
+                                                       currentLine.append(manual);
+                                                       ignore = true;
+                                               }
+                                       }
+
+                                       if (ignore) {
                                                ignoredNodes.add(node);
                                                return;
                                        }
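diff --git a/src/be/nikiroo/gofetch/support/LWN.java b/src/be/nikiroo/gofetch/support/LWN.java
index dba4c3bfa922f684729352baa2d438e075c1ac88..c492d10d21cde68e48f1390db8efcf7b0a10c470 100644 (file)
--- a/src/be/nikiroo/gofetch/support/LWN.java
+++ b/src/be/nikiroo/gofetch/support/LWN.java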
@@ -33,10 +33,10 @@ public class LWN extends BasicSupport {
                URL url = new URL("https://lwn.net/");
                InputStream in = open(url);
                Document doc = DataUtil.load(in, "UTF-8", url.toString());
-               Elements stories = doc.getElementsByClass("pure-u-1");
-               for (Element story : stories) {
-                       Elements titles = story.getElementsByClass("Headline");
-                       Elements listings = story.getElementsByClass("BlurbListing");
+               Elements articles = doc.getElementsByClass("pure-u-1");
+               for (Element article : articles) {
+                       Elements titles = article.getElementsByClass("Headline");
+                       Elements listings = article.getElementsByClass("BlurbListing");
                        if (titles.size() == 0) {
                                continue;
                        }
@@ -74,7 +74,7 @@ public class LWN extends BasicSupport {
                        String id = "";
                        String intUrl = "";
                        String extUrl = "";
-                       for (Element idElem : story.getElementsByTag("a")) {
+                       for (Element idElem : article.getElementsByTag("a")) {
                                // Last link is the story link
                                intUrl = idElem.absUrl("href");
                                pos = intUrl.indexOf("#Comments");
@@ -201,6 +201,11 @@ public class LWN extends BasicSupport {
 
                                return false;
                        }
+
+                       @Override
+                       public String manualProcessing(Node node) {
+                               return null;
+                       }
                });
        }
 }
diff --git a/src/be/nikiroo/gofetch/support/LeMonde.java b/src/be/nikiroo/gofetch/support/LeMonde.java
new file mode 100644 (file)
index 0000000..4e22b4c
--- /dev/null
@@ -0,0 +1,178 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.select.Elements;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+
+/**
+ * Support for the Le Monde news site (lemonde.fr).
+ * <p>
+ * {@link #list()} scans the first page of a fixed set of topic sections and
+ * {@link #fetch(Story)} downloads the body of a single article.
+ */
+public class LeMonde extends BasicSupport {
+       @Override
+       public String getDescription() {
+               return "Le Monde: Actualités et Infos en France et dans le monde";
+       }
+
+       /**
+        * List the articles found on the first page of each scanned topic.
+        * <p>
+        * An article is only kept if it has a time, a title, a summary and a
+        * link in its title; its full content is not downloaded here.
+        *
+        * @return the list of articles found (may be empty)
+        *
+        * @throws IOException
+        *             in case of I/O
+        */
+       @Override
+       public List<Story> list() throws IOException {
+               List<Story> list = new ArrayList<Story>();
+
+               for (String topic : new String[] { "international", "politique",
+                               "societe", "sciences" }) {
+                       URL url = new URL("http://www.lemonde.fr/" + topic + "/1.html");
+                       Document doc;
+                       InputStream in = open(url);
+                       try {
+                               doc = DataUtil.load(in, "UTF-8", url.toString());
+                       } finally {
+                               // Always release the stream, even when the parsing fails
+                               in.close();
+                       }
+
+                       Elements articles = doc.getElementsByTag("article");
+                       for (Element article : articles) {
+                               Elements times = article.getElementsByTag("time");
+                               Elements titleElements = article.getElementsByTag("h3");
+                               Elements contentElements = article.getElementsByClass("txt3");
+                               if (times.size() > 0 && titleElements.size() > 0
+                                               && contentElements.size() > 0) {
+                                       // The "datetime" of the first time element is used as ID
+                                       String id = times.get(0).attr("datetime").replace(":", "_");
+                                       String title = "[" + topic + "] "
+                                                       + titleElements.get(0).text();
+                                       String content = contentElements.get(0).text();
+                                       String intUrl = "";
+                                       String extUrl = "";
+                                       String details = "";
+
+                                       Elements detailsElements = article
+                                                       .getElementsByClass("signature");
+                                       if (detailsElements.size() > 0) {
+                                               details = detailsElements.get(0).text();
+                                       }
+
+                                       // Articles without a link in their title are skipped
+                                       Elements links = titleElements.get(0).getElementsByTag("a");
+                                       if (links.size() > 0) {
+                                               intUrl = links.get(0).absUrl("href");
+                                               list.add(new Story(getType(), id, title, details,
+                                                               intUrl, extUrl, content));
+                                       }
+                               }
+                       }
+               }
+
+               return list;
+       }
+
+       /**
+        * Download the page behind the internal URL of the given story and
+        * store its text as the full content of the story.
+        * <p>
+        * The full content starts with the already known summary; the lines
+        * extracted from the "articleBody" element are appended to it when that
+        * element is present in the page. The comments are always set to an
+        * empty list.
+        *
+        * @param story
+        *            the story to complete
+        *
+        * @throws IOException
+        *             in case of I/O
+        */
+       @Override
+       public void fetch(Story story) throws IOException {
+               String fullContent = story.getContent();
+               List<Comment> comments = new ArrayList<Comment>();
+
+               // Note: no comments on this site as far as I can see (or maybe with
+               // some javascript, I need to check...)
+
+               URL url = new URL(story.getUrlInternal());
+               Document doc;
+               InputStream in = open(url);
+               try {
+                       doc = DataUtil.load(in, "UTF-8", url.toString());
+               } finally {
+                       // Always release the stream, even when the parsing fails
+                       in.close();
+               }
+
+               Element article = doc.getElementById("articleBody");
+               if (article != null) {
+                       // Build the text in a buffer instead of re-copying a String on
+                       // each line
+                       StringBuilder builder = new StringBuilder();
+                       builder.append(fullContent);
+                       for (String line : toLines(article, new QuoteProcessor() {
+                               @Override
+                               public String processText(String text) {
+                                       return text;
+                               }
+
+                               @Override
+                               public boolean ignoreNode(Node node) {
+                                       // Skip the elements of class "lire"
+                                       if (node instanceof Element) {
+                                               Element element = (Element) node;
+                                               if (element.hasClass("lire")) {
+                                                       return true;
+                                               }
+                                       }
+
+                                       return false;
+                               }
+
+                               @Override
+                               public boolean detectQuote(Node node) {
+                                       return false;
+                               }
+
+                               @Override
+                               public String manualProcessing(Node node) {
+                                       // "intertitre" elements become "[ text ]" between line breaks
+                                       if (node instanceof Element) {
+                                               Element element = (Element) node;
+                                               if (element.hasClass("intertitre")) {
+                                                       return "\n[ " + element.text() + " ]\n";
+                                               }
+                                       }
+                                       return null;
+                               }
+                       })) {
+                               builder.append(line).append("\n");
+                       }
+
+                       // Content is too tight with a single break per line:
+                       fullContent = builder.toString().replace("\n", "\n\n") //
+                                       .replace("\n\n\n\n", "\n\n") //
+                                       .replace("\n\n\n\n", "\n\n") //
+                                       .trim();
+               }
+
+               story.setFullContent(fullContent);
+               story.setComments(comments);
+       }
+}
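diff --git a/src/be/nikiroo/gofetch/support/Pipedot.java b/src/be/nikiroo/gofetch/support/Pipedot.java
index 1bd5173b331e6e2ee69552efaa77e964a1e3fe20..89932f7636c170abc3b34b7c71542880b255984c 100644 (file)
--- a/src/be/nikiroo/gofetch/support/Pipedot.java
+++ b/src/be/nikiroo/gofetch/support/Pipedot.java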
@@ -33,9 +33,9 @@ public class Pipedot extends BasicSupport {
                URL url = new URL("https://pipedot.org/");
                InputStream in = open(url);
                Document doc = DataUtil.load(in, "UTF-8", url.toString());
-               Elements stories = doc.getElementsByClass("story");
-               for (Element story : stories) {
-                       Elements titles = story.getElementsByTag("h1");
+               Elements articles = doc.getElementsByClass("story");
+               for (Element article : articles) {
+                       Elements titles = article.getElementsByTag("h1");
                        if (titles.size() == 0) {
                                continue;
                        }
@@ -43,7 +43,7 @@ public class Pipedot extends BasicSupport {
                        Element title = titles.get(0);
 
                        String id = "";
-                       for (Element idElem : story.getElementsByTag("a")) {
+                       for (Element idElem : article.getElementsByTag("a")) {
                                if (idElem.attr("href").startsWith("/pipe/")) {
                                        id = idElem.attr("href").substring("/pipe/".length());
                                        break;
@@ -53,7 +53,7 @@ public class Pipedot extends BasicSupport {
                        String intUrl = null;
                        String extUrl = null;
 
-                       Elements links = story.getElementsByTag("a");
+                       Elements links = article.getElementsByTag("a");
                        if (links.size() > 0) {
                                intUrl = links.get(0).absUrl("href");
                        }
@@ -68,13 +68,13 @@ public class Pipedot extends BasicSupport {
                        }
 
                        String details = "";
-                       Elements detailsElements = story.getElementsByTag("div");
+                       Elements detailsElements = article.getElementsByTag("div");
                        if (detailsElements.size() > 0) {
                                details = detailsElements.get(0).text();
                        }
 
                        String body = "";
-                       for (Element elem : story.children()) {
+                       for (Element elem : article.children()) {
                                String tag = elem.tag().toString();
                                if (!tag.equals("header") && !tag.equals("footer")) {
                                        body = elem.text();
@@ -165,6 +165,11 @@ public class Pipedot extends BasicSupport {
                        public boolean ignoreNode(Node node) {
                                return false;
                        }
+
+                       @Override
+                       public String manualProcessing(Node node) {
+                               return null;
+                       }
                });
        }
 }
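diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java
index 8776e35fc7eac132aea50ff6ed7ead90d301fff7..378b3a4bfdb0cf0c7277397075ccf13d4c33aff5 100644 (file)
--- a/src/be/nikiroo/gofetch/support/Slashdot.java
+++ b/src/be/nikiroo/gofetch/support/Slashdot.java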
@@ -33,9 +33,9 @@ public class Slashdot extends BasicSupport {
                URL url = new URL("https://slashdot.org/");
                InputStream in = open(url);
                Document doc = DataUtil.load(in, "UTF-8", url.toString());
-               Elements stories = doc.getElementsByTag("header");
-               for (Element story : stories) {
-                       Elements titles = story.getElementsByClass("story-title");
+               Elements articles = doc.getElementsByTag("header");
+               for (Element article : articles) {
+                       Elements titles = article.getElementsByClass("story-title");
                        if (titles.size() == 0) {
                                continue;
                        }
@@ -58,7 +58,7 @@ public class Slashdot extends BasicSupport {
                        }
 
                        String details = "";
-                       Elements detailsElements = story.getElementsByClass("details");
+                       Elements detailsElements = article.getElementsByClass("details");
                        if (detailsElements.size() > 0) {
                                details = detailsElements.get(0).text();
                        }
@@ -169,6 +169,11 @@ public class Slashdot extends BasicSupport {
                        public boolean ignoreNode(Node node) {
                                return false;
                        }
+
+                       @Override
+                       public String manualProcessing(Node node) {
+                               return null;
+                       }
                });
        }
 }