Version 1.0.0: add Le Monde support

author Niki Roo <niki@nikiroo.be>
Tue, 22 Aug 2017 18:00:32 +0000 (20:00 +0200)
committer Niki Roo <niki@nikiroo.be>
Tue, 22 Aug 2017 18:00:32 +0000 (20:00 +0200)

diff --git a/VERSION b/VERSION

index 0ea3a944b399d25f7e1b8fe684d754eb8da9fe7f..3eefcb9dd5b38e2c1dc061052455dd97bcd51e6c 100644 (file)
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.2.0
+1.0.0
diff --git a/changelog.md b/changelog.md

index 850dad2bb417201928714869d1097b6197751cd2..0bd673f1d1c0c775d0cac11e1c64b387a8bbd87a 100644 (file)
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,10 @@
  # Gofetch
  
+## Version 1.0.0
+
+- Add Le Monde support
+- Fix some small textual issues
+
  ## Version 0.2.0
  
  - Add Linux Weekly News support
diff --git a/src/be/nikiroo/gofetch/output/Gopher.java b/src/be/nikiroo/gofetch/output/Gopher.java

index 6dcb4aa8b284777bff8338c530e83d225e8b0ba5..2fa0c913b31d9951a1007b0269acd50324b2416a 100644 (file)
--- a/src/be/nikiroo/gofetch/output/Gopher.java
+++ b/src/be/nikiroo/gofetch/output/Gopher.java
@@ -148,9 +148,11 @@ public class Gopher extends Output {
         // note: adds "i"
         private static void appendJustified(StringBuilder builder, String text,
                         String space) {
-               for (String line : StringJustifier.full(text,
-                               LINE_SIZE - space.length())) {
-                       builder.append("i").append(line).append("\r\n");
+               for (String line : text.split("\n")) {
+                       for (String subline : StringJustifier.full(line,
+                                       LINE_SIZE - space.length())) {
+                               builder.append("i").append(subline).append("\r\n");
+                       }
                 }
         }
  
@@ -164,11 +166,13 @@ public class Gopher extends Output {
         private static void appendLeft(StringBuilder builder, String text,
                         String prependFirst, String prependOthers, String space) {
                 String prepend = prependFirst;
-               for (String line : StringJustifier.left(text,
-                               LINE_SIZE - space.length())) {
-                       builder.append("i").append(space).append(prepend).append(line)
-                                       .append("\r\n");
-                       prepend = prependOthers;
+               for (String line : text.split("\n")) {
+                       for (String subline : StringJustifier.left(line,
+                                       LINE_SIZE - space.length())) {
+                               builder.append("i").append(space).append(prepend)
+                                               .append(subline).append("\r\n");
+                               prepend = prependOthers;
+                       }
                 }
         }
  }
diff --git a/src/be/nikiroo/gofetch/output/Html.java b/src/be/nikiroo/gofetch/output/Html.java

index 33c99c81f8874d4a8b249dddee43e0f56ff69a0c..0f4c5a436b602ce6cf8ef744fea8c9ce52c67350 100644 (file)
--- a/src/be/nikiroo/gofetch/output/Html.java
+++ b/src/be/nikiroo/gofetch/output/Html.java
@@ -120,8 +120,12 @@ public class Html extends Output {
                 } else {
                         builder.append("        <h1>" + story.getTitle() + "</h1>\n");
                 }
-               builder.append("        <div class='details'>(" + story.getDetails()
-                               + ")</div>\n");
+
+               builder.append("        <div class='details'>");
+               if (story.getDetails() != null && !story.getDetails().isEmpty()) {
+                       builder.append("(").append(story.getDetails()).append(")");
+               }
+               builder.append("</div>\n");
                 builder.append("        <br/>\n");
  
                 if (!resume) {
@@ -140,7 +144,10 @@ public class Html extends Output {
                 if (resume) {
                         builder.append("                " + story.getContent() + "\n");
                 } else {
-                       builder.append("                " + story.getFullContent() + "\n");
+                       builder.append("                "
+                                       + story.getFullContent().replace("\n", "<br/>")
+                                                       .replace("[ ", "<h2>").replace(" ]", "</h2>")
+                                       + "\n");
                 }
                 builder.append("        </div>\n");
  
diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java

index 1db066b3d107c8fefb26218675fa8c5c0767b815..102023eb051a02b6c13a41e5ddc0d64e98743156 100644 (file)
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -20,7 +20,7 @@ import be.nikiroo.gofetch.data.Story;
  
  public abstract class BasicSupport {
         public enum Type {
-               SLASHDOT, PIPEDOT, LWN,
+               SLASHDOT, PIPEDOT, LWN, LEMONDE,
         }
  
         public interface QuoteProcessor {
@@ -29,12 +29,33 @@ public abstract class BasicSupport {
                 public String processText(String text);
  
                 public boolean ignoreNode(Node node);
+
+               /**
+                * Manually process this node if so desired.
+                * 
+                * @param node
+                *            the node to optionally process
+                * 
+                * @return NULL if not processed, a {@link String} (may be empty) if we
+                *         must not process it any further
+                */
+               public String manualProcessing(Node node);
         }
  
         static private String preselector;
  
         private Type type;
  
+       /**
+        * List all the recent items, but only assure the ID and internal URL to
+        * fetch it later on (until it has been fetched, the rest of the
+        * {@link Story} is not confirmed).
+        * 
+        * @return the list of new stories
+        * 
+        * @throws IOException
+        *             in case of I/O
+        */
         abstract public List<Story> list() throws IOException;
  
         /**
@@ -85,6 +106,9 @@ public abstract class BasicSupport {
                         case LWN:
                                 support = new LWN();
                                 break;
+                       case LEMONDE:
+                               support = new LeMonde();
+                               break;
                         }
  
                         if (support != null) {
@@ -162,8 +186,18 @@ public abstract class BasicSupport {
                         new NodeTraversor(new NodeVisitor() {
                                 @Override
                                 public void head(Node node, int depth) {
-                                       if (quoteProcessor.ignoreNode(node)
-                                                       || ignoredNodes.contains(node.parentNode())) {
+                                       String manual = null;
+                                       boolean ignore = quoteProcessor.ignoreNode(node)
+                                                       || ignoredNodes.contains(node.parentNode());
+                                       if (!ignore) {
+                                               manual = quoteProcessor.manualProcessing(node);
+                                               if (manual != null) {
+                                                       currentLine.append(manual);
+                                                       ignore = true;
+                                               }
+                                       }
+
+                                       if (ignore) {
                                                 ignoredNodes.add(node);
                                                 return;
                                         }
diff --git a/src/be/nikiroo/gofetch/support/LWN.java b/src/be/nikiroo/gofetch/support/LWN.java

index dba4c3bfa922f684729352baa2d438e075c1ac88..c492d10d21cde68e48f1390db8efcf7b0a10c470 100644 (file)
--- a/src/be/nikiroo/gofetch/support/LWN.java
+++ b/src/be/nikiroo/gofetch/support/LWN.java
@@ -33,10 +33,10 @@ public class LWN extends BasicSupport {
                 URL url = new URL("https://lwn.net/");
                 InputStream in = open(url);
                 Document doc = DataUtil.load(in, "UTF-8", url.toString());
-               Elements stories = doc.getElementsByClass("pure-u-1");
-               for (Element story : stories) {
-                       Elements titles = story.getElementsByClass("Headline");
-                       Elements listings = story.getElementsByClass("BlurbListing");
+               Elements articles = doc.getElementsByClass("pure-u-1");
+               for (Element article : articles) {
+                       Elements titles = article.getElementsByClass("Headline");
+                       Elements listings = article.getElementsByClass("BlurbListing");
                         if (titles.size() == 0) {
                                 continue;
                         }
@@ -74,7 +74,7 @@ public class LWN extends BasicSupport {
                         String id = "";
                         String intUrl = "";
                         String extUrl = "";
-                       for (Element idElem : story.getElementsByTag("a")) {
+                       for (Element idElem : article.getElementsByTag("a")) {
                                 // Last link is the story link
                                 intUrl = idElem.absUrl("href");
                                 pos = intUrl.indexOf("#Comments");
@@ -201,6 +201,11 @@ public class LWN extends BasicSupport {
  
                                 return false;
                         }
+
+                       @Override
+                       public String manualProcessing(Node node) {
+                               return null;
+                       }
                 });
         }
  }
diff --git a/src/be/nikiroo/gofetch/support/LeMonde.java b/src/be/nikiroo/gofetch/support/LeMonde.java

new file mode 100644 (file)

index 0000000..4e22b4c
--- /dev/null
+++ b/src/be/nikiroo/gofetch/support/LeMonde.java
@@ -0,0 +1,127 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.select.Elements;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+
+public class LeMonde extends BasicSupport {
+       @Override
+       public String getDescription() {
+               return "Le Monde: Actualités et Infos en France et dans le monde";
+       }
+
+       @Override
+       public List<Story> list() throws IOException {
+               List<Story> list = new ArrayList<Story>();
+
+               for (String topic : new String[] { "international", "politique",
+                               "societe", "sciences" }) {
+                       URL url = new URL("http://www.lemonde.fr/" + topic + "/1.html");
+                       InputStream in = open(url);
+                       Document doc = DataUtil.load(in, "UTF-8", url.toString());
+                       Elements articles = doc.getElementsByTag("article");
+                       for (Element article : articles) {
+                               Elements times = article.getElementsByTag("time");
+                               Elements titleElements = article.getElementsByTag("h3");
+                               Elements contentElements = article.getElementsByClass("txt3");
+                               if (times.size() > 0 && titleElements.size() > 0
+                                               && contentElements.size() > 0) {
+                                       String id = times.get(0).attr("datetime").replace(":", "_");
+                                       String title = "[" + topic + "] "
+                                                       + titleElements.get(0).text();
+                                       String content = contentElements.get(0).text();
+                                       String intUrl = "";
+                                       String extUrl = "";
+                                       String details = "";
+
+                                       Elements detailsElements = article
+                                                       .getElementsByClass("signature");
+                                       if (detailsElements.size() > 0) {
+                                               details = detailsElements.get(0).text();
+                                       }
+
+                                       Elements links = titleElements.get(0).getElementsByTag("a");
+                                       if (links.size() > 0) {
+                                               intUrl = links.get(0).absUrl("href");
+                                               list.add(new Story(getType(), id, title, details,
+                                                               intUrl, extUrl, content));
+                                       }
+                               }
+                       }
+               }
+
+               return list;
+       }
+
+       @Override
+       public void fetch(Story story) throws IOException {
+               String fullContent = story.getContent();
+               List<Comment> comments = new ArrayList<Comment>();
+
+               // Note: no comments on this site as far as I can see (or maybe with
+               // some javascript, I need to check...)
+
+               URL url = new URL(story.getUrlInternal());
+               InputStream in = open(url);
+               Document doc = DataUtil.load(in, "UTF-8", url.toString());
+               Element article = doc.getElementById("articleBody");
+               if (article != null) {
+                       for (String line : toLines(article, new QuoteProcessor() {
+                               @Override
+                               public String processText(String text) {
+                                       return text;
+                               }
+
+                               @Override
+                               public boolean ignoreNode(Node node) {
+                                       if (node instanceof Element) {
+                                               Element element = (Element) node;
+                                               if (element.hasClass("lire")) {
+                                                       return true;
+                                               }
+                                       }
+
+                                       return false;
+                               }
+
+                               @Override
+                               public boolean detectQuote(Node node) {
+                                       return false;
+                               }
+
+                               @Override
+                               public String manualProcessing(Node node) {
+                                       if (node instanceof Element) {
+                                               Element element = (Element) node;
+                                               if (element.hasClass("intertitre")) {
+                                                       return "\n[ " + element.text() + " ]\n";
+                                               }
+                                       }
+                                       return null;
+                               }
+                       })) {
+                               fullContent += line + "\n";
+                       }
+
+                       // Content is too tight with a single break per line:
+                       fullContent = fullContent.replace("\n", "\n\n") //
+                                       .replace("\n\n\n\n", "\n\n") //
+                                       .replace("\n\n\n\n", "\n\n") //
+                                       .trim();
+               }
+
+               story.setFullContent(fullContent);
+               story.setComments(comments);
+       }
+}
diff --git a/src/be/nikiroo/gofetch/support/Pipedot.java b/src/be/nikiroo/gofetch/support/Pipedot.java

index 1bd5173b331e6e2ee69552efaa77e964a1e3fe20..89932f7636c170abc3b34b7c71542880b255984c 100644 (file)
--- a/src/be/nikiroo/gofetch/support/Pipedot.java
+++ b/src/be/nikiroo/gofetch/support/Pipedot.java
@@ -33,9 +33,9 @@ public class Pipedot extends BasicSupport {
                 URL url = new URL("https://pipedot.org/");
                 InputStream in = open(url);
                 Document doc = DataUtil.load(in, "UTF-8", url.toString());
-               Elements stories = doc.getElementsByClass("story");
-               for (Element story : stories) {
-                       Elements titles = story.getElementsByTag("h1");
+               Elements articles = doc.getElementsByClass("story");
+               for (Element article : articles) {
+                       Elements titles = article.getElementsByTag("h1");
                         if (titles.size() == 0) {
                                 continue;
                         }
@@ -43,7 +43,7 @@ public class Pipedot extends BasicSupport {
                         Element title = titles.get(0);
  
                         String id = "";
-                       for (Element idElem : story.getElementsByTag("a")) {
+                       for (Element idElem : article.getElementsByTag("a")) {
                                 if (idElem.attr("href").startsWith("/pipe/")) {
                                         id = idElem.attr("href").substring("/pipe/".length());
                                         break;
@@ -53,7 +53,7 @@ public class Pipedot extends BasicSupport {
                         String intUrl = null;
                         String extUrl = null;
  
-                       Elements links = story.getElementsByTag("a");
+                       Elements links = article.getElementsByTag("a");
                         if (links.size() > 0) {
                                 intUrl = links.get(0).absUrl("href");
                         }
@@ -68,13 +68,13 @@ public class Pipedot extends BasicSupport {
                         }
  
                         String details = "";
-                       Elements detailsElements = story.getElementsByTag("div");
+                       Elements detailsElements = article.getElementsByTag("div");
                         if (detailsElements.size() > 0) {
                                 details = detailsElements.get(0).text();
                         }
  
                         String body = "";
-                       for (Element elem : story.children()) {
+                       for (Element elem : article.children()) {
                                 String tag = elem.tag().toString();
                                 if (!tag.equals("header") && !tag.equals("footer")) {
                                         body = elem.text();
@@ -165,6 +165,11 @@ public class Pipedot extends BasicSupport {
                         public boolean ignoreNode(Node node) {
                                 return false;
                         }
+
+                       @Override
+                       public String manualProcessing(Node node) {
+                               return null;
+                       }
                 });
         }
  }
diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java

index 8776e35fc7eac132aea50ff6ed7ead90d301fff7..378b3a4bfdb0cf0c7277397075ccf13d4c33aff5 100644 (file)
--- a/src/be/nikiroo/gofetch/support/Slashdot.java
+++ b/src/be/nikiroo/gofetch/support/Slashdot.java
@@ -33,9 +33,9 @@ public class Slashdot extends BasicSupport {
                 URL url = new URL("https://slashdot.org/");
                 InputStream in = open(url);
                 Document doc = DataUtil.load(in, "UTF-8", url.toString());
-               Elements stories = doc.getElementsByTag("header");
-               for (Element story : stories) {
-                       Elements titles = story.getElementsByClass("story-title");
+               Elements articles = doc.getElementsByTag("header");
+               for (Element article : articles) {
+                       Elements titles = article.getElementsByClass("story-title");
                         if (titles.size() == 0) {
                                 continue;
                         }
@@ -58,7 +58,7 @@ public class Slashdot extends BasicSupport {
                         }
  
                         String details = "";
-                       Elements detailsElements = story.getElementsByClass("details");
+                       Elements detailsElements = article.getElementsByClass("details");
                         if (detailsElements.size() > 0) {
                                 details = detailsElements.get(0).text();
                         }
@@ -169,6 +169,11 @@ public class Slashdot extends BasicSupport {
                         public boolean ignoreNode(Node node) {
                                 return false;
                         }
+
+                       @Override
+                       public String manualProcessing(Node node) {
+                               return null;
+                       }
                 });
         }
  }
author	Niki Roo <niki@nikiroo.be>
	Tue, 22 Aug 2017 18:00:32 +0000 (20:00 +0200)
committer	Niki Roo <niki@nikiroo.be>
	Tue, 22 Aug 2017 18:00:32 +0000 (20:00 +0200)
VERSION		patch | blob | blame | history
changelog.md		patch | blob | blame | history
src/be/nikiroo/gofetch/output/Gopher.java		patch | blob | blame | history
src/be/nikiroo/gofetch/output/Html.java		patch | blob | blame | history
src/be/nikiroo/gofetch/support/BasicSupport.java		patch | blob | blame | history
src/be/nikiroo/gofetch/support/LWN.java		patch | blob | blame | history
src/be/nikiroo/gofetch/support/LeMonde.java	[new file with mode: 0644]	patch | blob
src/be/nikiroo/gofetch/support/Pipedot.java		patch | blob | blame | history
src/be/nikiroo/gofetch/support/Slashdot.java		patch | blob | blame | history