Version 0.2.0: supports LWN, quotes, <br>s
author Niki Roo <niki@nikiroo.be>
Tue, 22 Aug 2017 16:56:51 +0000 (18:56 +0200)
committer Niki Roo <niki@nikiroo.be>
Tue, 22 Aug 2017 16:56:51 +0000 (18:56 +0200)
VERSION
changelog.md
src/be/nikiroo/gofetch/data/Comment.java
src/be/nikiroo/gofetch/output/Gopher.java
src/be/nikiroo/gofetch/output/Html.java
src/be/nikiroo/gofetch/support/BasicSupport.java
src/be/nikiroo/gofetch/support/LWN.java
src/be/nikiroo/gofetch/support/Pipedot.java
src/be/nikiroo/gofetch/support/Slashdot.java

diff --git a/VERSION b/VERSION
index 17e51c385ea382d4f2ef124b7032c1604845622d..0ea3a944b399d25f7e1b8fe684d754eb8da9fe7f 100644 (file)
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.1.1
+0.2.0
diff --git a/changelog.md b/changelog.md
index a3ec4a2b8eac6c9dfdee9ba7fa5f9b12dcdce07c..13ebf3a738d39c979fce1579b91f07b734046919 100644 (file)
--- a/changelog.md
+++ b/changelog.md
@@ -1,8 +1,10 @@
 # Gofetch
 
-## Version WIP
+## Version 0.2.0
 
 - Add Linux Weekly News support
+- Correctly handle BR tags
+- Supports quotes
 
 ## Version 0.1.1
 
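diff --git a/src/be/nikiroo/gofetch/data/Comment.java b/src/be/nikiroo/gofetch/data/Comment.java
index 44c0de105eef25831286f57447261b82ee98b6ef..963d6aa6d6dffe427d5bab979fc20cd1790550e7 100644 (file)
--- a/src/be/nikiroo/gofetch/data/Comment.java
+++ b/src/be/nikiroo/gofetch/data/Comment.java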
@@ -9,16 +9,16 @@ public class Comment implements Iterable<Comment> {
        private String author;
        private String title;
        private String date;
-       private String content;
+       private List<String> lines;
        private List<Comment> children;
 
        public Comment(String id, String author, String title, String date,
-                       String content) {
+                       List<String> lines) {
                this.id = id;
                this.author = author;
                this.title = title;
                this.date = date;
-               this.content = content;
+               this.lines = lines;
                this.children = new ArrayList<Comment>();
        }
 
@@ -61,13 +61,13 @@ public class Comment implements Iterable<Comment> {
        /**
         * @return the content
         */
-       public String getContent() {
-               return content;
+       public List<String> getContentLines() {
+               return lines;
        }
 
        public boolean isEmpty() {
-               return children.isEmpty()
-                               && ("" + author + title + content).trim().isEmpty();
+               return children.isEmpty() && lines.isEmpty()
+                               && ("" + author + title).trim().isEmpty();
        }
 
        @Override
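diff --git a/src/be/nikiroo/gofetch/output/Gopher.java b/src/be/nikiroo/gofetch/output/Gopher.java
index 3fa6035c328d3f0731f5bc3a81ce7e38f3220abb..6dcb4aa8b284777bff8338c530e83d225e8b0ba5 100644 (file)
--- a/src/be/nikiroo/gofetch/output/Gopher.java
+++ b/src/be/nikiroo/gofetch/output/Gopher.java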
@@ -65,13 +65,30 @@ public class Gopher extends Output {
                        space = space.substring(0, LINE_SIZE - 20);
                }
 
-               appendLeft(builder, comment.getTitle(), ">> ", "   ", space);
+               appendLeft(builder, comment.getTitle(), "** ", "   ", space);
                appendLeft(builder, "(" + comment.getAuthor() + ")", "   ", "   ",
                                space);
 
                builder.append("i\r\n");
 
-               appendLeft(builder, comment.getContent(), "   ", "   ", space);
+               for (String line : comment.getContentLines()) {
+                       int depth = 0;
+                       while (line.length() > depth && line.charAt(depth) == '>') {
+                               depth++;
+                       }
+                       line = line.substring(depth).trim();
+
+                       String prep = "   ";
+                       for (int i = 0; i < depth; i++) {
+                               prep += ">";
+                       }
+
+                       if (depth > 0) {
+                               prep += " ";
+                       }
+
+                       appendLeft(builder, line, prep, prep, space);
+               }
 
                builder.append("i\r\n");
                for (Comment subComment : comment) {
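diff --git a/src/be/nikiroo/gofetch/output/Html.java b/src/be/nikiroo/gofetch/output/Html.java
index cdc77a461056887ef1cafad7e6abf7d19019d1ab..33c99c81f8874d4a8b249dddee43e0f56ff69a0c 100644 (file)
--- a/src/be/nikiroo/gofetch/output/Html.java
+++ b/src/be/nikiroo/gofetch/output/Html.java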
@@ -17,7 +17,7 @@ public class Html extends Output {
                }
 
                String gopherUrl = "gopher://" + hostname + sel + ":" + port;
-               
+
                StringBuilder builder = new StringBuilder();
                appendPre(builder);
 
@@ -28,9 +28,9 @@ public class Html extends Output {
                                + "</a>.</p>\n"//
                                + "<p>They are simply scrapped from their associated webpage and updated a few times a day.</p>\n"//
                );
-               
+
                appendPost(builder);
-               
+
                return builder.toString();
        }
 
@@ -101,8 +101,11 @@ public class Html extends Output {
                                .append("</h2>\n");
                builder.append(space).append("  <div class='by'>")
                                .append(comment.getAuthor()).append("</div>\n");
-               builder.append(space).append("  <div class='comment_content'>")
-                               .append(comment.getContent()).append("</div>\n");
+               builder.append(space).append("  <div class='comment_content'>");
+               for (String line : comment.getContentLines()) {
+                       builder.append("<p>" + line + "</p>");
+               }
+               builder.append("</div>\n");
                for (Comment subComment : comment) {
                        appendHtml(builder, subComment, space + "  ");
                }
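diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
index 7a1d0eab9da69291bc112dcbd7f67abb127a49c6..1db066b3d107c8fefb26218675fa8c5c0767b815 100644 (file)
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java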
@@ -4,9 +4,18 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
 import java.net.URLConnection;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.zip.GZIPInputStream;
 
+import org.jsoup.helper.StringUtil;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+import org.jsoup.select.Elements;
+import org.jsoup.select.NodeTraversor;
+import org.jsoup.select.NodeVisitor;
+
 import be.nikiroo.gofetch.data.Story;
 
 public abstract class BasicSupport {
@@ -14,6 +23,14 @@ public abstract class BasicSupport {
                SLASHDOT, PIPEDOT, LWN,
        }
 
+       public interface QuoteProcessor {
+               public boolean detectQuote(Node node);
+
+               public String processText(String text);
+
+               public boolean ignoreNode(Node node);
+       }
+
        static private String preselector;
 
        private Type type;
@@ -93,4 +110,136 @@ public abstract class BasicSupport {
 
                return in;
        }
+
+       /**
+        * Get the first {@link Element} of the given class, or an empty span
+        * {@link Element} if none found.
+        * 
+        * @param element
+        *            the element to look in
+        * @param className
+        *            the class to look for
+        * 
+        * @return the value or an empty span {@link Element}
+        */
+       static protected Element firstOrEmpty(Element element, String className) {
+               Elements subElements = element.getElementsByClass(className);
+               if (subElements.size() > 0) {
+                       return subElements.get(0);
+               }
+
+               return new Element("span");
+       }
+
+       /**
+        * Get the first {@link Element} of the given tag, or an empty span
+        * {@link Element} if none found.
+        * 
+        * @param element
+        *            the element to look in
+        * @param tagName
+        *            the tag to look for
+        * 
+        * @return the value or an empty span {@link Element}
+        */
+       static protected Element firstOrEmptyTag(Element element, String tagName) {
+               Elements subElements = element.getElementsByTag(tagName);
+               if (subElements.size() > 0) {
+                       return subElements.get(0);
+               }
+
+               return new Element("span");
+       }
+
+       static protected List<String> toLines(Element element,
+                       final QuoteProcessor quoteProcessor) {
+               final List<String> lines = new ArrayList<String>();
+               final StringBuilder currentLine = new StringBuilder();
+               final List<Integer> quoted = new ArrayList<Integer>();
+               final List<Node> ignoredNodes = new ArrayList<Node>();
+
+               if (element != null) {
+                       new NodeTraversor(new NodeVisitor() {
+                               @Override
+                               public void head(Node node, int depth) {
+                                       if (quoteProcessor.ignoreNode(node)
+                                                       || ignoredNodes.contains(node.parentNode())) {
+                                               ignoredNodes.add(node);
+                                               return;
+                                       }
+
+                                       String prep = "";
+                                       for (int i = 0; i < quoted.size(); i++) {
+                                               prep += ">";
+                                       }
+                                       prep += " ";
+
+                                       boolean enterQuote = quoteProcessor.detectQuote(node);
+                                       boolean leaveQuote = quoted.contains(depth);
+
+                                       if (enterQuote) {
+                                               quoted.add(depth);
+                                       }
+
+                                       if (leaveQuote) {
+                                               quoted.remove(Integer.valueOf(depth));
+                                       }
+
+                                       if (enterQuote || leaveQuote) {
+                                               if (currentLine.length() > 0) {
+                                                       if (currentLine.charAt(currentLine.length() - 1) == '\n') {
+                                                               currentLine.setLength(currentLine.length() - 1);
+                                                       }
+                                                       for (String l : currentLine.toString().split("\n")) {
+                                                               lines.add(prep + l);
+                                                       }
+                                               }
+                                               currentLine.setLength(0);
+                                       }
+
+                                       if (node instanceof Element) {
+                                               Element element = (Element) node;
+                                               boolean block = element.isBlock()
+                                                               || element.tagName().equalsIgnoreCase("br");
+                                               if (block && currentLine.length() > 0) {
+                                                       currentLine.append("\n");
+                                               }
+                                       } else if (node instanceof TextNode) {
+                                               TextNode textNode = (TextNode) node;
+                                               String line = StringUtil.normaliseWhitespace(textNode
+                                                               .getWholeText());
+
+                                               currentLine.append(quoteProcessor.processText(line));
+                                               currentLine.append(" ");
+                                       }
+                               }
+
+                               @Override
+                               public void tail(Node node, int depth) {
+                               }
+                       }).traverse(element);
+               }
+
+               if (currentLine.length() > 0) {
+                       String prep = "";
+                       for (int i = 0; i < quoted.size(); i++) {
+                               prep += ">";
+                       }
+                       prep += " ";
+                       if (currentLine.length() > 0) {
+                               if (currentLine.charAt(currentLine.length() - 1) == '\n') {
+                                       currentLine.setLength(currentLine.length() - 1);
+                               }
+                               for (String l : currentLine.toString().split("\n")) {
+                                       lines.add(prep + l);
+                               }
+                       }
+               }
+
+               for (int i = 0; i < lines.size(); i++) {
+                       lines.set(i, lines.get(i).replace("  ", " ").trim());
+               }
+
+               return lines;
+       }
 }
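diff --git a/src/be/nikiroo/gofetch/support/LWN.java b/src/be/nikiroo/gofetch/support/LWN.java
index 2fea78a864855529e79d3022e92da3476027d97a..dba4c3bfa922f684729352baa2d438e075c1ac88 100644 (file)
--- a/src/be/nikiroo/gofetch/support/LWN.java
+++ b/src/be/nikiroo/gofetch/support/LWN.java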
@@ -139,8 +139,8 @@ public class LWN extends BasicSupport {
        }
 
        private Comment getComment(Element commentElement) {
-               String title = firstOrEmpty(commentElement, "CommentTitle");
-               String author = firstOrEmpty(commentElement, "CommentPoster");
+               String title = firstOrEmpty(commentElement, "CommentTitle").text();
+               String author = firstOrEmpty(commentElement, "CommentPoster").text();
 
                String date = "";
                int pos = author.lastIndexOf(" by ");
@@ -153,69 +153,54 @@ public class LWN extends BasicSupport {
                        }
                }
 
-               String content = "";
+               Element content = null;
                Elements commentBodyElements = commentElement
                                .getElementsByClass("CommentBody");
                if (commentBodyElements.size() > 0) {
-                       for (Node contentNode : commentBodyElements.get(0).childNodes()) {
-                               if (contentNode instanceof Element) {
-                                       Element contentElement = (Element) contentNode;
-                                       if (!contentElement.hasClass("CommentPoster")) {
-                                               content = content.trim() + " "
-                                                               + contentElement.text().trim();
-                                       }
-                               } else {
-                                       content = content.trim() + " "
-                                                       + contentNode.outerHtml().trim();
-                               }
-
-                       }
-                       content = content.trim();
+                       content = commentBodyElements.get(0);
                }
 
                Comment comment = new Comment(commentElement.id(), author, title, date,
-                               content);
+                               toLines(content));
 
                return comment;
        }
 
-       /**
-        * Get the first element of the given class, or an empty {@link String} if
-        * none found.
-        * 
-        * @param element
-        *            the element to look in
-        * @param className
-        *            the class to look for
-        * 
-        * @return the value or an empty {@link String}
-        */
-       private String firstOrEmpty(Element element, String className) {
-               Elements subElements = element.getElementsByClass(className);
-               if (subElements.size() > 0) {
-                       return subElements.get(0).text();
-               }
+       private List<String> toLines(Element element) {
+               return toLines(element, new QuoteProcessor() {
+                       @Override
+                       public String processText(String text) {
+                               while (text.startsWith(">")) { // comments
+                                       text = text.substring(1).trim();
+                               }
 
-               return "";
-       }
+                               return text;
+                       }
 
-       /**
-        * Get the first element of the given tag, or an empty {@link String} if
-        * none found.
-        * 
-        * @param element
-        *            the element to look in
-        * @param tagName
-        *            the tag to look for
-        * 
-        * @return the value or an empty {@link String}
-        */
-       private String firstOrEmptyTag(Element element, String tagName) {
-               Elements subElements = element.getElementsByTag(tagName);
-               if (subElements.size() > 0) {
-                       return subElements.get(0).text();
-               }
+                       @Override
+                       public boolean detectQuote(Node node) {
+                               if (node instanceof Element) {
+                                       Element elementNode = (Element) node;
+                                       if (elementNode.tagName().equals("blockquote")
+                                                       || elementNode.hasClass("QuotedText")) {
+                                               return true;
+                                       }
+                               }
+
+                               return false;
+                       }
 
-               return "";
+                       @Override
+                       public boolean ignoreNode(Node node) {
+                               if (node instanceof Element) {
+                                       Element elementNode = (Element) node;
+                                       if (elementNode.hasClass("CommentPoster")) {
+                                               return true;
+                                       }
+                               }
+
+                               return false;
+                       }
+               });
        }
 }
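diff --git a/src/be/nikiroo/gofetch/support/Pipedot.java b/src/be/nikiroo/gofetch/support/Pipedot.java
index 2436540f20c6c8748f9d4b8c6b693c4c985c45d1..1bd5173b331e6e2ee69552efaa77e964a1e3fe20 100644 (file)
--- a/src/be/nikiroo/gofetch/support/Pipedot.java
+++ b/src/be/nikiroo/gofetch/support/Pipedot.java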
@@ -9,6 +9,7 @@ import java.util.List;
 import org.jsoup.helper.DataUtil;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
 import org.jsoup.select.Elements;
 
 import be.nikiroo.gofetch.data.Comment;
@@ -117,9 +118,9 @@ public class Pipedot extends BasicSupport {
        }
 
        private Comment getComment(Element commentElement) {
-               String title = firstOrEmptyTag(commentElement, "h3");
-               String author = firstOrEmpty(commentElement, "h4");
-               String content = firstOrEmpty(commentElement, "comment-body");
+               String title = firstOrEmptyTag(commentElement, "h3").text();
+               String author = firstOrEmpty(commentElement, "h4").text();
+               Element content = firstOrEmpty(commentElement, "comment-body");
 
                String date = "";
                int pos = author.lastIndexOf(" on ");
@@ -129,7 +130,7 @@ public class Pipedot extends BasicSupport {
                }
 
                Comment comment = new Comment(commentElement.id(), author, title, date,
-                               content);
+                               toLines(content));
 
                Elements commentOutline = commentElement
                                .getElementsByClass("comment-outline");
@@ -140,43 +141,30 @@ public class Pipedot extends BasicSupport {
                return comment;
        }
 
-       /**
-        * Get the first element of the given class, or an empty {@link String} if
-        * none found.
-        * 
-        * @param element
-        *            the element to look in
-        * @param className
-        *            the class to look for
-        * 
-        * @return the value or an empty {@link String}
-        */
-       private String firstOrEmpty(Element element, String className) {
-               Elements subElements = element.getElementsByClass(className);
-               if (subElements.size() > 0) {
-                       return subElements.get(0).text();
-               }
+       private List<String> toLines(Element element) {
+               return toLines(element, new QuoteProcessor() {
+                       @Override
+                       public String processText(String text) {
+                               return text;
+                       }
 
-               return "";
-       }
+                       @Override
+                       public boolean detectQuote(Node node) {
+                               if (node instanceof Element) {
+                                       Element elementNode = (Element) node;
+                                       if (elementNode.tagName().equals("blockquote")
+                                                       || elementNode.hasClass("quote")) {
+                                               return true;
+                                       }
+                               }
 
-       /**
-        * Get the first element of the given tag, or an empty {@link String} if
-        * none found.
-        * 
-        * @param element
-        *            the element to look in
-        * @param tagName
-        *            the tag to look for
-        * 
-        * @return the value or an empty {@link String}
-        */
-       private String firstOrEmptyTag(Element element, String tagName) {
-               Elements subElements = element.getElementsByTag(tagName);
-               if (subElements.size() > 0) {
-                       return subElements.get(0).text();
-               }
+                               return false;
+                       }
 
-               return "";
+                       @Override
+                       public boolean ignoreNode(Node node) {
+                               return false;
+                       }
+               });
        }
 }
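diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java
index 6a5395498de5894657e39311cc68ee3b8f9529e2..8776e35fc7eac132aea50ff6ed7ead90d301fff7 100644 (file)
--- a/src/be/nikiroo/gofetch/support/Slashdot.java
+++ b/src/be/nikiroo/gofetch/support/Slashdot.java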
@@ -9,6 +9,7 @@ import java.util.List;
 import org.jsoup.helper.DataUtil;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
 import org.jsoup.select.Elements;
 
 import be.nikiroo.gofetch.data.Comment;
@@ -92,41 +93,82 @@ public class Slashdot extends BasicSupport {
 
        private List<Comment> getComments(Element listing) {
                List<Comment> comments = new ArrayList<Comment>();
+               Comment lastComment = null;
                for (Element commentElement : listing.children()) {
                        if (commentElement.hasClass("comment")) {
-                               Comment comment = getComment(commentElement);
-                               if (!comment.isEmpty()) {
-                                       comments.add(comment);
+                               if (!commentElement.hasClass("hidden")) {
+                                       lastComment = getComment(commentElement);
+                                       comments.add(lastComment);
+                               }
+
+                               List<Comment> subComments = new ArrayList<Comment>();
+                               for (Element child : commentElement.children()) {
+                                       if (child.id().contains("commtree_")) {
+                                               subComments.addAll(getComments(child));
+                                       }
+                               }
+
+                               if (lastComment == null) {
+                                       comments.addAll(subComments);
+                               } else {
+                                       lastComment.addAll(subComments);
                                }
                        }
                }
+
                return comments;
        }
 
+       /**
+        * Get a comment from the given element.
+        * 
+        * @param commentElement
+        *            the element to get the comment of.
+        * 
+        * @return the comment, <b>NOT</b> including sub-comments
+        */
        private Comment getComment(Element commentElement) {
-               String title = firstOrEmpty(commentElement, "title");
-               String author = firstOrEmpty(commentElement, "by");
-               String content = firstOrEmpty(commentElement, "commentBody");
-               String date = firstOrEmpty(commentElement, "otherdetails");
+               String title = firstOrEmpty(commentElement, "title").text();
+               String author = firstOrEmpty(commentElement, "by").text();
+               String date = firstOrEmpty(commentElement, "otherdetails").text();
+               Element content = firstOrEmpty(commentElement, "commentBody");
+
+               return new Comment(commentElement.id(), author, title, date,
+                               toLines(content));
+       }
 
-               Comment comment = new Comment(commentElement.id(), author, title, date,
-                               content);
+       private List<String> toLines(Element element) {
+               return toLines(element, new QuoteProcessor() {
+                       @Override
+                       public String processText(String text) {
+                               while (text.startsWith(">")) { // comment in one-liners
+                                       text = text.substring(1).trim();
+                               }
 
-               for (Element child : commentElement.children()) {
-                       if (child.id().contains("commtree_")) {
-                               comment.addAll(getComments(child));
+                               return text;
                        }
-               }
 
-               return comment;
-       }
+                       @Override
+                       public boolean detectQuote(Node node) {
+                               if (node instanceof Element) {
+                                       Element elementNode = (Element) node;
+                                       if (elementNode.tagName().equals("blockquote")
+                                                       || elementNode.hasClass("quote")
+                                                       || (elementNode.tagName().equals("p")
+                                                                       && elementNode.textNodes().size() == 1 && elementNode
+                                                                       .textNodes().get(0).getWholeText()
+                                                                       .startsWith(">"))) {
+                                               return true;
+                                       }
+                               }
 
-       private String firstOrEmpty(Element element, String className) {
-               Elements subElements = element.getElementsByClass(className);
-               if (subElements.size() > 0) {
-                       return subElements.get(0).text();
-               }
+                               return false;
+                       }
 
-               return "";
+                       @Override
+                       public boolean ignoreNode(Node node) {
+                               return false;
+                       }
+               });
        }
 }