From 27008a8782c0ed96e07c8dc39ff0ed1f5163a9d0 Mon Sep 17 00:00:00 2001
From: Niki Roo
Date: Tue, 22 Aug 2017 18:56:51 +0200
Subject: [PATCH] Version 0.2.0: supports LWN, quotes, BR tags
---
VERSION | 2 +-
changelog.md | 4 +-
src/be/nikiroo/gofetch/data/Comment.java | 14 +-
src/be/nikiroo/gofetch/output/Gopher.java | 21 ++-
src/be/nikiroo/gofetch/output/Html.java | 13 +-
.../nikiroo/gofetch/support/BasicSupport.java | 149 ++++++++++++++++++
src/be/nikiroo/gofetch/support/LWN.java | 91 +++++------
src/be/nikiroo/gofetch/support/Pipedot.java | 66 ++++----
src/be/nikiroo/gofetch/support/Slashdot.java | 84 +++++++---
9 files changed, 315 insertions(+), 129 deletions(-)
diff --git a/VERSION b/VERSION
index 17e51c3..0ea3a94 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.1.1
+0.2.0
diff --git a/changelog.md b/changelog.md
index a3ec4a2..13ebf3a 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,8 +1,10 @@
# Gofetch
-## Version WIP
+## Version 0.2.0
- Add Linux Weekly News support
+- Correctly handle BR tags
+- Supports quotes
## Version 0.1.1
diff --git a/src/be/nikiroo/gofetch/data/Comment.java b/src/be/nikiroo/gofetch/data/Comment.java
index 44c0de1..963d6aa 100644
--- a/src/be/nikiroo/gofetch/data/Comment.java
+++ b/src/be/nikiroo/gofetch/data/Comment.java
@@ -9,16 +9,16 @@ public class Comment implements Iterable {
private String author;
private String title;
private String date;
- private String content;
+ private List lines;
private List children;
public Comment(String id, String author, String title, String date,
- String content) {
+ List lines) {
this.id = id;
this.author = author;
this.title = title;
this.date = date;
- this.content = content;
+ this.lines = lines;
this.children = new ArrayList();
}
@@ -61,13 +61,13 @@ public class Comment implements Iterable {
/**
* @return the content
*/
- public String getContent() {
- return content;
+ public List getContentLines() {
+ return lines;
}
public boolean isEmpty() {
- return children.isEmpty()
- && ("" + author + title + content).trim().isEmpty();
+ return children.isEmpty() && lines.isEmpty()
+ && ("" + author + title).trim().isEmpty();
}
@Override
diff --git a/src/be/nikiroo/gofetch/output/Gopher.java b/src/be/nikiroo/gofetch/output/Gopher.java
index 3fa6035..6dcb4aa 100644
--- a/src/be/nikiroo/gofetch/output/Gopher.java
+++ b/src/be/nikiroo/gofetch/output/Gopher.java
@@ -65,13 +65,30 @@ public class Gopher extends Output {
space = space.substring(0, LINE_SIZE - 20);
}
- appendLeft(builder, comment.getTitle(), ">> ", " ", space);
+ appendLeft(builder, comment.getTitle(), "** ", " ", space);
appendLeft(builder, "(" + comment.getAuthor() + ")", " ", " ",
space);
builder.append("i\r\n");
- appendLeft(builder, comment.getContent(), " ", " ", space);
+ for (String line : comment.getContentLines()) {
+ int depth = 0;
+ while (line.length() > depth && line.charAt(depth) == '>') {
+ depth++;
+ }
+ line = line.substring(depth).trim();
+
+ String prep = " ";
+ for (int i = 0; i < depth; i++) {
+ prep += ">";
+ }
+
+ if (depth > 0) {
+ prep += " ";
+ }
+
+ appendLeft(builder, line, prep, prep, space);
+ }
builder.append("i\r\n");
for (Comment subComment : comment) {
diff --git a/src/be/nikiroo/gofetch/output/Html.java b/src/be/nikiroo/gofetch/output/Html.java
index cdc77a4..33c99c8 100644
--- a/src/be/nikiroo/gofetch/output/Html.java
+++ b/src/be/nikiroo/gofetch/output/Html.java
@@ -17,7 +17,7 @@ public class Html extends Output {
}
String gopherUrl = "gopher://" + hostname + sel + ":" + port;
-
+
StringBuilder builder = new StringBuilder();
appendPre(builder);
@@ -28,9 +28,9 @@ public class Html extends Output {
+ ".
\n"//
+ "They are simply scrapped from their associated webpage and updated a few times a day.
\n"//
);
-
+
appendPost(builder);
-
+
return builder.toString();
}
@@ -101,8 +101,11 @@ public class Html extends Output {
.append("\n");
builder.append(space).append(" ")
.append(comment.getAuthor()).append("
\n");
- builder.append(space).append(" \n");
+ builder.append(space).append(" \n");
for (Comment subComment : comment) {
appendHtml(builder, subComment, space + " ");
}
diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
index 7a1d0ea..1db066b 100644
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -4,9 +4,18 @@ import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
+import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;
+import org.jsoup.helper.StringUtil;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+import org.jsoup.select.Elements;
+import org.jsoup.select.NodeTraversor;
+import org.jsoup.select.NodeVisitor;
+
import be.nikiroo.gofetch.data.Story;
public abstract class BasicSupport {
@@ -14,6 +23,14 @@ public abstract class BasicSupport {
SLASHDOT, PIPEDOT, LWN,
}
+ public interface QuoteProcessor {
+ public boolean detectQuote(Node node);
+
+ public String processText(String text);
+
+ public boolean ignoreNode(Node node);
+ }
+
static private String preselector;
private Type type;
@@ -93,4 +110,136 @@ public abstract class BasicSupport {
return in;
}
+
+ /**
+ * Get the first {@link Element} of the given class, or an empty span
+ * {@link Element} if none found.
+ *
+ * @param element
+ * the element to look in
+ * @param className
+ * the class to look for
+ *
+ * @return the value or an empty span {@link Element}
+ */
+ static protected Element firstOrEmpty(Element element, String className) {
+ Elements subElements = element.getElementsByClass(className);
+ if (subElements.size() > 0) {
+ return subElements.get(0);
+ }
+
+ return new Element("span");
+ }
+
+ /**
+ * Get the first {@link Element} of the given tag, or an empty span
+ * {@link Element} if none found.
+ *
+ * @param element
+ * the element to look in
+ * @param tagName
+ * the tag to look for
+ *
+ * @return the value or an empty span {@link Element}
+ */
+ static protected Element firstOrEmptyTag(Element element, String tagName) {
+ Elements subElements = element.getElementsByTag(tagName);
+ if (subElements.size() > 0) {
+ return subElements.get(0);
+ }
+
+ return new Element("span");
+ }
+
+ static protected List toLines(Element element,
+ final QuoteProcessor quoteProcessor) {
+ final List lines = new ArrayList();
+ final StringBuilder currentLine = new StringBuilder();
+ final List quoted = new ArrayList();
+ final List ignoredNodes = new ArrayList();
+
+ if (element != null) {
+ new NodeTraversor(new NodeVisitor() {
+ @Override
+ public void head(Node node, int depth) {
+ if (quoteProcessor.ignoreNode(node)
+ || ignoredNodes.contains(node.parentNode())) {
+ ignoredNodes.add(node);
+ return;
+ }
+
+ String prep = "";
+ for (int i = 0; i < quoted.size(); i++) {
+ prep += ">";
+ }
+ prep += " ";
+
+ boolean enterQuote = quoteProcessor.detectQuote(node);
+ boolean leaveQuote = quoted.contains(depth);
+
+ if (enterQuote) {
+ quoted.add(depth);
+ }
+
+ if (leaveQuote) {
+ quoted.remove(Integer.valueOf(depth));
+ }
+
+ if (enterQuote || leaveQuote) {
+ if (currentLine.length() > 0) {
+ if (currentLine.charAt(currentLine.length() - 1) == '\n') {
+ currentLine.setLength(currentLine.length() - 1);
+ }
+ for (String l : currentLine.toString().split("\n")) {
+ lines.add(prep + l);
+ }
+ }
+ currentLine.setLength(0);
+ }
+
+ if (node instanceof Element) {
+ Element element = (Element) node;
+ boolean block = element.isBlock()
+ || element.tagName().equalsIgnoreCase("br");
+ if (block && currentLine.length() > 0) {
+ currentLine.append("\n");
+ }
+ } else if (node instanceof TextNode) {
+ TextNode textNode = (TextNode) node;
+ String line = StringUtil.normaliseWhitespace(textNode
+ .getWholeText());
+
+ currentLine.append(quoteProcessor.processText(line));
+ currentLine.append(" ");
+ }
+ }
+
+ @Override
+ public void tail(Node node, int depth) {
+ }
+ }).traverse(element);
+ }
+
+ if (currentLine.length() > 0) {
+ String prep = "";
+ for (int i = 0; i < quoted.size(); i++) {
+ prep += ">";
+ }
+ prep += " ";
+ if (currentLine.length() > 0) {
+ if (currentLine.charAt(currentLine.length() - 1) == '\n') {
+ currentLine.setLength(currentLine.length() - 1);
+ }
+ for (String l : currentLine.toString().split("\n")) {
+ lines.add(prep + l);
+ }
+ }
+ }
+
+ for (int i = 0; i < lines.size(); i++) {
+ lines.set(i, lines.get(i).replace(" ", " ").trim());
+ }
+
+ return lines;
+ }
}
diff --git a/src/be/nikiroo/gofetch/support/LWN.java b/src/be/nikiroo/gofetch/support/LWN.java
index 2fea78a..dba4c3b 100644
--- a/src/be/nikiroo/gofetch/support/LWN.java
+++ b/src/be/nikiroo/gofetch/support/LWN.java
@@ -139,8 +139,8 @@ public class LWN extends BasicSupport {
}
private Comment getComment(Element commentElement) {
- String title = firstOrEmpty(commentElement, "CommentTitle");
- String author = firstOrEmpty(commentElement, "CommentPoster");
+ String title = firstOrEmpty(commentElement, "CommentTitle").text();
+ String author = firstOrEmpty(commentElement, "CommentPoster").text();
String date = "";
int pos = author.lastIndexOf(" by ");
@@ -153,69 +153,54 @@ public class LWN extends BasicSupport {
}
}
- String content = "";
+ Element content = null;
Elements commentBodyElements = commentElement
.getElementsByClass("CommentBody");
if (commentBodyElements.size() > 0) {
- for (Node contentNode : commentBodyElements.get(0).childNodes()) {
- if (contentNode instanceof Element) {
- Element contentElement = (Element) contentNode;
- if (!contentElement.hasClass("CommentPoster")) {
- content = content.trim() + " "
- + contentElement.text().trim();
- }
- } else {
- content = content.trim() + " "
- + contentNode.outerHtml().trim();
- }
-
- }
- content = content.trim();
+ content = commentBodyElements.get(0);
}
Comment comment = new Comment(commentElement.id(), author, title, date,
- content);
+ toLines(content));
return comment;
}
- /**
- * Get the first element of the given class, or an empty {@link String} if
- * none found.
- *
- * @param element
- * the element to look in
- * @param className
- * the class to look for
- *
- * @return the value or an empty {@link String}
- */
- private String firstOrEmpty(Element element, String className) {
- Elements subElements = element.getElementsByClass(className);
- if (subElements.size() > 0) {
- return subElements.get(0).text();
- }
+ private List toLines(Element element) {
+ return toLines(element, new QuoteProcessor() {
+ @Override
+ public String processText(String text) {
+ while (text.startsWith(">")) { // comments
+ text = text.substring(1).trim();
+ }
- return "";
- }
+ return text;
+ }
- /**
- * Get the first element of the given tag, or an empty {@link String} if
- * none found.
- *
- * @param element
- * the element to look in
- * @param tagName
- * the tag to look for
- *
- * @return the value or an empty {@link String}
- */
- private String firstOrEmptyTag(Element element, String tagName) {
- Elements subElements = element.getElementsByTag(tagName);
- if (subElements.size() > 0) {
- return subElements.get(0).text();
- }
+ @Override
+ public boolean detectQuote(Node node) {
+ if (node instanceof Element) {
+ Element elementNode = (Element) node;
+ if (elementNode.tagName().equals("blockquote")
+ || elementNode.hasClass("QuotedText")) {
+ return true;
+ }
+ }
+
+ return false;
+ }
- return "";
+ @Override
+ public boolean ignoreNode(Node node) {
+ if (node instanceof Element) {
+ Element elementNode = (Element) node;
+ if (elementNode.hasClass("CommentPoster")) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+ });
}
}
diff --git a/src/be/nikiroo/gofetch/support/Pipedot.java b/src/be/nikiroo/gofetch/support/Pipedot.java
index 2436540..1bd5173 100644
--- a/src/be/nikiroo/gofetch/support/Pipedot.java
+++ b/src/be/nikiroo/gofetch/support/Pipedot.java
@@ -9,6 +9,7 @@ import java.util.List;
import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import be.nikiroo.gofetch.data.Comment;
@@ -117,9 +118,9 @@ public class Pipedot extends BasicSupport {
}
private Comment getComment(Element commentElement) {
- String title = firstOrEmptyTag(commentElement, "h3");
- String author = firstOrEmpty(commentElement, "h4");
- String content = firstOrEmpty(commentElement, "comment-body");
+ String title = firstOrEmptyTag(commentElement, "h3").text();
+ String author = firstOrEmpty(commentElement, "h4").text();
+ Element content = firstOrEmpty(commentElement, "comment-body");
String date = "";
int pos = author.lastIndexOf(" on ");
@@ -129,7 +130,7 @@ public class Pipedot extends BasicSupport {
}
Comment comment = new Comment(commentElement.id(), author, title, date,
- content);
+ toLines(content));
Elements commentOutline = commentElement
.getElementsByClass("comment-outline");
@@ -140,43 +141,30 @@ public class Pipedot extends BasicSupport {
return comment;
}
- /**
- * Get the first element of the given class, or an empty {@link String} if
- * none found.
- *
- * @param element
- * the element to look in
- * @param className
- * the class to look for
- *
- * @return the value or an empty {@link String}
- */
- private String firstOrEmpty(Element element, String className) {
- Elements subElements = element.getElementsByClass(className);
- if (subElements.size() > 0) {
- return subElements.get(0).text();
- }
+ private List toLines(Element element) {
+ return toLines(element, new QuoteProcessor() {
+ @Override
+ public String processText(String text) {
+ return text;
+ }
- return "";
- }
+ @Override
+ public boolean detectQuote(Node node) {
+ if (node instanceof Element) {
+ Element elementNode = (Element) node;
+ if (elementNode.tagName().equals("blockquote")
+ || elementNode.hasClass("quote")) {
+ return true;
+ }
+ }
- /**
- * Get the first element of the given tag, or an empty {@link String} if
- * none found.
- *
- * @param element
- * the element to look in
- * @param tagName
- * the tag to look for
- *
- * @return the value or an empty {@link String}
- */
- private String firstOrEmptyTag(Element element, String tagName) {
- Elements subElements = element.getElementsByTag(tagName);
- if (subElements.size() > 0) {
- return subElements.get(0).text();
- }
+ return false;
+ }
- return "";
+ @Override
+ public boolean ignoreNode(Node node) {
+ return false;
+ }
+ });
}
}
diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java
index 6a53954..8776e35 100644
--- a/src/be/nikiroo/gofetch/support/Slashdot.java
+++ b/src/be/nikiroo/gofetch/support/Slashdot.java
@@ -9,6 +9,7 @@ import java.util.List;
import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import be.nikiroo.gofetch.data.Comment;
@@ -92,41 +93,82 @@ public class Slashdot extends BasicSupport {
private List getComments(Element listing) {
List comments = new ArrayList();
+ Comment lastComment = null;
for (Element commentElement : listing.children()) {
if (commentElement.hasClass("comment")) {
- Comment comment = getComment(commentElement);
- if (!comment.isEmpty()) {
- comments.add(comment);
+ if (!commentElement.hasClass("hidden")) {
+ lastComment = getComment(commentElement);
+ comments.add(lastComment);
+ }
+
+ List subComments = new ArrayList();
+ for (Element child : commentElement.children()) {
+ if (child.id().contains("commtree_")) {
+ subComments.addAll(getComments(child));
+ }
+ }
+
+ if (lastComment == null) {
+ comments.addAll(subComments);
+ } else {
+ lastComment.addAll(subComments);
}
}
}
+
return comments;
}
+ /**
+ * Get a comment from the given element.
+ *
+ * @param commentElement
+ * the element to get the comment of.
+ *
+ * @return the comment, NOT including sub-comments
+ */
private Comment getComment(Element commentElement) {
- String title = firstOrEmpty(commentElement, "title");
- String author = firstOrEmpty(commentElement, "by");
- String content = firstOrEmpty(commentElement, "commentBody");
- String date = firstOrEmpty(commentElement, "otherdetails");
+ String title = firstOrEmpty(commentElement, "title").text();
+ String author = firstOrEmpty(commentElement, "by").text();
+ String date = firstOrEmpty(commentElement, "otherdetails").text();
+ Element content = firstOrEmpty(commentElement, "commentBody");
+
+ return new Comment(commentElement.id(), author, title, date,
+ toLines(content));
+ }
- Comment comment = new Comment(commentElement.id(), author, title, date,
- content);
+ private List toLines(Element element) {
+ return toLines(element, new QuoteProcessor() {
+ @Override
+ public String processText(String text) {
+ while (text.startsWith(">")) { // comment in one-liners
+ text = text.substring(1).trim();
+ }
- for (Element child : commentElement.children()) {
- if (child.id().contains("commtree_")) {
- comment.addAll(getComments(child));
+ return text;
}
- }
- return comment;
- }
+ @Override
+ public boolean detectQuote(Node node) {
+ if (node instanceof Element) {
+ Element elementNode = (Element) node;
+ if (elementNode.tagName().equals("blockquote")
+ || elementNode.hasClass("quote")
+ || (elementNode.tagName().equals("p")
+ && elementNode.textNodes().size() == 1 && elementNode
+ .textNodes().get(0).getWholeText()
+ .startsWith(">"))) {
+ return true;
+ }
+ }
- private String firstOrEmpty(Element element, String className) {
- Elements subElements = element.getElementsByClass(className);
- if (subElements.size() > 0) {
- return subElements.get(0).text();
- }
+ return false;
+ }
- return "";
+ @Override
+ public boolean ignoreNode(Node node) {
+ return false;
+ }
+ });
}
}
--
2.27.0