More jDoc, a new BasicElementProcessor
[gofetch.git] / src / be / nikiroo / gofetch / support / Slashdot.java
index 6a5395498de5894657e39311cc68ee3b8f9529e2..1581d23cb2361f8fa55c912ff1e2516c8fc91d9d 100644 (file)
@@ -9,6 +9,7 @@ import java.util.List;
 import org.jsoup.helper.DataUtil;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
 import org.jsoup.select.Elements;
 
 import be.nikiroo.gofetch.data.Comment;
@@ -32,9 +33,9 @@ public class Slashdot extends BasicSupport {
                URL url = new URL("https://slashdot.org/");
                InputStream in = open(url);
                Document doc = DataUtil.load(in, "UTF-8", url.toString());
-               Elements stories = doc.getElementsByTag("header");
-               for (Element story : stories) {
-                       Elements titles = story.getElementsByClass("story-title");
+               Elements articles = doc.getElementsByTag("header");
+               for (Element article : articles) {
+                       Elements titles = article.getElementsByClass("story-title");
                        if (titles.size() == 0) {
                                continue;
                        }
@@ -57,7 +58,7 @@ public class Slashdot extends BasicSupport {
                        }
 
                        String details = "";
-                       Elements detailsElements = story.getElementsByClass("details");
+                       Elements detailsElements = article.getElementsByClass("details");
                        if (detailsElements.size() > 0) {
                                details = detailsElements.get(0).text();
                        }
@@ -92,41 +93,77 @@ public class Slashdot extends BasicSupport {
 
        private List<Comment> getComments(Element listing) {
                List<Comment> comments = new ArrayList<Comment>();
+               Comment lastComment = null;
                for (Element commentElement : listing.children()) {
                        if (commentElement.hasClass("comment")) {
-                               Comment comment = getComment(commentElement);
-                               if (!comment.isEmpty()) {
-                                       comments.add(comment);
+                               if (!commentElement.hasClass("hidden")) {
+                                       lastComment = getComment(commentElement);
+                                       comments.add(lastComment);
+                               }
+
+                               List<Comment> subComments = new ArrayList<Comment>();
+                               for (Element child : commentElement.children()) {
+                                       if (child.id().contains("commtree_")) {
+                                               subComments.addAll(getComments(child));
+                                       }
+                               }
+
+                               if (lastComment == null) {
+                                       comments.addAll(subComments);
+                               } else {
+                                       lastComment.addAll(subComments);
                                }
                        }
                }
+
                return comments;
        }
 
+       /**
+        * Get a comment from the given element.
+        * 
+        * @param commentElement
+        *            the element to get the comment of.
+        * 
+        * @return the comment, <b>NOT</b> including sub-comments
+        */
        private Comment getComment(Element commentElement) {
-               String title = firstOrEmpty(commentElement, "title");
-               String author = firstOrEmpty(commentElement, "by");
-               String content = firstOrEmpty(commentElement, "commentBody");
-               String date = firstOrEmpty(commentElement, "otherdetails");
+               String title = firstOrEmpty(commentElement, "title").text();
+               String author = firstOrEmpty(commentElement, "by").text();
+               String date = firstOrEmpty(commentElement, "otherdetails").text();
+               Element content = firstOrEmpty(commentElement, "commentBody");
 
-               Comment comment = new Comment(commentElement.id(), author, title, date,
-                               content);
+               return new Comment(commentElement.id(), author, title, date,
+                               toLines(content));
+       }
 
-               for (Element child : commentElement.children()) {
-                       if (child.id().contains("commtree_")) {
-                               comment.addAll(getComments(child));
-                       }
-               }
+       private List<String> toLines(Element element) {
+               return toLines(element, new BasicElementProcessor() {
+                       @Override
+                       public String processText(String text) {
+                               while (text.startsWith(">")) { // comment in one-liners
+                                       text = text.substring(1).trim();
+                               }
 
-               return comment;
-       }
+                               return text;
+                       }
 
-       private String firstOrEmpty(Element element, String className) {
-               Elements subElements = element.getElementsByClass(className);
-               if (subElements.size() > 0) {
-                       return subElements.get(0).text();
-               }
+                       @Override
+                       public boolean detectQuote(Node node) {
+                               if (node instanceof Element) {
+                                       Element elementNode = (Element) node;
+                                       if (elementNode.tagName().equals("blockquote")
+                                                       || elementNode.hasClass("quote")
+                                                       || (elementNode.tagName().equals("p")
+                                                                       && elementNode.textNodes().size() == 1 && elementNode
+                                                                       .textNodes().get(0).getWholeText()
+                                                                       .startsWith(">"))) {
+                                               return true;
+                                       }
+                               }
 
-               return "";
+                               return false;
+                       }
+               });
        }
 }