X-Git-Url: https://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FSlashdot.java;h=43d35f4afa13281d8565da54e113fab82529f35f;hb=136ab80122a17caa0720116d6d2552521239fbb3;hp=5dfa03b227e30e2786d0a3dacf0195c6e5d66096;hpb=737852686d8897331706ed4b902dbd9d5038cb53;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java index 5dfa03b..43d35f4 100644 --- a/src/be/nikiroo/gofetch/support/Slashdot.java +++ b/src/be/nikiroo/gofetch/support/Slashdot.java @@ -9,11 +9,17 @@ import java.util.List; import org.jsoup.helper.DataUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; import org.jsoup.select.Elements; import be.nikiroo.gofetch.data.Comment; import be.nikiroo.gofetch.data.Story; +/** + * Support https://slashdot.org/. + * + * @author niki + */ public class Slashdot extends BasicSupport { @Override public String getDescription() { @@ -25,14 +31,15 @@ public class Slashdot extends BasicSupport { List list = new ArrayList(); URL url = new URL("https://slashdot.org/"); - InputStream in = open(url); + InputStream in = downloader.open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); - Elements stories = doc.getElementsByTag("header"); - for (Element story : stories) { - Elements titles = story.getElementsByClass("story-title"); + Elements articles = doc.getElementsByTag("header"); + for (Element article : articles) { + Elements titles = article.getElementsByClass("story-title"); if (titles.size() == 0) { continue; } + Element title = titles.get(0); String id = "" + title.attr("id"); @@ -51,7 +58,7 @@ public class Slashdot extends BasicSupport { } String details = ""; - Elements detailsElements = story.getElementsByClass("details"); + Elements detailsElements = article.getElementsByClass("details"); if (detailsElements.size() > 0) { details = detailsElements.get(0).text(); } @@ -70,57 +77,93 @@ public class Slashdot extends BasicSupport { } @Override - public List getComments(Story story) throws IOException { + public void fetch(Story story) throws IOException { List comments = new ArrayList(); URL url = new URL(story.getUrlInternal()); - InputStream in = open(url); + InputStream in = downloader.open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); Element listing = doc.getElementById("commentlisting"); if (listing != null) { comments.addAll(getComments(listing)); } - return comments; + story.setComments(comments); } private List getComments(Element listing) { List comments = new ArrayList(); + Comment lastComment = null; for (Element commentElement : listing.children()) { if (commentElement.hasClass("comment")) { - Comment comment = getComment(commentElement); - if (!comment.isEmpty()) { - comments.add(comment); + if (!commentElement.hasClass("hidden")) { + lastComment = getComment(commentElement); + comments.add(lastComment); + } + + List subComments = new ArrayList(); + for (Element child : commentElement.children()) { + if (child.id().contains("commtree_")) { + subComments.addAll(getComments(child)); + } + } + + if (lastComment == null) { + comments.addAll(subComments); + } else { + lastComment.addAll(subComments); } } } + return comments; } + /** + * Get a comment from the given element. + * + * @param commentElement + * the element to get the comment of. + * + * @return the comment, NOT including sub-comments + */ private Comment getComment(Element commentElement) { - String title = firstOrEmpty(commentElement, "title"); - String author = firstOrEmpty(commentElement, "by"); - String content = firstOrEmpty(commentElement, "commentBody"); - String date = firstOrEmpty(commentElement, "otherdetails"); + String title = firstOrEmpty(commentElement, "title").text(); + String author = firstOrEmpty(commentElement, "by").text(); + String date = firstOrEmpty(commentElement, "otherdetails").text(); + Element content = firstOrEmpty(commentElement, "commentBody"); + + return new Comment(commentElement.id(), author, title, date, + toLines(content)); + } - Comment comment = new Comment(commentElement.id(), author, title, date, - content); + private List toLines(Element element) { + return toLines(element, new BasicElementProcessor() { + @Override + public String processText(String text) { + while (text.startsWith(">")) { // comment in one-liners + text = text.substring(1).trim(); + } - for (Element child : commentElement.children()) { - if (child.id().contains("commtree_")) { - comment.addAll(getComments(child)); + return text; } - } - - return comment; - } - private String firstOrEmpty(Element element, String className) { - Elements subElements = element.getElementsByClass(className); - if (subElements.size() > 0) { - return subElements.get(0).text(); - } + @Override + public boolean detectQuote(Node node) { + if (node instanceof Element) { + Element elementNode = (Element) node; + if (elementNode.tagName().equals("blockquote") + || elementNode.hasClass("quote") + || (elementNode.tagName().equals("p") + && elementNode.textNodes().size() == 1 && elementNode + .textNodes().get(0).getWholeText() + .startsWith(">"))) { + return true; + } + } - return ""; + return false; + } + }); } }