X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FSlashdot.java;h=43ce13d023ff498d3a759e9eac884f351d10e79a;hb=3e62b034c1981ae6329f06b3f8c0ee25c3683789;hp=5dfa03b227e30e2786d0a3dacf0195c6e5d66096;hpb=737852686d8897331706ed4b902dbd9d5038cb53;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java index 5dfa03b..43ce13d 100644 --- a/src/be/nikiroo/gofetch/support/Slashdot.java +++ b/src/be/nikiroo/gofetch/support/Slashdot.java @@ -1,19 +1,22 @@ package be.nikiroo.gofetch.support; import java.io.IOException; -import java.io.InputStream; import java.net.URL; +import java.util.AbstractMap; import java.util.ArrayList; import java.util.List; +import java.util.Map.Entry; -import org.jsoup.helper.DataUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; import org.jsoup.select.Elements; -import be.nikiroo.gofetch.data.Comment; -import be.nikiroo.gofetch.data.Story; - +/** + * Support https://slashdot.org/. + * + * @author niki + */ public class Slashdot extends BasicSupport { @Override public String getDescription() { @@ -21,104 +24,270 @@ public class Slashdot extends BasicSupport { } @Override - public List list() throws IOException { - List list = new ArrayList(); + protected List> getUrls() throws IOException { + List> urls = new ArrayList>(); + urls.add(new AbstractMap.SimpleEntry(new URL( + "https://slashdot.org/"), "")); + return urls; + } - URL url = new URL("https://slashdot.org/"); - InputStream in = open(url); - Document doc = DataUtil.load(in, "UTF-8", url.toString()); - Elements stories = doc.getElementsByTag("header"); - for (Element story : stories) { - Elements titles = story.getElementsByClass("story-title"); - if (titles.size() == 0) { - continue; - } - Element title = titles.get(0); + @Override + protected List getArticles(Document doc) { + return doc.getElementsByTag("header"); + } - String id = "" + title.attr("id"); + @Override + protected String getArticleId(Document doc, Element article) { + Element title = article.getElementsByClass("story-title").first(); + if (title != null) { + String id = title.attr("id"); if (id.startsWith("title-")) { id = id.substring("title-".length()); } - Elements links = title.getElementsByTag("a"); - String intUrl = null; - String extUrl = null; - if (links.size() > 0) { - intUrl = links.get(0).absUrl("href"); - } - if (links.size() > 1) { - extUrl = links.get(1).absUrl("href"); + return id; + } + + return ""; + } + + @Override + protected String getArticleTitle(Document doc, Element article) { + Element title = article.getElementsByClass("story-title").first(); + if (title != null) { + return title.text(); + } + + return ""; + } + + @Override + protected String getArticleAuthor(Document doc, Element article) { + // details: "Posted by AUTHOR on DATE from the further-crackdown dept." + String details = getArticleDetailsReal(article); + int pos = details.indexOf(" on "); + if (details.startsWith("Posted by ") && pos >= 0) { + return details.substring("Posted by ".length(), pos).trim(); + } + + return ""; + } + + @Override + protected String getArticleDate(Document doc, Element article) { + // Do not try bad articles + if (getArticleId(doc, article).isEmpty()) { + return ""; + } + + Element dateElement = doc.getElementsByTag("time").first(); + if (dateElement != null) { + String date = dateElement.text().trim(); + if (date.startsWith("on ")) { + date = date.substring("on ".length()); } - String details = ""; - Elements detailsElements = story.getElementsByClass("details"); - if (detailsElements.size() > 0) { - details = detailsElements.get(0).text(); + return date; + } + + return ""; + } + + @Override + protected String getArticleCategory(Document doc, Element article, + String currentCategory) { + Element categElement = doc.getElementsByClass("topic").first(); + if (categElement != null) { + return categElement.text(); + } + + return ""; + } + + @Override + protected String getArticleDetails(Document doc, Element article) { + // details: "Posted by AUTHOR on DATE from the further-crackdown dept." + String details = getArticleDetailsReal(article); + int pos = details.indexOf(" from the "); + if (pos >= 0) { + return details.substring(pos).trim(); + } + + return ""; + } + + @Override + protected String getArticleIntUrl(Document doc, Element article) { + Element title = article.getElementsByClass("story-title").first(); + if (title != null) { + Elements links = title.getElementsByTag("a"); + if (links.size() > 0) { + return links.get(0).absUrl("href"); } + } + return ""; + } - String body = ""; - Element bodyElement = doc.getElementById("text-" + id); - if (bodyElement != null) { - body = bodyElement.text(); + @Override + protected String getArticleExtUrl(Document doc, Element article) { + Element title = article.getElementsByClass("story-title").first(); + if (title != null) { + Elements links = title.getElementsByTag("a"); + if (links.size() > 1) { + return links.get(1).absUrl("href"); } + } + return ""; + } - list.add(new Story(getType(), id, title.text(), details, intUrl, - extUrl, body)); + @Override + protected String getArticleContent(Document doc, Element article) { + Element contentElement = doc // + .getElementById("text-" + getArticleId(doc, article)); + if (contentElement != null) { + return contentElement.text(); } - return list; + return ""; } @Override - public List getComments(Story story) throws IOException { - List comments = new ArrayList(); + protected Element getFullArticle(Document doc) { + return null; + } - URL url = new URL(story.getUrlInternal()); - InputStream in = open(url); - Document doc = DataUtil.load(in, "UTF-8", url.toString()); + @Override + protected List getFullArticleCommentPosts(Document doc, URL intUrl) { + List commentElements = new ArrayList(); Element listing = doc.getElementById("commentlisting"); if (listing != null) { - comments.addAll(getComments(listing)); + for (Element commentElement : listing.children()) { + if (commentElement.hasClass("comment")) { + commentElements.add(commentElement); + } + } } - return comments; + return commentElements; } - private List getComments(Element listing) { - List comments = new ArrayList(); - for (Element commentElement : listing.children()) { - if (commentElement.hasClass("comment")) { - Comment comment = getComment(commentElement); - if (!comment.isEmpty()) { - comments.add(comment); + @Override + protected ElementProcessor getElementProcessorFullArticle() { + return null; + } + + @Override + protected List getCommentCommentPosts(Document doc, + Element container) { + List commentElements = new ArrayList(); + for (Element child : container.children()) { + if (child.id().contains("commtree_")) { + for (Element sub : child.children()) { + if (sub.hasClass("comment")) { + commentElements.add(sub); + } } } } - return comments; + + return commentElements; } - private Comment getComment(Element commentElement) { - String title = firstOrEmpty(commentElement, "title"); - String author = firstOrEmpty(commentElement, "by"); - String content = firstOrEmpty(commentElement, "commentBody"); - String date = firstOrEmpty(commentElement, "otherdetails"); + @Override + protected String getCommentId(Element post) { + if (post.hasClass("hidden")) { + return ""; + } - Comment comment = new Comment(commentElement.id(), author, title, date, - content); + return post.id(); + } - for (Element child : commentElement.children()) { - if (child.id().contains("commtree_")) { - comment.addAll(getComments(child)); - } + @Override + protected String getCommentAuthor(Element post) { + if (post.hasClass("hidden")) { + return ""; } - return comment; + Element author = post.getElementsByClass("by").first(); + if (author != null) { + return author.text(); + } + + return ""; + } + + @Override + protected String getCommentTitle(Element post) { + if (post.hasClass("hidden")) { + return ""; + } + + Element title = post.getElementsByClass("title").first(); + if (title != null) { + return title.text(); + } + + return ""; + } + + @Override + protected String getCommentDate(Element post) { + if (post.hasClass("hidden")) { + return ""; + } + + Element date = post.getElementsByClass("otherdetails").first(); + if (date != null) { + return date.text(); + } + + return ""; + } + + @Override + protected Element getCommentContentElement(Element post) { + if (post.hasClass("hidden")) { + return null; + } + + return post.getElementsByClass("commentBody").first(); + } + + @Override + protected ElementProcessor getElementProcessorComment() { + return new BasicElementProcessor() { + @Override + public String processText(String text) { + while (text.startsWith(">")) { // comment in one-liners + text = text.substring(1).trim(); + } + + return text; + } + + @Override + public boolean detectQuote(Node node) { + if (node instanceof Element) { + Element elementNode = (Element) node; + if (elementNode.tagName().equals("blockquote") + || elementNode.hasClass("quote") + || (elementNode.tagName().equals("p") + && elementNode.textNodes().size() == 1 && elementNode + .textNodes().get(0).getWholeText() + .startsWith(">"))) { + return true; + } + } + + return false; + } + }; } - private String firstOrEmpty(Element element, String className) { - Elements subElements = element.getElementsByClass(className); - if (subElements.size() > 0) { - return subElements.get(0).text(); + private String getArticleDetailsReal(Element article) { + Element detailsElement = article.getElementsByClass("details").first(); + if (detailsElement != null) { + return detailsElement.text(); } return "";