X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FSlashdot.java;h=43ce13d023ff498d3a759e9eac884f351d10e79a;hb=1ab7ff0acbc00ddcf6b27a3bebd1e351fdbe96a2;hp=1581d23cb2361f8fa55c912ff1e2516c8fc91d9d;hpb=202173602397b0793542c7a90f9d86013e153067;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java index 1581d23..43ce13d 100644 --- a/src/be/nikiroo/gofetch/support/Slashdot.java +++ b/src/be/nikiroo/gofetch/support/Slashdot.java @@ -1,20 +1,17 @@ package be.nikiroo.gofetch.support; import java.io.IOException; -import java.io.InputStream; import java.net.URL; +import java.util.AbstractMap; import java.util.ArrayList; import java.util.List; +import java.util.Map.Entry; -import org.jsoup.helper.DataUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.select.Elements; -import be.nikiroo.gofetch.data.Comment; -import be.nikiroo.gofetch.data.Story; - /** * Support https://slashdot.org/. * @@ -27,118 +24,238 @@ public class Slashdot extends BasicSupport { } @Override - public List list() throws IOException { - List list = new ArrayList(); - - URL url = new URL("https://slashdot.org/"); - InputStream in = open(url); - Document doc = DataUtil.load(in, "UTF-8", url.toString()); - Elements articles = doc.getElementsByTag("header"); - for (Element article : articles) { - Elements titles = article.getElementsByClass("story-title"); - if (titles.size() == 0) { - continue; - } + protected List> getUrls() throws IOException { + List> urls = new ArrayList>(); + urls.add(new AbstractMap.SimpleEntry(new URL( + "https://slashdot.org/"), "")); + return urls; + } - Element title = titles.get(0); + @Override + protected List getArticles(Document doc) { + return doc.getElementsByTag("header"); + } - String id = "" + title.attr("id"); + @Override + protected String getArticleId(Document doc, Element article) { + Element title = article.getElementsByClass("story-title").first(); + if (title != null) { + String id = title.attr("id"); if (id.startsWith("title-")) { id = id.substring("title-".length()); } - Elements links = title.getElementsByTag("a"); - String intUrl = null; - String extUrl = null; - if (links.size() > 0) { - intUrl = links.get(0).absUrl("href"); - } - if (links.size() > 1) { - extUrl = links.get(1).absUrl("href"); + return id; + } + + return ""; + } + + @Override + protected String getArticleTitle(Document doc, Element article) { + Element title = article.getElementsByClass("story-title").first(); + if (title != null) { + return title.text(); + } + + return ""; + } + + @Override + protected String getArticleAuthor(Document doc, Element article) { + // details: "Posted by AUTHOR on DATE from the further-crackdown dept." + String details = getArticleDetailsReal(article); + int pos = details.indexOf(" on "); + if (details.startsWith("Posted by ") && pos >= 0) { + return details.substring("Posted by ".length(), pos).trim(); + } + + return ""; + } + + @Override + protected String getArticleDate(Document doc, Element article) { + // Do not try bad articles + if (getArticleId(doc, article).isEmpty()) { + return ""; + } + + Element dateElement = doc.getElementsByTag("time").first(); + if (dateElement != null) { + String date = dateElement.text().trim(); + if (date.startsWith("on ")) { + date = date.substring("on ".length()); } - String details = ""; - Elements detailsElements = article.getElementsByClass("details"); - if (detailsElements.size() > 0) { - details = detailsElements.get(0).text(); + return date; + } + + return ""; + } + + @Override + protected String getArticleCategory(Document doc, Element article, + String currentCategory) { + Element categElement = doc.getElementsByClass("topic").first(); + if (categElement != null) { + return categElement.text(); + } + + return ""; + } + + @Override + protected String getArticleDetails(Document doc, Element article) { + // details: "Posted by AUTHOR on DATE from the further-crackdown dept." + String details = getArticleDetailsReal(article); + int pos = details.indexOf(" from the "); + if (pos >= 0) { + return details.substring(pos).trim(); + } + + return ""; + } + + @Override + protected String getArticleIntUrl(Document doc, Element article) { + Element title = article.getElementsByClass("story-title").first(); + if (title != null) { + Elements links = title.getElementsByTag("a"); + if (links.size() > 0) { + return links.get(0).absUrl("href"); } + } + return ""; + } - String body = ""; - Element bodyElement = doc.getElementById("text-" + id); - if (bodyElement != null) { - body = bodyElement.text(); + @Override + protected String getArticleExtUrl(Document doc, Element article) { + Element title = article.getElementsByClass("story-title").first(); + if (title != null) { + Elements links = title.getElementsByTag("a"); + if (links.size() > 1) { + return links.get(1).absUrl("href"); } + } + return ""; + } - list.add(new Story(getType(), id, title.text(), details, intUrl, - extUrl, body)); + @Override + protected String getArticleContent(Document doc, Element article) { + Element contentElement = doc // + .getElementById("text-" + getArticleId(doc, article)); + if (contentElement != null) { + return contentElement.text(); } - return list; + return ""; } @Override - public void fetch(Story story) throws IOException { - List comments = new ArrayList(); + protected Element getFullArticle(Document doc) { + return null; + } - URL url = new URL(story.getUrlInternal()); - InputStream in = open(url); - Document doc = DataUtil.load(in, "UTF-8", url.toString()); + @Override + protected List getFullArticleCommentPosts(Document doc, URL intUrl) { + List commentElements = new ArrayList(); Element listing = doc.getElementById("commentlisting"); if (listing != null) { - comments.addAll(getComments(listing)); + for (Element commentElement : listing.children()) { + if (commentElement.hasClass("comment")) { + commentElements.add(commentElement); + } + } } - story.setComments(comments); + return commentElements; } - private List getComments(Element listing) { - List comments = new ArrayList(); - Comment lastComment = null; - for (Element commentElement : listing.children()) { - if (commentElement.hasClass("comment")) { - if (!commentElement.hasClass("hidden")) { - lastComment = getComment(commentElement); - comments.add(lastComment); - } + @Override + protected ElementProcessor getElementProcessorFullArticle() { + return null; + } - List subComments = new ArrayList(); - for (Element child : commentElement.children()) { - if (child.id().contains("commtree_")) { - subComments.addAll(getComments(child)); + @Override + protected List getCommentCommentPosts(Document doc, + Element container) { + List commentElements = new ArrayList(); + for (Element child : container.children()) { + if (child.id().contains("commtree_")) { + for (Element sub : child.children()) { + if (sub.hasClass("comment")) { + commentElements.add(sub); } } - - if (lastComment == null) { - comments.addAll(subComments); - } else { - lastComment.addAll(subComments); - } } } - return comments; + return commentElements; + } + + @Override + protected String getCommentId(Element post) { + if (post.hasClass("hidden")) { + return ""; + } + + return post.id(); + } + + @Override + protected String getCommentAuthor(Element post) { + if (post.hasClass("hidden")) { + return ""; + } + + Element author = post.getElementsByClass("by").first(); + if (author != null) { + return author.text(); + } + + return ""; + } + + @Override + protected String getCommentTitle(Element post) { + if (post.hasClass("hidden")) { + return ""; + } + + Element title = post.getElementsByClass("title").first(); + if (title != null) { + return title.text(); + } + + return ""; + } + + @Override + protected String getCommentDate(Element post) { + if (post.hasClass("hidden")) { + return ""; + } + + Element date = post.getElementsByClass("otherdetails").first(); + if (date != null) { + return date.text(); + } + + return ""; } - /** - * Get a comment from the given element. - * - * @param commentElement - * the element to get the comment of. - * - * @return the comment, NOT including sub-comments - */ - private Comment getComment(Element commentElement) { - String title = firstOrEmpty(commentElement, "title").text(); - String author = firstOrEmpty(commentElement, "by").text(); - String date = firstOrEmpty(commentElement, "otherdetails").text(); - Element content = firstOrEmpty(commentElement, "commentBody"); + @Override + protected Element getCommentContentElement(Element post) { + if (post.hasClass("hidden")) { + return null; + } - return new Comment(commentElement.id(), author, title, date, - toLines(content)); + return post.getElementsByClass("commentBody").first(); } - private List toLines(Element element) { - return toLines(element, new BasicElementProcessor() { + @Override + protected ElementProcessor getElementProcessorComment() { + return new BasicElementProcessor() { @Override public String processText(String text) { while (text.startsWith(">")) { // comment in one-liners @@ -164,6 +281,15 @@ public class Slashdot extends BasicSupport { return false; } - }); + }; + } + + private String getArticleDetailsReal(Element article) { + Element detailsElement = article.getElementsByClass("details").first(); + if (detailsElement != null) { + return detailsElement.text(); + } + + return ""; } }