package be.nikiroo.gofetch.support; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.util.ArrayList; import java.util.List; import org.jsoup.helper.DataUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.select.Elements; import be.nikiroo.gofetch.data.Comment; import be.nikiroo.gofetch.data.Story; /** * Support https://slashdot.org/. * * @author niki */ public class Slashdot extends BasicSupport { @Override public String getDescription() { return "Slashdot: News for nerds, stuff that matters!"; } @Override public List list() throws IOException { List list = new ArrayList(); URL url = new URL("https://slashdot.org/"); InputStream in = open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); Elements stories = doc.getElementsByTag("header"); for (Element story : stories) { Elements titles = story.getElementsByClass("story-title"); if (titles.size() == 0) { continue; } Element title = titles.get(0); String id = "" + title.attr("id"); if (id.startsWith("title-")) { id = id.substring("title-".length()); } Elements links = title.getElementsByTag("a"); String intUrl = null; String extUrl = null; if (links.size() > 0) { intUrl = links.get(0).absUrl("href"); } if (links.size() > 1) { extUrl = links.get(1).absUrl("href"); } String details = ""; Elements detailsElements = story.getElementsByClass("details"); if (detailsElements.size() > 0) { details = detailsElements.get(0).text(); } String body = ""; Element bodyElement = doc.getElementById("text-" + id); if (bodyElement != null) { body = bodyElement.text(); } list.add(new Story(getType(), id, title.text(), details, intUrl, extUrl, body)); } return list; } @Override public void fetch(Story story) throws IOException { List comments = new ArrayList(); URL url = new URL(story.getUrlInternal()); InputStream in = open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); Element listing = doc.getElementById("commentlisting"); if (listing != null) { comments.addAll(getComments(listing)); } story.setComments(comments); } private List getComments(Element listing) { List comments = new ArrayList(); Comment lastComment = null; for (Element commentElement : listing.children()) { if (commentElement.hasClass("comment")) { if (!commentElement.hasClass("hidden")) { lastComment = getComment(commentElement); comments.add(lastComment); } List subComments = new ArrayList(); for (Element child : commentElement.children()) { if (child.id().contains("commtree_")) { subComments.addAll(getComments(child)); } } if (lastComment == null) { comments.addAll(subComments); } else { lastComment.addAll(subComments); } } } return comments; } /** * Get a comment from the given element. * * @param commentElement * the element to get the comment of. * * @return the comment, NOT including sub-comments */ private Comment getComment(Element commentElement) { String title = firstOrEmpty(commentElement, "title").text(); String author = firstOrEmpty(commentElement, "by").text(); String date = firstOrEmpty(commentElement, "otherdetails").text(); Element content = firstOrEmpty(commentElement, "commentBody"); return new Comment(commentElement.id(), author, title, date, toLines(content)); } private List toLines(Element element) { return toLines(element, new QuoteProcessor() { @Override public String processText(String text) { while (text.startsWith(">")) { // comment in one-liners text = text.substring(1).trim(); } return text; } @Override public boolean detectQuote(Node node) { if (node instanceof Element) { Element elementNode = (Element) node; if (elementNode.tagName().equals("blockquote") || elementNode.hasClass("quote") || (elementNode.tagName().equals("p") && elementNode.textNodes().size() == 1 && elementNode .textNodes().get(0).getWholeText() .startsWith(">"))) { return true; } } return false; } @Override public boolean ignoreNode(Node node) { return false; } }); } }