X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FSlashdot.java;h=b3a779da62d229469346f8a9455b1e01b160ab9d;hb=c9cffa913fe4ebc5cbe483cc5afe676e6cb54abd;hp=8776e35fc7eac132aea50ff6ed7ead90d301fff7;hpb=27008a8782c0ed96e07c8dc39ff0ed1f5163a9d0;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java index 8776e35..b3a779d 100644 --- a/src/be/nikiroo/gofetch/support/Slashdot.java +++ b/src/be/nikiroo/gofetch/support/Slashdot.java @@ -14,6 +14,7 @@ import org.jsoup.select.Elements; import be.nikiroo.gofetch.data.Comment; import be.nikiroo.gofetch.data.Story; +import be.nikiroo.utils.StringUtils; /** * Support https://slashdot.org/. @@ -31,11 +32,11 @@ public class Slashdot extends BasicSupport { List list = new ArrayList(); URL url = new URL("https://slashdot.org/"); - InputStream in = open(url); + InputStream in = downloader.open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); - Elements stories = doc.getElementsByTag("header"); - for (Element story : stories) { - Elements titles = story.getElementsByClass("story-title"); + Elements articles = doc.getElementsByTag("header"); + for (Element article : articles) { + Elements titles = article.getElementsByClass("story-title"); if (titles.size() == 0) { continue; } @@ -48,8 +49,8 @@ public class Slashdot extends BasicSupport { } Elements links = title.getElementsByTag("a"); - String intUrl = null; - String extUrl = null; + String intUrl = ""; + String extUrl = ""; if (links.size() > 0) { intUrl = links.get(0).absUrl("href"); } @@ -58,19 +59,46 @@ public class Slashdot extends BasicSupport { } String details = ""; - Elements detailsElements = story.getElementsByClass("details"); + Elements detailsElements = article.getElementsByClass("details"); if (detailsElements.size() > 0) { details = detailsElements.get(0).text(); } + // details: + // "Posted by AUTHOR on DATE from the further-crackdown dept." + String author = ""; + int pos = details.indexOf(" on "); + if (details.startsWith("Posted by ") && pos >= 0) { + author = details.substring("Posted by ".length(), pos).trim(); + } + pos = details.indexOf(" from the "); + if (pos >= 0) { + details = details.substring(pos).trim(); + } + String body = ""; Element bodyElement = doc.getElementById("text-" + id); if (bodyElement != null) { body = bodyElement.text(); } - list.add(new Story(getType(), id, title.text(), details, intUrl, - extUrl, body)); + String categ = ""; + Element categElement = doc.getElementsByClass("topic").first(); + if (categElement != null) { + categ = StringUtils.unhtml(categElement.text()).trim(); + } + + String date = ""; + Element dateElement = doc.getElementsByTag("time").first(); + if (dateElement != null) { + date = StringUtils.unhtml(dateElement.text()).trim(); + if (date.startsWith("on ")) { + date = date.substring("on ".length()); + } + } + + list.add(new Story(getType(), id, title.text(), author, date, + categ, details, intUrl, extUrl, body)); } return list; @@ -81,7 +109,7 @@ public class Slashdot extends BasicSupport { List comments = new ArrayList(); URL url = new URL(story.getUrlInternal()); - InputStream in = open(url); + InputStream in = downloader.open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); Element listing = doc.getElementById("commentlisting"); if (listing != null) { @@ -138,7 +166,7 @@ public class Slashdot extends BasicSupport { } private List toLines(Element element) { - return toLines(element, new QuoteProcessor() { + return toLines(element, new BasicElementProcessor() { @Override public String processText(String text) { while (text.startsWith(">")) { // comment in one-liners @@ -164,11 +192,6 @@ public class Slashdot extends BasicSupport { return false; } - - @Override - public boolean ignoreNode(Node node) { - return false; - } }); } }