X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FSlashdot.java;h=b3a779da62d229469346f8a9455b1e01b160ab9d;hb=26816d976f9d888fbf75feb754d761871a988c69;hp=6a5395498de5894657e39311cc68ee3b8f9529e2;hpb=5c056aade2e020276e039f81acba7bcb2b12e87f;p=gofetch.git
diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java
index 6a53954..b3a779d 100644
--- a/src/be/nikiroo/gofetch/support/Slashdot.java
+++ b/src/be/nikiroo/gofetch/support/Slashdot.java
@@ -9,10 +9,12 @@ import java.util.List;
import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.utils.StringUtils;
/**
* Support https://slashdot.org/.
@@ -30,11 +32,11 @@ public class Slashdot extends BasicSupport {
List list = new ArrayList();
URL url = new URL("https://slashdot.org/");
- InputStream in = open(url);
+ InputStream in = downloader.open(url);
Document doc = DataUtil.load(in, "UTF-8", url.toString());
- Elements stories = doc.getElementsByTag("header");
- for (Element story : stories) {
- Elements titles = story.getElementsByClass("story-title");
+ Elements articles = doc.getElementsByTag("header");
+ for (Element article : articles) {
+ Elements titles = article.getElementsByClass("story-title");
if (titles.size() == 0) {
continue;
}
@@ -47,8 +49,8 @@ public class Slashdot extends BasicSupport {
}
Elements links = title.getElementsByTag("a");
- String intUrl = null;
- String extUrl = null;
+ String intUrl = "";
+ String extUrl = "";
if (links.size() > 0) {
intUrl = links.get(0).absUrl("href");
}
@@ -57,19 +59,46 @@ public class Slashdot extends BasicSupport {
}
String details = "";
- Elements detailsElements = story.getElementsByClass("details");
+ Elements detailsElements = article.getElementsByClass("details");
if (detailsElements.size() > 0) {
details = detailsElements.get(0).text();
}
+ // details:
+ // "Posted by AUTHOR on DATE from the further-crackdown dept."
+ String author = "";
+ int pos = details.indexOf(" on ");
+ if (details.startsWith("Posted by ") && pos >= 0) {
+ author = details.substring("Posted by ".length(), pos).trim();
+ }
+ pos = details.indexOf(" from the ");
+ if (pos >= 0) {
+ details = details.substring(pos).trim();
+ }
+
String body = "";
Element bodyElement = doc.getElementById("text-" + id);
if (bodyElement != null) {
body = bodyElement.text();
}
- list.add(new Story(getType(), id, title.text(), details, intUrl,
- extUrl, body));
+ String categ = "";
+ Element categElement = doc.getElementsByClass("topic").first();
+ if (categElement != null) {
+ categ = StringUtils.unhtml(categElement.text()).trim();
+ }
+
+ String date = "";
+ Element dateElement = doc.getElementsByTag("time").first();
+ if (dateElement != null) {
+ date = StringUtils.unhtml(dateElement.text()).trim();
+ if (date.startsWith("on ")) {
+ date = date.substring("on ".length());
+ }
+ }
+
+ list.add(new Story(getType(), id, title.text(), author, date,
+ categ, details, intUrl, extUrl, body));
}
return list;
@@ -80,7 +109,7 @@ public class Slashdot extends BasicSupport {
List comments = new ArrayList();
URL url = new URL(story.getUrlInternal());
- InputStream in = open(url);
+ InputStream in = downloader.open(url);
Document doc = DataUtil.load(in, "UTF-8", url.toString());
Element listing = doc.getElementById("commentlisting");
if (listing != null) {
@@ -92,41 +121,77 @@ public class Slashdot extends BasicSupport {
private List getComments(Element listing) {
List comments = new ArrayList();
+ Comment lastComment = null;
for (Element commentElement : listing.children()) {
if (commentElement.hasClass("comment")) {
- Comment comment = getComment(commentElement);
- if (!comment.isEmpty()) {
- comments.add(comment);
+ if (!commentElement.hasClass("hidden")) {
+ lastComment = getComment(commentElement);
+ comments.add(lastComment);
+ }
+
+ List subComments = new ArrayList();
+ for (Element child : commentElement.children()) {
+ if (child.id().contains("commtree_")) {
+ subComments.addAll(getComments(child));
+ }
+ }
+
+ if (lastComment == null) {
+ comments.addAll(subComments);
+ } else {
+ lastComment.addAll(subComments);
}
}
}
+
return comments;
}
+ /**
+ * Get a comment from the given element.
+ *
+ * @param commentElement
+ * the element to extract the comment from
+ *
+ * @return the comment, NOT including sub-comments
+ */
private Comment getComment(Element commentElement) {
- String title = firstOrEmpty(commentElement, "title");
- String author = firstOrEmpty(commentElement, "by");
- String content = firstOrEmpty(commentElement, "commentBody");
- String date = firstOrEmpty(commentElement, "otherdetails");
+ String title = firstOrEmpty(commentElement, "title").text();
+ String author = firstOrEmpty(commentElement, "by").text();
+ String date = firstOrEmpty(commentElement, "otherdetails").text();
+ Element content = firstOrEmpty(commentElement, "commentBody");
- Comment comment = new Comment(commentElement.id(), author, title, date,
- content);
+ return new Comment(commentElement.id(), author, title, date,
+ toLines(content));
+ }
- for (Element child : commentElement.children()) {
- if (child.id().contains("commtree_")) {
- comment.addAll(getComments(child));
- }
- }
+ private List toLines(Element element) {
+ return toLines(element, new BasicElementProcessor() {
+ @Override
+ public String processText(String text) {
+ while (text.startsWith(">")) { // comment in one-liners
+ text = text.substring(1).trim();
+ }
- return comment;
- }
+ return text;
+ }
- private String firstOrEmpty(Element element, String className) {
- Elements subElements = element.getElementsByClass(className);
- if (subElements.size() > 0) {
- return subElements.get(0).text();
- }
+ @Override
+ public boolean detectQuote(Node node) {
+ if (node instanceof Element) {
+ Element elementNode = (Element) node;
+ if (elementNode.tagName().equals("blockquote")
+ || elementNode.hasClass("quote")
+ || (elementNode.tagName().equals("p")
+ && elementNode.textNodes().size() == 1 && elementNode
+ .textNodes().get(0).getWholeText()
+ .startsWith(">"))) {
+ return true;
+ }
+ }
- return "";
+ return false;
+ }
+ });
}
}