import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import be.nikiroo.gofetch.data.Comment;
List<Story> list = new ArrayList<Story>();
URL url = new URL("https://pipedot.org/");
- InputStream in = open(url);
+ InputStream in = downloader.open(url);
Document doc = DataUtil.load(in, "UTF-8", url.toString());
- Elements stories = doc.getElementsByClass("story");
- for (Element story : stories) {
- Elements titles = story.getElementsByTag("h1");
+ Elements articles = doc.getElementsByClass("story");
+ for (Element article : articles) {
+ Elements titles = article.getElementsByTag("h1");
if (titles.size() == 0) {
continue;
}
Element title = titles.get(0);
String id = "";
- for (Element idElem : story.getElementsByTag("a")) {
+ for (Element idElem : article.getElementsByTag("a")) {
if (idElem.attr("href").startsWith("/pipe/")) {
id = idElem.attr("href").substring("/pipe/".length());
break;
String intUrl = null;
String extUrl = null;
- Elements links = story.getElementsByTag("a");
+ Elements links = article.getElementsByTag("a");
if (links.size() > 0) {
intUrl = links.get(0).absUrl("href");
}
}
String details = "";
- Elements detailsElements = story.getElementsByTag("div");
+ Elements detailsElements = article.getElementsByTag("div");
if (detailsElements.size() > 0) {
details = detailsElements.get(0).text();
}
String body = "";
- for (Element elem : story.children()) {
+ for (Element elem : article.children()) {
String tag = elem.tag().toString();
if (!tag.equals("header") && !tag.equals("footer")) {
body = elem.text();
List<Comment> comments = new ArrayList<Comment>();
URL url = new URL(story.getUrlInternal());
- InputStream in = open(url);
+ InputStream in = downloader.open(url);
Document doc = DataUtil.load(in, "UTF-8", url.toString());
Elements listing = doc.getElementsByTag("main");
if (listing.size() > 0) {
}
private Comment getComment(Element commentElement) {
- String title = firstOrEmptyTag(commentElement, "h3");
- String author = firstOrEmpty(commentElement, "h4");
- String content = firstOrEmpty(commentElement, "comment-body");
+ String title = firstOrEmptyTag(commentElement, "h3").text();
+ String author = firstOrEmpty(commentElement, "h4").text();
+ Element content = firstOrEmpty(commentElement, "comment-body");
String date = "";
int pos = author.lastIndexOf(" on ");
}
Comment comment = new Comment(commentElement.id(), author, title, date,
- content);
+ toLines(content));
Elements commentOutline = commentElement
.getElementsByClass("comment-outline");
return comment;
}
- /**
- * Get the first element of the given class, or an empty {@link String} if
- * none found.
- *
- * @param element
- * the element to look in
- * @param className
- * the class to look for
- *
- * @return the value or an empty {@link String}
- */
- private String firstOrEmpty(Element element, String className) {
- Elements subElements = element.getElementsByClass(className);
- if (subElements.size() > 0) {
- return subElements.get(0).text();
- }
-
- return "";
- }
-
- /**
- * Get the first element of the given tag, or an empty {@link String} if
- * none found.
- *
- * @param element
- * the element to look in
- * @param tagName
- * the tag to look for
- *
- * @return the value or an empty {@link String}
- */
- private String firstOrEmptyTag(Element element, String tagName) {
- Elements subElements = element.getElementsByTag(tagName);
- if (subElements.size() > 0) {
- return subElements.get(0).text();
- }
+ private List<String> toLines(Element element) { // single-argument form: forwards to the two-argument toLines overload (declared outside this view)
+ return toLines(element, new BasicElementProcessor() { // anonymous processor; the only method overridden here is detectQuote
+ @Override
+ public boolean detectQuote(Node node) {
+ if (node instanceof Element) { // the tag-name and class checks below are only made on Element nodes
+ Element elementNode = (Element) node;
+ if (elementNode.tagName().equals("blockquote")
+ || elementNode.hasClass("quote")) { // a quote is a <blockquote> tag or any element carrying the class "quote"
+ return true;
+ }
+ }
- return "";
+ return false; // non-Element nodes, and Elements matching neither test, are not quotes
+ }
+ });
}
}