List<Story> list = new ArrayList<Story>();
URL url = new URL("https://pipedot.org/");
- InputStream in = open(url);
+ InputStream in = downloader.open(url);
Document doc = DataUtil.load(in, "UTF-8", url.toString());
- Elements stories = doc.getElementsByClass("story");
- for (Element story : stories) {
- Elements titles = story.getElementsByTag("h1");
+ Elements articles = doc.getElementsByClass("story");
+ for (Element article : articles) {
+ Elements titles = article.getElementsByTag("h1");
if (titles.size() == 0) {
continue;
}
Element title = titles.get(0);
String id = "";
- for (Element idElem : story.getElementsByTag("a")) {
+ for (Element idElem : article.getElementsByTag("a")) {
if (idElem.attr("href").startsWith("/pipe/")) {
id = idElem.attr("href").substring("/pipe/".length());
break;
String intUrl = null;
String extUrl = null;
- Elements links = story.getElementsByTag("a");
+ Elements links = article.getElementsByTag("a");
if (links.size() > 0) {
intUrl = links.get(0).absUrl("href");
}
}
String details = "";
- Elements detailsElements = story.getElementsByTag("div");
+ Elements detailsElements = article.getElementsByTag("div");
if (detailsElements.size() > 0) {
- details = detailsElements.get(0).text();
+ details = detailsElements.get(0).text().trim();
}
+ String author = "";
+ int pos = details.indexOf("by ");
+ if (pos >= 0) {
+ author = details.substring(pos + "by ".length()).trim();
+ pos = author.indexOf(" in ");
+ if (pos >= 0) {
+ author = author.substring(0, pos).trim();
+ }
+ }
+
+ String categ = "";
+ pos = details.indexOf(" in ");
+ if (pos >= 0) {
+ categ = details.substring(pos + " in ".length()).trim();
+ pos = categ.indexOf(" on ");
+ if (pos >= 0) {
+ categ = categ.substring(0, pos).trim();
+ }
+ }
+
+ String date = "";
+ Element dateElement = article.getElementsByTag("time").first();
+ if (dateElement != null) {
+ date = date(dateElement.attr("datetime"));
+ }
+
+ // We already have all the details (date, author, id, categ)
+ details = "";
+
String body = "";
- for (Element elem : story.children()) {
+ for (Element elem : article.children()) {
String tag = elem.tag().toString();
if (!tag.equals("header") && !tag.equals("footer")) {
body = elem.text();
}
}
- list.add(new Story(getType(), id, title.text(), details, intUrl,
- extUrl, body));
+ list.add(new Story(getType(), id, title.text(), author, date,
+ categ, details, intUrl, extUrl, body));
}
return list;
List<Comment> comments = new ArrayList<Comment>();
URL url = new URL(story.getUrlInternal());
- InputStream in = open(url);
+ InputStream in = downloader.open(url);
Document doc = DataUtil.load(in, "UTF-8", url.toString());
Elements listing = doc.getElementsByTag("main");
if (listing.size() > 0) {
}
private List<String> toLines(Element element) {
- return toLines(element, new QuoteProcessor() {
- @Override
- public String processText(String text) {
- return text;
- }
-
+ return toLines(element, new BasicElementProcessor() {
@Override
public boolean detectQuote(Node node) {
if (node instanceof Element) {
return false;
}
-
- @Override
- public boolean ignoreNode(Node node) {
- return false;
- }
});
}
}