List<Story> list = new ArrayList<Story>();
URL url = new URL("https://lwn.net/");
- InputStream in = open(url);
+ InputStream in = downloader.open(url);
Document doc = DataUtil.load(in, "UTF-8", url.toString());
Elements articles = doc.getElementsByClass("pure-u-1");
for (Element article : articles) {
}
body = body.trim();
+ int pos;
+
+ String categ = "";
+ pos = details.indexOf("]");
+ if (pos >= 0) {
+ categ = details.substring(1, pos).trim();
+ }
+
String author = "";
- int pos = details.indexOf(" by ");
+ pos = details.indexOf(" by ");
if (pos >= 0) {
author = details.substring(pos + " by ".length()).trim();
}
pos = details.indexOf(" Posted ");
if (pos >= 0) {
date = details.substring(pos + " Posted ".length()).trim();
+ pos = date.indexOf(" by ");
+ if (pos >= 0) {
+ date = date.substring(0, pos).trim();
+ }
}
+ // We extracted everything from details so...
+ details = "";
+
String id = "";
String intUrl = "";
String extUrl = "";
id = intUrl.replaceAll("[^0-9]", "");
}
- list.add(new Story(getType(), id, title, details, intUrl, extUrl,
- body));
+ list.add(new Story(getType(), id, title, author, date, categ,
+ details, intUrl, extUrl, body));
}
return list;
// Do not try the paid-for stories...
if (!story.getTitle().startsWith("[$]")) {
URL url = new URL(story.getUrlInternal());
- InputStream in = open(url);
+ InputStream in = downloader.open(url);
Document doc = DataUtil.load(in, "UTF-8", url.toString());
Elements fullContentElements = doc
.getElementsByClass("ArticleText");
}
/**
 * Builds the {@code List<String>} for the given element by delegating to the
 * two-argument {@code toLines} overload with an anonymous element processor.
 * <p>
 * The processor overrides {@code processText}, which loops while the text
 * starts with ">" (marked "comments" below).
 * <p>
 * NOTE(review): this excerpt is a patch with elided context. The rest of
 * {@code processText} and the override that owns {@code return false} are
 * not visible here; confirm the details against the full file.
 *
 * @param element
 *            the element handed to the two-argument overload
 *
 * @return whatever the two-argument overload returns for this element
 */
private List<String> toLines(Element element) {
- return toLines(element, new QuoteProcessor() {
+ return toLines(element, new BasicElementProcessor() {
@Override
public String processText(String text) {
while (text.startsWith(">")) { // comments
return false;
}
-
- @Override
- public String manualProcessing(Node node) {
- return null;
- }
});
}
}