// Diff hunk for getArticleId(Document, Element): the zero-padding of the id is removed.
//
// Removed ("-") lines: strip every non-digit character from the string returned by
// getArticleIntUrl(doc, article), then prepend "0" until the id is at least 10
// characters long, and return the padded id.
//
// Added ("+") line: return the digits-only string directly, with no padding.
//
// NOTE(review): for URLs containing fewer than 10 digits the returned id is now
// shorter than before (no leading zeros) -- confirm that nothing compares new ids
// against ids produced by the padded version.
@Override
protected String getArticleId(Document doc, Element article) {
- String id = getArticleIntUrl(doc, article).replaceAll("[^0-9]", "");
- while (id.length() < 10) {
- id = "0" + id;
- }
-
- return id;
+ return getArticleIntUrl(doc, article).replaceAll("[^0-9]", "");
}
@Override
// All but the first and two last children
for (int i = 1; i < listing.children().size() - 2; i++) {
Element e = listing.children().get(i);
- content = content.trim() + " " + e.text().trim();
+ content = content.trim() + " " + getArticleText(e);
}
return content;