- URL url = new URL(story.getUrlInternal());
- InputStream in = downloader.open(url);
- try {
- Document doc = DataUtil.load(in, "UTF-8", url.toString());
- Element article = doc.getElementsByTag("article").first();
- if (article != null) {
- article = article.getElementsByAttributeValue("itemprop",
- "articleBody").first();
- }
- if (article != null) {
- for (String line : toLines(article,
- new BasicElementProcessor() {
- @Override
- public boolean ignoreNode(Node node) {
- return node.attr("class").contains("chapo");
- }
-
- @Override
- public String isSubtitle(Node node) {
- if (node instanceof Element) {
- Element element = (Element) node;
- if (element.tagName().startsWith("h")
- && element.tagName().length() == 2) {
- return element.text();
- }
- }
- return null;
- }
- })) {
- fullContent += line + "\n";
- }