Fix subtitles and too much content in EreNumerique
[gofetch.git] / src / be / nikiroo / gofetch / support / EreNumerique.java
index bef677d9f8a75a3ec3c8faa9e802e05004719e22..b6a7598027c9b632cb52fb50f22677e4a1a314b4 100644 (file)
@@ -100,10 +100,29 @@ public class EreNumerique extends BasicSupport {
                try {
                        Document doc = DataUtil.load(in, "UTF-8", url.toString());
                        Element article = doc.getElementsByTag("article").first();
+                       if (article != null) {
+                               article = article.getElementsByAttributeValue("itemprop",
+                                               "articleBody").first();
+                       }
                        if (article != null) {
                                for (String line : toLines(article,
                                                new BasicElementProcessor() {
-                                                       // TODO: ignore headlines/pub
+                                                       @Override
+                                                       public boolean ignoreNode(Node node) {
+                                                               return node.attr("class").contains("chapo");
+                                                       }
+
+                                                       @Override
+                                                       public String isSubtitle(Node node) {
+                                                               if (node instanceof Element) {
+                                                                       Element element = (Element) node;
+                                                                       if (element.tagName().startsWith("h")
+                                                                                       && element.tagName().length() == 2) {
+                                                                               return element.text();
+                                                                       }
+                                                               }
+                                                               return null;
+                                                       }
                                                })) {
                                        fullContent += line + "\n";
                                }