From: Niki Roo
Date: Sun, 25 Mar 2018 12:37:48 +0000 (+0200)
Subject: Fix subtitles and too much content in EreNumerique
X-Git-Url: https://git.nikiroo.be/?a=commitdiff_plain;h=b9afb12e17825f363d3679fcac75095fb1e9dc6d;p=gofetch.git

Fix subtitles and too much content in EreNumerique
---
Review note: in the EreNumerique isSubtitle() override, the heading test was
tightened to h1..h6 so that <hr> is no longer reported as an (empty) subtitle.
Leading whitespace in this patch was reconstructed, and the blob ids on the
"index" lines are those of the original commit.

diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
index 8fc259a..b15fac7 100644
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -100,6 +100,17 @@ public abstract class BasicSupport {
 		 *         the usual automatic processing if not NULL
 		 */
 		public String manualProcessing(Node node);
+
+		/**
+		 * This {@link Node} is a subtitle and should be treated as such
+		 * (highlighted).
+		 * 
+		 * @param node
+		 *            the node to check
+		 * 
+		 * @return NULL if it is not a subtitle, the subtitle to use if it is
+		 */
+		public String isSubtitle(Node node);
 	}
 
 	/**
@@ -128,6 +139,11 @@ public abstract class BasicSupport {
 		public String manualProcessing(Node node) {
 			return null;
 		}
+
+		@Override
+		public String isSubtitle(Node node) {
+			return null;
+		}
 	}
 
 	static private String preselector;
@@ -334,6 +350,7 @@ public abstract class BasicSupport {
 				String manual = null;
 				boolean ignore = elementProcessor.ignoreNode(node)
 						|| ignoredNodes.contains(node.parentNode());
+				// Manual processing
 				if (!ignore) {
 					manual = elementProcessor.manualProcessing(node);
 					if (manual != null) {
@@ -342,6 +359,16 @@ public abstract class BasicSupport {
 					}
 				}
 
+				// Subtitle check
+				if (!ignore) {
+					String subtitle = elementProcessor.isSubtitle(node);
+					if (subtitle != null) {
+						subtitle = subtitle.trim();
+						currentLine.append("\n[ " + subtitle + " ]\n");
+						ignore = true;
+					}
+				}
+
 				if (ignore) {
 					ignoredNodes.add(node);
 					return;
diff --git a/src/be/nikiroo/gofetch/support/EreNumerique.java b/src/be/nikiroo/gofetch/support/EreNumerique.java
index bef677d..b6a7598 100644
--- a/src/be/nikiroo/gofetch/support/EreNumerique.java
+++ b/src/be/nikiroo/gofetch/support/EreNumerique.java
@@ -100,10 +100,29 @@ public class EreNumerique extends BasicSupport {
 		try {
 			Document doc = DataUtil.load(in, "UTF-8", url.toString());
 			Element article = doc.getElementsByTag("article").first();
+			if (article != null) {
+				article = article.getElementsByAttributeValue("itemprop",
+						"articleBody").first();
+			}
 			if (article != null) {
 				for (String line : toLines(article,
 						new BasicElementProcessor() {
-							// TODO: ignore headlines/pub
+							@Override
+							public boolean ignoreNode(Node node) {
+								return node.attr("class").contains("chapo");
+							}
+
+							@Override
+							public String isSubtitle(Node node) {
+								if (node instanceof Element) {
+									Element element = (Element) node;
+									// h1..h6 only: "h" + any one char also matched <hr>
+									if (element.tagName().matches("h[1-6]")) {
+										return element.text();
+									}
+								}
+								return null;
+							}
 						})) {
 					fullContent += line + "\n";
 				}
diff --git a/src/be/nikiroo/gofetch/support/LeMonde.java b/src/be/nikiroo/gofetch/support/LeMonde.java
index 4ec2c30..235f7ee 100644
--- a/src/be/nikiroo/gofetch/support/LeMonde.java
+++ b/src/be/nikiroo/gofetch/support/LeMonde.java
@@ -98,11 +98,11 @@ public class LeMonde extends BasicSupport {
 							}
 
 							@Override
-							public String manualProcessing(Node node) {
+							public String isSubtitle(Node node) {
 								if (node instanceof Element) {
 									Element element = (Element) node;
 									if (element.hasClass("intertitre")) {
-										return "\n[ " + element.text() + " ]\n";
+										return element.text();
 									}
 								}
 								return null;