From b9afb12e17825f363d3679fcac75095fb1e9dc6d Mon Sep 17 00:00:00 2001 From: Niki Roo Date: Sun, 25 Mar 2018 14:37:48 +0200 Subject: [PATCH] Fix subtitles and too much content in EreNumerique --- .../nikiroo/gofetch/support/BasicSupport.java | 27 +++++++++++++++++++ .../nikiroo/gofetch/support/EreNumerique.java | 21 ++++++++++++++- src/be/nikiroo/gofetch/support/LeMonde.java | 4 +-- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java index 8fc259a..b15fac7 100644 --- a/src/be/nikiroo/gofetch/support/BasicSupport.java +++ b/src/be/nikiroo/gofetch/support/BasicSupport.java @@ -100,6 +100,17 @@ public abstract class BasicSupport { * the usual automatic processing if not NULL */ public String manualProcessing(Node node); + + /** + * This {@link Node} is a subtitle and should be treated as such + * (highlighted). + * + * @param node + * the node to check + * + * @return NULL if it is not a subtitle, the subtitle to use if it is + */ + public String isSubtitle(Node node); } /** @@ -128,6 +139,11 @@ public abstract class BasicSupport { public String manualProcessing(Node node) { return null; } + + @Override + public String isSubtitle(Node node) { + return null; + } } static private String preselector; @@ -334,6 +350,7 @@ public abstract class BasicSupport { String manual = null; boolean ignore = elementProcessor.ignoreNode(node) || ignoredNodes.contains(node.parentNode()); + // Manual processing if (!ignore) { manual = elementProcessor.manualProcessing(node); if (manual != null) { @@ -342,6 +359,16 @@ public abstract class BasicSupport { } } + // Subtitle check + if (!ignore) { + String subtitle = elementProcessor.isSubtitle(node); + if (subtitle != null) { + subtitle = subtitle.trim(); + currentLine.append("\n[ " + subtitle + " ]\n"); + ignore = true; + } + } + if (ignore) { ignoredNodes.add(node); return; diff --git a/src/be/nikiroo/gofetch/support/EreNumerique.java b/src/be/nikiroo/gofetch/support/EreNumerique.java index bef677d..b6a7598 100644 --- a/src/be/nikiroo/gofetch/support/EreNumerique.java +++ b/src/be/nikiroo/gofetch/support/EreNumerique.java @@ -100,10 +100,29 @@ public class EreNumerique extends BasicSupport { try { Document doc = DataUtil.load(in, "UTF-8", url.toString()); Element article = doc.getElementsByTag("article").first(); + if (article != null) { + article = article.getElementsByAttributeValue("itemprop", + "articleBody").first(); + } if (article != null) { for (String line : toLines(article, new BasicElementProcessor() { - // TODO: ignore headlines/pub + @Override + public boolean ignoreNode(Node node) { + return node.attr("class").contains("chapo"); + } + + @Override + public String isSubtitle(Node node) { + if (node instanceof Element) { + Element element = (Element) node; + if (element.tagName().startsWith("h") + && element.tagName().length() == 2) { + return element.text(); + } + } + return null; + } })) { fullContent += line + "\n"; } diff --git a/src/be/nikiroo/gofetch/support/LeMonde.java b/src/be/nikiroo/gofetch/support/LeMonde.java index 4ec2c30..235f7ee 100644 --- a/src/be/nikiroo/gofetch/support/LeMonde.java +++ b/src/be/nikiroo/gofetch/support/LeMonde.java @@ -98,11 +98,11 @@ public class LeMonde extends BasicSupport { } @Override - public String manualProcessing(Node node) { + public String isSubtitle(Node node) { if (node instanceof Element) { Element element = (Element) node; if (element.hasClass("intertitre")) { - return "\n[ " + element.text() + " ]\n"; + return element.text(); } } return null; -- 2.27.0