X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FLeMonde.java;h=235f7ee2ce2985738401cb45b6d6e8a1b25ec750;hb=b9afb12e17825f363d3679fcac75095fb1e9dc6d;hp=4e22b4c0a9c8f6fdc93db8a09984f45b3020594a;hpb=100a839503d23e324d2db3f6d3e47892def3bf81;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/LeMonde.java b/src/be/nikiroo/gofetch/support/LeMonde.java index 4e22b4c..235f7ee 100644 --- a/src/be/nikiroo/gofetch/support/LeMonde.java +++ b/src/be/nikiroo/gofetch/support/LeMonde.java @@ -15,6 +15,11 @@ import org.jsoup.select.Elements; import be.nikiroo.gofetch.data.Comment; import be.nikiroo.gofetch.data.Story; +/** + * Support http://www.lemonde.fr/. + * + * @author niki + */ public class LeMonde extends BasicSupport { @Override public String getDescription() { @@ -28,7 +33,7 @@ public class LeMonde extends BasicSupport { for (String topic : new String[] { "international", "politique", "societe", "sciences" }) { URL url = new URL("http://www.lemonde.fr/" + topic + "/1.html"); - InputStream in = open(url); + InputStream in = downloader.open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); Elements articles = doc.getElementsByTag("article"); for (Element article : articles) { @@ -37,25 +42,27 @@ public class LeMonde extends BasicSupport { Elements contentElements = article.getElementsByClass("txt3"); if (times.size() > 0 && titleElements.size() > 0 && contentElements.size() > 0) { - String id = times.get(0).attr("datetime").replace(":", "_"); - String title = "[" + topic + "] " - + titleElements.get(0).text(); + String id = times.get(0).attr("datetime").replace(":", "_") + .replace("+", "_"); + String title = titleElements.get(0).text(); + String date = date(titleElements.get(0).text()); String content = contentElements.get(0).text(); String intUrl = ""; String extUrl = ""; + String author = ""; String details = ""; Elements detailsElements = article .getElementsByClass("signature"); if (detailsElements.size() > 0) { - details = detailsElements.get(0).text(); + author = detailsElements.get(0).text(); } Elements links = titleElements.get(0).getElementsByTag("a"); if (links.size() > 0) { intUrl = links.get(0).absUrl("href"); - list.add(new Story(getType(), id, title, details, - intUrl, extUrl, content)); + list.add(new Story(getType(), id, title, author, date, + topic, details, intUrl, extUrl, content)); } } } @@ -73,16 +80,11 @@ public class LeMonde extends BasicSupport { // some javascript, I need to check...) URL url = new URL(story.getUrlInternal()); - InputStream in = open(url); + InputStream in = downloader.open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); Element article = doc.getElementById("articleBody"); if (article != null) { - for (String line : toLines(article, new QuoteProcessor() { - @Override - public String processText(String text) { - return text; - } - + for (String line : toLines(article, new BasicElementProcessor() { @Override public boolean ignoreNode(Node node) { if (node instanceof Element) { @@ -96,16 +98,11 @@ public class LeMonde extends BasicSupport { } @Override - public boolean detectQuote(Node node) { - return false; - } - - @Override - public String manualProcessing(Node node) { + public String isSubtitle(Node node) { if (node instanceof Element) { Element element = (Element) node; if (element.hasClass("intertitre")) { - return "\n[ " + element.text() + " ]\n"; + return element.text(); } } return null;