Bug fixes + rework of BasicSupport
[gofetch.git] / src / be / nikiroo / gofetch / support / LeMonde.java
index 235f7ee2ce2985738401cb45b6d6e8a1b25ec750..1f7aea7d633eda4ee567f9a981dc0d961e36908d 100644 (file)
@@ -1,19 +1,15 @@
 package be.nikiroo.gofetch.support;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.net.URL;
+import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map.Entry;
 
-import org.jsoup.helper.DataUtil;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.nodes.Node;
-import org.jsoup.select.Elements;
-
-import be.nikiroo.gofetch.data.Comment;
-import be.nikiroo.gofetch.data.Story;
 
 /**
  * Support <a href="http://www.lemonde.fr/">http://www.lemonde.fr/</a>.
@@ -27,98 +23,171 @@ public class LeMonde extends BasicSupport {
        }
 
        @Override
-       public List<Story> list() throws IOException {
-               List<Story> list = new ArrayList<Story>();
-
-               for (String topic : new String[] { "international", "politique",
-                               "societe", "sciences" }) {
-                       URL url = new URL("http://www.lemonde.fr/" + topic + "/1.html");
-                       InputStream in = downloader.open(url);
-                       Document doc = DataUtil.load(in, "UTF-8", url.toString());
-                       Elements articles = doc.getElementsByTag("article");
-                       for (Element article : articles) {
-                               Elements times = article.getElementsByTag("time");
-                               Elements titleElements = article.getElementsByTag("h3");
-                               Elements contentElements = article.getElementsByClass("txt3");
-                               if (times.size() > 0 && titleElements.size() > 0
-                                               && contentElements.size() > 0) {
-                                       String id = times.get(0).attr("datetime").replace(":", "_")
-                                                       .replace("+", "_");
-                                       String title = titleElements.get(0).text();
-                                       String date = date(titleElements.get(0).text());
-                                       String content = contentElements.get(0).text();
-                                       String intUrl = "";
-                                       String extUrl = "";
-                                       String author = "";
-                                       String details = "";
-
-                                       Elements detailsElements = article
-                                                       .getElementsByClass("signature");
-                                       if (detailsElements.size() > 0) {
-                                               author = detailsElements.get(0).text();
-                                       }
+       protected List<Entry<URL, String>> getUrls() throws IOException {
+               List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
+               for (String topic : new String[] { "International", "Politique",
+                               "Société", "Sciences" }) {
+                       URL url = new URL("http://www.lemonde.fr/"
+                                       + topic.toLowerCase().replace("é", "e") + "/1.html");
+                       urls.add(new AbstractMap.SimpleEntry<URL, String>(url, topic));
+               }
 
-                                       Elements links = titleElements.get(0).getElementsByTag("a");
-                                       if (links.size() > 0) {
-                                               intUrl = links.get(0).absUrl("href");
-                                               list.add(new Story(getType(), id, title, author, date,
-                                                               topic, details, intUrl, extUrl, content));
-                                       }
-                               }
+               return urls;
+       }
+
+       @Override
+       protected List<Element> getArticles(Document doc) {
+               return doc.getElementsByTag("article");
+       }
+
+       @Override
+       protected String getArticleId(Document doc, Element article) {
+               return ""; // will use the date
+       }
+
+       @Override
+       protected String getArticleTitle(Document doc, Element article) {
+               Element titleElement = article.getElementsByTag("h3").first();
+               if (titleElement != null) {
+                       return titleElement.text();
+               }
+
+               return "";
+       }
+
+       @Override
+       protected String getArticleAuthor(Document doc, Element article) {
+               Element detailsElement = article.getElementsByClass("signature")
+                               .first();
+               if (detailsElement != null) {
+                       return detailsElement.text();
+               }
+
+               return "";
+       }
+
+       @Override
+       protected String getArticleDate(Document doc, Element article) {
+               Element timeElement = article.getElementsByTag("time").first();
+               if (timeElement != null) {
+                       return timeElement.attr("datetime");
+               }
+
+               return "";
+       }
+
+       @Override
+       protected String getArticleCategory(Document doc, Element article,
+                       String currentCategory) {
+               return currentCategory;
+       }
+
+       @Override
+       protected String getArticleDetails(Document doc, Element article) {
+               return "";
+       }
+
+       @Override
+       protected String getArticleIntUrl(Document doc, Element article) {
+               Element titleElement = article.getElementsByTag("h3").first();
+               if (titleElement != null) {
+                       Element link = titleElement.getElementsByTag("a").first();
+                       if (link != null) {
+                               return link.absUrl("href");
                        }
                }
 
-               return list;
+               return "";
        }
 
        @Override
-       public void fetch(Story story) throws IOException {
-               String fullContent = story.getContent();
-               List<Comment> comments = new ArrayList<Comment>();
+       protected String getArticleExtUrl(Document doc, Element article) {
+               return "";
+       }
 
-               // Note: no comments on this site as far as I can see (or maybe with
-               // some javascript, I need to check...)
+       @Override
+       protected String getArticleContent(Document doc, Element article) {
+               Element contentElement = article.getElementsByClass("txt3").first();
+               if (contentElement != null) {
+                       return contentElement.text();
+               }
 
-               URL url = new URL(story.getUrlInternal());
-               InputStream in = downloader.open(url);
-               Document doc = DataUtil.load(in, "UTF-8", url.toString());
-               Element article = doc.getElementById("articleBody");
-               if (article != null) {
-                       for (String line : toLines(article, new BasicElementProcessor() {
-                               @Override
-                               public boolean ignoreNode(Node node) {
-                                       if (node instanceof Element) {
-                                               Element element = (Element) node;
-                                               if (element.hasClass("lire")) {
-                                                       return true;
-                                               }
-                                       }
+               return "";
+       }
+
+       @Override
+       protected Element getFullArticle(Document doc) {
+               return doc.getElementById("articleBody");
+       }
+
+       @Override
+       protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
+               return null;
+       }
 
-                                       return false;
+       @Override
+       protected ElementProcessor getElementProcessorFullArticle() {
+               return new BasicElementProcessor() {
+                       @Override
+                       public boolean ignoreNode(Node node) {
+                               if (node instanceof Element) {
+                                       Element element = (Element) node;
+                                       if (element.hasClass("lire")) {
+                                               return true;
+                                       }
                                }
 
-                               @Override
-                               public String isSubtitle(Node node) {
-                                       if (node instanceof Element) {
-                                               Element element = (Element) node;
-                                               if (element.hasClass("intertitre")) {
-                                                       return element.text();
-                                               }
+                               return false;
+                       }
+
+                       @Override
+                       public String isSubtitle(Node node) {
+                               if (node instanceof Element) {
+                                       Element element = (Element) node;
+                                       if (element.hasClass("intertitre")) {
+                                               return element.text();
                                        }
-                                       return null;
                                }
-                       })) {
-                               fullContent += line + "\n";
+                               return null;
                        }
+               };
+       }
 
-                       // Content is too tight with a single break per line:
-                       fullContent = fullContent.replace("\n", "\n\n") //
-                                       .replace("\n\n\n\n", "\n\n") //
-                                       .replace("\n\n\n\n", "\n\n") //
-                                       .trim();
-               }
+       // Comments are not supported for this site (it uses a horrible JavaScript system)
 
-               story.setFullContent(fullContent);
-               story.setComments(comments);
+       @Override
+       protected List<Element> getCommentCommentPosts(Document doc,
+                       Element container) {
+               return null;
+       }
+
+       @Override
+       protected String getCommentId(Element post) {
+               return null;
+       }
+
+       @Override
+       protected String getCommentAuthor(Element post) {
+               return null;
+       }
+
+       @Override
+       protected String getCommentTitle(Element post) {
+               return null;
+       }
+
+       @Override
+       protected String getCommentDate(Element post) {
+               return null;
+       }
+
+       @Override
+       protected Element getCommentContentElement(Element post) {
+               return null;
+       }
+
+       @Override
+       protected ElementProcessor getElementProcessorComment() {
+               return null;
        }
 }