package be.nikiroo.gofetch.support;
import java.io.IOException;
import java.net.URL;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map.Entry;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
/**
* Support http://www.lemonde.fr/.
*
* @author niki
*/
public class LeMonde extends BasicSupport {
@Override
public String getDescription() {
return "Le Monde: Actualités et Infos en France et dans le monde";
}
@Override
protected List> getUrls() throws IOException {
List> urls = new ArrayList>();
for (String topic : new String[] { "International", "Politique",
"Société", "Sciences" }) {
URL url = new URL("http://www.lemonde.fr/"
+ topic.toLowerCase().replace("é", "e") + "/1.html");
urls.add(new AbstractMap.SimpleEntry(url, topic));
}
return urls;
}
@Override
protected List getArticles(Document doc) {
return doc.getElementsByTag("article");
}
@Override
protected String getArticleId(Document doc, Element article) {
return ""; // will use the date
}
@Override
protected String getArticleTitle(Document doc, Element article) {
Element titleElement = article.getElementsByTag("h3").first();
if (titleElement != null) {
return titleElement.text();
}
return "";
}
@Override
protected String getArticleAuthor(Document doc, Element article) {
Element detailsElement = article.getElementsByClass("signature")
.first();
if (detailsElement != null) {
return detailsElement.text();
}
return "";
}
@Override
protected String getArticleDate(Document doc, Element article) {
Element timeElement = article.getElementsByTag("time").first();
if (timeElement != null) {
return timeElement.attr("datetime");
}
return "";
}
@Override
protected String getArticleCategory(Document doc, Element article,
String currentCategory) {
return currentCategory;
}
@Override
protected String getArticleDetails(Document doc, Element article) {
return "";
}
@Override
protected String getArticleIntUrl(Document doc, Element article) {
Element titleElement = article.getElementsByTag("h3").first();
if (titleElement != null) {
Element link = titleElement.getElementsByTag("a").first();
if (link != null) {
return link.absUrl("href");
}
}
return "";
}
@Override
protected String getArticleExtUrl(Document doc, Element article) {
return "";
}
@Override
protected String getArticleContent(Document doc, Element article) {
Element contentElement = article.getElementsByClass("txt3").first();
if (contentElement != null) {
return contentElement.text();
}
return "";
}
@Override
protected Element getFullArticle(Document doc) {
return doc.getElementById("articleBody");
}
@Override
protected List getFullArticleCommentPosts(Document doc, URL intUrl) {
return null;
}
@Override
protected ElementProcessor getElementProcessorFullArticle() {
return new BasicElementProcessor() {
@Override
public boolean ignoreNode(Node node) {
if (node instanceof Element) {
Element element = (Element) node;
if (element.hasClass("lire")) {
return true;
}
}
return false;
}
@Override
public String isSubtitle(Node node) {
if (node instanceof Element) {
Element element = (Element) node;
if (element.hasClass("intertitre")) {
return element.text();
}
}
return null;
}
};
}
// No comment on this site, horrible javascript system
@Override
protected List getCommentCommentPosts(Document doc,
Element container) {
return null;
}
@Override
protected String getCommentId(Element post) {
return null;
}
@Override
protected String getCommentAuthor(Element post) {
return null;
}
@Override
protected String getCommentTitle(Element post) {
return null;
}
@Override
protected String getCommentDate(Element post) {
return null;
}
@Override
protected Element getCommentContentElement(Element post) {
return null;
}
@Override
protected ElementProcessor getElementProcessorComment() {
return null;
}
}