package be.nikiroo.gofetch.support;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;
import be.nikiroo.utils.StringUtils;
/**
* Support https://www.erenumerique.fr/.
*
* @author niki
*/
public class EreNumerique extends BasicSupport {
    /** Root URL of the supported site; a category name is appended to it. */
    private static final String BASE_URL = "https://www.erenumerique.fr/";

    /** Categories whose listing pages are read by {@link #list()}. */
    private static final String[] CATEGORIES = { "informatique" };

    /**
     * Returns the human-readable description of this support.
     *
     * @return the description string
     */
    @Override
    public String getDescription() {
        return "Ère Numérique.FR: faites le bon choix !";
    }

    /**
     * Builds the list of stories found on the listing page of each supported
     * category.
     * <p>
     * Every element of class <tt>item-details</tt> is considered an article;
     * articles without a <tt>time</tt> element are skipped because the story
     * ID and date are both derived from it.
     *
     * @return the stories found, in page order (never NULL)
     *
     * @throws IOException
     *             in case of I/O error while downloading or parsing a page
     */
    @Override
    public List<Story> list() throws IOException {
        List<Story> list = new ArrayList<Story>();

        for (String categ : CATEGORIES) {
            Document doc = parsePage(new URL(BASE_URL + categ));
            Elements articles = doc.getElementsByClass("item-details");
            for (Element article : articles) {
                Story story = parseStory(article, categ);
                if (story != null) {
                    list.add(story);
                }
            }
        }

        return list;
    }

    /**
     * Downloads the page behind the internal URL of the given story, then
     * sets the full content and the comments of this story from it.
     * <p>
     * The full content starts with the content already present in the story,
     * followed by the lines extracted from the first <tt>article</tt> element
     * (if any). When the page has no <tt>article</tt> element, the existing
     * content is reused unchanged.
     *
     * @param story
     *            the story to complete
     *
     * @throws IOException
     *             in case of I/O error while downloading or parsing the page
     */
    @Override
    public void fetch(Story story) throws IOException {
        String fullContent = story.getContent();
        Document doc = parsePage(new URL(story.getUrlInternal()));

        Element article = doc.getElementsByTag("article").first();
        if (article != null) {
            StringBuilder builder = new StringBuilder();
            // A missing summary contributes nothing (instead of the literal
            // text "null" that string concatenation would produce)
            if (fullContent != null) {
                builder.append(fullContent);
            }

            for (String line : toLines(article, new BasicElementProcessor() {
                // TODO: ignore headlines/pub
            })) {
                builder.append(line).append("\n");
            }

            // Content is too tight with a single break per line: double
            // every break, then collapse the runs this creates back to a
            // single blank line (two passes)
            fullContent = builder.toString().replace("\n", "\n\n") //
                    .replace("\n\n\n\n", "\n\n") //
                    .replace("\n\n\n\n", "\n\n") //
                    .trim();
        }

        // The comments are part of the same page as the article
        Element posts = doc.getElementsByClass("comment-list").first();

        story.setFullContent(fullContent);
        story.setComments(getComments(posts));
    }

    /**
     * Downloads and parses the page at the given URL as UTF-8 HTML.
     * <p>
     * The stream obtained from the downloader is always closed before this
     * method returns, whether the parsing succeeded or not.
     *
     * @param url
     *            the URL of the page to download
     *
     * @return the parsed document
     *
     * @throws IOException
     *             in case of I/O error
     */
    private Document parsePage(URL url) throws IOException {
        InputStream in = downloader.open(url);
        try {
            return DataUtil.load(in, "UTF-8", url.toString());
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Converts one <tt>item-details</tt> element of a listing page into a
     * {@link Story}.
     * <p>
     * Apart from the <tt>time</tt> element, every piece of information is
     * optional and is replaced by an empty string when it cannot be found.
     *
     * @param article
     *            the element describing the article
     * @param categ
     *            the category this article was listed under
     *
     * @return the story, or NULL if the article has no <tt>time</tt> element
     */
    private Story parseStory(Element article, String categ) {
        // MUST NOT fail: the ID and the date both come from this element
        Element dateElement = article.getElementsByTag("time").first();
        if (dateElement == null) {
            return null;
        }

        String datetime = dateElement.attr("datetime");
        // The ID is the timestamp with its ':' and '+' characters replaced
        String id = datetime.replace(":", "_").replace("+", "_");
        String date = date(datetime);

        String intUrl = "";
        Element urlElement = article.getElementsByTag("a").first();
        if (urlElement != null) {
            intUrl = urlElement.absUrl("href");
        }

        String title = unhtmlText(article.getElementsByTag("h2").first());

        Element authorElement = article.getElementsByClass(
                "td-post-author-name").first();
        if (authorElement != null) {
            authorElement = authorElement.getElementsByTag("a").first();
        }
        String author = unhtmlText(authorElement);

        String body = unhtmlText(article.getElementsByClass("td-excerpt")
                .first());

        // Neither details nor an external URL are extracted from the listing
        String details = "";
        String extUrl = "";

        return new Story(getType(), id, title, author, date, categ, details,
                intUrl, extUrl, body);
    }

    /**
     * Returns the text of the given element, passed through
     * {@link StringUtils#unhtml(String)} and trimmed.
     *
     * @param element
     *            the element to read, which can be NULL
     *
     * @return the cleaned text, or an empty string if the element is NULL
     */
    private String unhtmlText(Element element) {
        if (element == null) {
            return "";
        }

        return StringUtils.unhtml(element.text()).trim();
    }

    /**
     * Converts the direct children of the given element that have the class
     * <tt>comment</tt> into {@link Comment}s, recursively including their
     * replies.
     *
     * @param posts
     *            the element containing the comments, which can be NULL
     *
     * @return the comments (never NULL, empty if <tt>posts</tt> is NULL)
     */
    private List<Comment> getComments(Element posts) {
        List<Comment> comments = new ArrayList<Comment>();
        if (posts == null) {
            return comments;
        }

        for (Element post : posts.children()) {
            if (post.hasClass("comment")) {
                comments.add(parseComment(post));
            }
        }

        return comments;
    }

    /**
     * Converts one <tt>comment</tt> element into a {@link Comment}, with the
     * replies found under its first <tt>children</tt> element attached.
     *
     * @param post
     *            the element describing the comment
     *
     * @return the comment (never NULL)
     */
    private Comment parseComment(Element post) {
        Element authorE = post.getElementsByTag("footer").first();
        if (authorE != null) {
            authorE = authorE.getElementsByTag("cite").first();
        }

        // Since we have no title but still an author, the name of the
        // commenter is used as the title and the author is left empty
        String title = unhtmlText(authorE);
        String author = "";

        String id = "";
        String date = "";
        Element idE = post.getElementsByTag("a").first();
        if (idE != null) {
            id = idE.attr("id");
            Element dateE = idE.getElementsByTag("span").first();
            if (dateE != null) {
                date = date(dateE.attr("data-epoch"));
            }
        }

        List<String> content = new ArrayList<String>();
        Element contentE = post.getElementsByClass("comment-content").first();
        if (contentE != null) {
            for (String line : toLines(contentE, new BasicElementProcessor() {
                @Override
                public boolean ignoreNode(Node node) {
                    // TODO: ignore headlines/pub
                    // Only the h4 elements are left out for now
                    return node instanceof Element
                            && "h4".equals(((Element) node).tagName());
                }
            })) {
                content.add(line);
            }
        }

        Comment comment = new Comment(id, author, title, date, content);

        Element children = post.getElementsByClass("children").first();
        comment.addAll(getComments(children));

        return comment;
    }
}