From: Niki Roo <niki@nikiroo.be>
Date: Fri, 23 Mar 2018 22:22:56 +0000 (+0100)
Subject: New website supported: Ère Numérique FR
X-Git-Url: https://git.nikiroo.be/?a=commitdiff_plain;ds=sidebyside;h=31755801cf5c2e2502e1222928d56c11f64dbdfe;p=gofetch.git

New website supported: Ère Numérique FR
---

diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
index 6d930f6..b0325b3 100644
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -45,6 +45,8 @@ public abstract class BasicSupport {
 		REGISTER,
 		/** FR: Linux */
 		TOO_LINUX,
+		/** FR: IT */
+		ERE_NUMERIQUE,
 	}
 
 	/**
@@ -238,6 +240,9 @@ public abstract class BasicSupport {
 			case TOO_LINUX:
 				support = new TooLinux();
 				break;
+			case ERE_NUMERIQUE:
+				support = new EreNumerique();
+				break;
 			}
 
 			if (support != null) {
diff --git a/src/be/nikiroo/gofetch/support/EreNumerique.java b/src/be/nikiroo/gofetch/support/EreNumerique.java
new file mode 100644
index 0000000..bef677d
--- /dev/null
+++ b/src/be/nikiroo/gofetch/support/EreNumerique.java
@@ -0,0 +1,196 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.select.Elements;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.utils.StringUtils;
+
+/**
+ * Support <a
+ * href="https://www.erenumerique.fr/">https://www.erenumerique.fr/</a>.
+ * 
+ * @author niki
+ */
+public class EreNumerique extends BasicSupport {
+	@Override
+	public String getDescription() {
+		return "Ãre NumÃ©rique.FR: faites le bon choix !";
+	}
+
+	@Override
+	public List<Story> list() throws IOException {
+		List<Story> list = new ArrayList<Story>();
+
+		for (String categ : new String[] { "informatique" }) {
+			URL url = new URL("https://www.erenumerique.fr/" + categ);
+			InputStream in = downloader.open(url);
+			Document doc = DataUtil.load(in, "UTF-8", url.toString());
+			Elements articles = doc.getElementsByClass("item-details");
+			for (Element article : articles) {
+				String id = "";
+				String intUrl = "";
+				String extUrl = ""; // nope
+				String title = "";
+				String date = "";
+				String author = "";
+				String details = "";
+				String body = "";
+
+				// MUST NOT fail:
+				Element dateElement = article //
+						.getElementsByTag("time").first();
+				if (dateElement == null) {
+					continue;
+				}
+
+				Element urlElement = article.getElementsByTag("a").first();
+				if (urlElement != null) {
+					intUrl = urlElement.absUrl("href");
+				}
+
+				id = dateElement.attr("datetime").replace(":", "_")
+						.replace("+", "_");
+				date = date(dateElement.attr("datetime"));
+
+				Element titleElement = article.getElementsByTag("h2").first();
+				if (titleElement != null) {
+					title = StringUtils.unhtml(titleElement.text()).trim();
+				}
+
+				Element authorElement = article.getElementsByClass(
+						"td-post-author-name").first();
+				if (authorElement != null) {
+					authorElement = authorElement.getElementsByTag("a").first();
+				}
+				if (authorElement != null) {
+					author = StringUtils.unhtml(authorElement.text()).trim();
+				}
+
+				Element contentElement = article.getElementsByClass(
+						"td-excerpt").first();
+				if (contentElement != null) {
+					body = StringUtils.unhtml(contentElement.text()).trim();
+				}
+
+				list.add(new Story(getType(), id, title, author, date, categ,
+						details, intUrl, extUrl, body));
+			}
+		}
+
+		return list;
+	}
+
+	@Override
+	public void fetch(Story story) throws IOException {
+		String fullContent = story.getContent();
+
+		URL url = new URL(story.getUrlInternal());
+		InputStream in = downloader.open(url);
+		try {
+			Document doc = DataUtil.load(in, "UTF-8", url.toString());
+			Element article = doc.getElementsByTag("article").first();
+			if (article != null) {
+				for (String line : toLines(article,
+						new BasicElementProcessor() {
+							// TODO: ignore headlines/pub
+						})) {
+					fullContent += line + "\n";
+				}
+
+				// Content is too tight with a single break per line:
+				fullContent = fullContent.replace("\n", "\n\n") //
+						.replace("\n\n\n\n", "\n\n") //
+						.replace("\n\n\n\n", "\n\n") //
+						.trim();
+			}
+
+			// Get comments URL then parse it, if possible
+			Element posts = doc.getElementsByClass("comment-list").first();
+
+			story.setFullContent(fullContent);
+			story.setComments(getComments(posts));
+		} finally {
+			if (in != null) {
+				in.close();
+			}
+		}
+	}
+
+	private List<Comment> getComments(Element posts) {
+		List<Comment> comments = new ArrayList<Comment>();
+		if (posts != null) {
+			for (Element post : posts.children()) {
+				if (!post.hasClass("comment")) {
+					continue;
+				}
+
+				String id = "";
+				String author = "";
+				String title = "";
+				String date = "";
+				List<String> content = new ArrayList<String>();
+
+				Element authorE = post.getElementsByTag("footer").first();
+				if (authorE != null) {
+					authorE = authorE.getElementsByTag("cite").first();
+				}
+				if (authorE != null) {
+					author = StringUtils.unhtml(authorE.text()).trim();
+				}
+
+				Element idE = post.getElementsByTag("a").first();
+				if (idE != null) {
+					id = idE.attr("id");
+					Element dateE = idE.getElementsByTag("span").first();
+					if (dateE != null) {
+						date = date(dateE.attr("data-epoch"));
+					}
+				}
+
+				Element contentE = post.getElementsByClass("comment-content")
+						.first();
+				if (contentE != null) {
+					for (String line : toLines(contentE,
+							new BasicElementProcessor() {
+								@Override
+								public boolean ignoreNode(Node node) {
+									// TODO: ignore headlines/pub
+									if (node instanceof Element) {
+										Element el = (Element) node;
+										if ("h4".equals(el.tagName())) {
+											return true;
+										}
+									}
+
+									return false;
+								}
+							})) {
+						content.add(line);
+					}
+				}
+
+				// Since we have no title but still an author, let's switch:
+				title = author;
+				author = "";
+				Comment comment = new Comment(id, author, title, date, content);
+				comments.add(comment);
+
+				Element children = post.getElementsByClass("children").first();
+				comment.addAll(getComments(children));
+			}
+		}
+
+		return comments;
+	}
+}