package be.nikiroo.gofetch.support;

import java.io.IOException;
import java.net.URL;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map.Entry;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;

/**
 * Support https://www.erenumerique.fr/.
 * <p>
 * Every extraction method below returns an empty string (never
 * <tt>null</tt>) when the element it looks for is not present.
 * 
 * @author niki
 */
public class EreNumerique extends BasicSupport {
	@Override
	public String getDescription() {
		return "Ère Numérique.FR: faites le bon choix !";
	}

	/**
	 * Build the list of category pages to fetch, each one paired with its
	 * category name.
	 * 
	 * @return the (URL, category) pairs
	 * 
	 * @throws IOException
	 *             if a category URL is malformed
	 */
	@Override
	protected List<Entry<URL, String>> getUrls() throws IOException {
		List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
		for (String categ : new String[] { "Informatique" }) {
			// Locale.ROOT keeps the URL path stable whatever the default
			// locale is (a Turkish locale would turn "I" into a dotless "ı")
			URL url = new URL("https://www.erenumerique.fr/"
					+ categ.toLowerCase(Locale.ROOT));
			urls.add(new AbstractMap.SimpleEntry<URL, String>(url, categ));
		}

		return urls;
	}

	/**
	 * The article elements of a page: all the elements of class
	 * "item-details".
	 */
	@Override
	protected List<Element> getArticles(Document doc) {
		return doc.getElementsByClass("item-details");
	}

	@Override
	protected String getArticleId(Document doc, Element article) {
		return ""; // will use the date
	}

	/**
	 * The text of the first &lt;h2&gt; found in the article element.
	 */
	@Override
	protected String getArticleTitle(Document doc, Element article) {
		Element titleElement = article.getElementsByTag("h2").first();
		if (titleElement != null) {
			return titleElement.text();
		}

		return "";
	}

	/**
	 * The text of the first link found inside the first element of class
	 * "td-post-author-name".
	 */
	@Override
	protected String getArticleAuthor(Document doc, Element article) {
		Element authorElement = article.getElementsByClass(
				"td-post-author-name").first();
		if (authorElement != null) {
			authorElement = authorElement.getElementsByTag("a").first();
		}

		if (authorElement != null) {
			return authorElement.text();
		}

		return "";
	}

	/**
	 * The raw "datetime" attribute of the first &lt;time&gt; element of the
	 * article.
	 */
	@Override
	protected String getArticleDate(Document doc, Element article) {
		Element dateElement = article.getElementsByTag("time").first();
		if (dateElement != null) {
			return dateElement.attr("datetime");
		}

		return "";
	}

	/**
	 * The category is not read from the page: the one of the page currently
	 * processed is returned as is.
	 */
	@Override
	protected String getArticleCategory(Document doc, Element article,
			String currentCategory) {
		return currentCategory;
	}

	@Override
	protected String getArticleDetails(Document doc, Element article) {
		return "";
	}

	/**
	 * The absolute form of the "href" of the first link found in the article
	 * element.
	 */
	@Override
	protected String getArticleIntUrl(Document doc, Element article) {
		Element urlElement = article.getElementsByTag("a").first();
		if (urlElement != null) {
			return urlElement.absUrl("href");
		}

		return "";
	}

	@Override
	protected String getArticleExtUrl(Document doc, Element article) {
		return "";
	}

	/**
	 * The text (via <tt>getArticleText</tt>) of the first element of class
	 * "td-excerpt".
	 */
	@Override
	protected String getArticleContent(Document doc, Element article) {
		Element contentElement = article.getElementsByClass("td-excerpt")
				.first();
		if (contentElement != null) {
			return getArticleText(contentElement);
		}

		return "";
	}

	/**
	 * Locate the body of the full article: the first element with
	 * itemprop="articleBody" inside the first &lt;article&gt; tag.
	 * 
	 * @return the body element, or <tt>null</tt> if either the
	 *         &lt;article&gt; tag or the body inside it is missing
	 */
	@Override
	protected Element getFullArticle(Document doc) {
		Element article = doc.getElementsByTag("article").first();
		if (article != null) {
			article = article.getElementsByAttributeValue("itemprop",
					"articleBody").first();
		}

		return article;
	}

	/**
	 * The top-level comments: the "comment" children of the first element of
	 * class "comment-list".
	 */
	@Override
	protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
		return getSubCommentElements(doc.getElementsByClass("comment-list")
				.first());
	}

	/**
	 * Processor used for the full article: nodes whose class contains "chapo"
	 * are ignored, and header tags are reported as subtitles.
	 */
	@Override
	protected ElementProcessor getElementProcessorFullArticle() {
		return new BasicElementProcessor() {
			@Override
			public boolean ignoreNode(Node node) {
				return node.attr("class").contains("chapo");
			}

			@Override
			public String isSubtitle(Node node) {
				if (node instanceof Element) {
					Element element = (Element) node;
					String tag = element.tagName();
					// "h" followed by a digit (h1, h2...); a plain length
					// check would also accept <hr>, which is not a title
					if (tag.length() == 2 && tag.charAt(0) == 'h'
							&& Character.isDigit(tag.charAt(1))) {
						return element.text();
					}
				}

				return null;
			}
		};
	}

	/**
	 * The replies to a comment: the "comment" children of the first element
	 * of class "children" found in the container.
	 */
	@Override
	protected List<Element> getCommentCommentPosts(Document doc,
			Element container) {
		return getSubCommentElements(container.getElementsByClass("children")
				.first());
	}

	/**
	 * The "id" attribute of the first link found in the post.
	 */
	@Override
	protected String getCommentId(Element post) {
		Element idE = post.getElementsByTag("a").first();
		if (idE != null) {
			return idE.attr("id");
		}

		return "";
	}

	@Override
	protected String getCommentAuthor(Element post) {
		// Since we have no title, we switch with author
		return "";
	}

	/**
	 * The text of the first &lt;cite&gt; found in the first &lt;footer&gt; of
	 * the post (it is the author's name, see the comment in the code).
	 */
	@Override
	protected String getCommentTitle(Element post) {
		// Since we have no title, we switch with author
		Element authorE = post.getElementsByTag("footer").first();
		if (authorE != null) {
			authorE = authorE.getElementsByTag("cite").first();
		}

		if (authorE != null) {
			return authorE.text();
		}

		return "";
	}

	/**
	 * The raw "data-epoch" attribute of the first &lt;span&gt; found inside
	 * the first link of the post.
	 */
	@Override
	protected String getCommentDate(Element post) {
		Element idE = post.getElementsByTag("a").first();
		if (idE != null) {
			Element dateE = idE.getElementsByTag("span").first();
			if (dateE != null) {
				return dateE.attr("data-epoch");
			}
		}

		return "";
	}

	/**
	 * The first element of class "comment-content" of the post.
	 * 
	 * @return the content element, or <tt>null</tt> if there is none
	 */
	@Override
	protected Element getCommentContentElement(Element post) {
		Element contentE = post.getElementsByClass("comment-content").first();
		return contentE;
	}

	/**
	 * Processor used for the comments: &lt;h4&gt; elements are ignored.
	 */
	@Override
	protected ElementProcessor getElementProcessorComment() {
		return new BasicElementProcessor() {
			@Override
			public boolean ignoreNode(Node node) {
				if (node instanceof Element) {
					Element el = (Element) node;
					if ("h4".equals(el.tagName())) {
						return true;
					}
				}

				return false;
			}
		};
	}

	/**
	 * Collect the direct children of the given element that have the class
	 * "comment".
	 * 
	 * @param posts
	 *            the container of the comments (can be <tt>null</tt>)
	 * 
	 * @return the comment elements, in document order (an empty list if
	 *         <tt>posts</tt> is <tt>null</tt>)
	 */
	private List<Element> getSubCommentElements(Element posts) {
		List<Element> commentElements = new ArrayList<Element>();
		if (posts != null) {
			for (Element possibleCommentElement : posts.children()) {
				if (possibleCommentElement.hasClass("comment")) {
					commentElements.add(possibleCommentElement);
				}
			}
		}

		return commentElements;
	}
}