1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
5 import java
.util
.AbstractMap
;
6 import java
.util
.ArrayList
;
8 import java
.util
.Map
.Entry
;
10 import org
.jsoup
.nodes
.Document
;
11 import org
.jsoup
.nodes
.Element
;
12 import org
.jsoup
.nodes
.Node
;
16 * href="https://www.erenumerique.fr/">https://www.erenumerique.fr/</a>.
20 public class EreNumerique
extends BasicSupport
{
22 public String
getDescription() {
23 return "Ère Numérique.FR: faites le bon choix !";
27 protected List
<Entry
<URL
, String
>> getUrls() throws IOException
{
28 List
<Entry
<URL
, String
>> urls
= new ArrayList
<Entry
<URL
, String
>>();
29 for (String categ
: new String
[] { "Informatique" }) {
30 URL url
= new URL("https://www.erenumerique.fr/"
31 + categ
.toLowerCase());
32 urls
.add(new AbstractMap
.SimpleEntry
<URL
, String
>(url
, categ
));
39 protected List
<Element
> getArticles(Document doc
) {
40 return doc
.getElementsByClass("item-details");
44 protected String
getArticleId(Document doc
, Element article
) {
45 return ""; // will use the date
/**
 * Read the title of an article from its block.
 * <p>
 * The title is the text of the first {@code h2} element found under
 * {@code article}; {@code doc} is not used in the visible lines.
 * <p>
 * NOTE(review): the path taken when no {@code h2} exists is not part of
 * this excerpt -- confirm the fallback value.
 *
 * @param doc
 *            the parsed page
 * @param article
 *            the article block
 *
 * @return the text of the first {@code h2} element when there is one
 */
49 protected String
getArticleTitle(Document doc
, Element article
) {
50 Element titleElement
= article
.getElementsByTag("h2").first();
// first() may yield null, hence the check before reading the text
51 if (titleElement
!= null) {
52 return titleElement
.text();
/**
 * Read the author of an article from its block.
 * <p>
 * Looks for the first element with the class {@code td-post-author-name}
 * under {@code article}, narrows it to its first {@code a} element, and
 * returns the text of that link.
 * <p>
 * NOTE(review): the path taken when either lookup fails is not part of
 * this excerpt -- confirm the fallback value.
 *
 * @param doc
 *            the parsed page
 * @param article
 *            the article block
 *
 * @return the text of the author link when it can be found
 */
59 protected String
getArticleAuthor(Document doc
, Element article
) {
60 Element authorElement
= article
.getElementsByClass(
61 "td-post-author-name").first();
62 if (authorElement
!= null) {
// the variable is reused: it now points at the link inside the author box
63 authorElement
= authorElement
.getElementsByTag("a").first();
65 if (authorElement
!= null) {
66 return authorElement
.text();
/**
 * Read the date of an article from its block.
 * <p>
 * The value is the {@code datetime} attribute of the first {@code time}
 * element found under {@code article}, returned as-is.
 * <p>
 * NOTE(review): the path taken when no {@code time} element exists is not
 * part of this excerpt -- confirm the fallback value.
 *
 * @param doc
 *            the parsed page
 * @param article
 *            the article block
 *
 * @return the raw {@code datetime} attribute when a {@code time} element
 *         exists
 */
73 protected String
getArticleDate(Document doc
, Element article
) {
74 Element dateElement
= article
//
75 .getElementsByTag("time").first();
76 if (dateElement
!= null) {
77 return dateElement
.attr("datetime");
84 protected String
getArticleCategory(Document doc
, Element article
,
85 String currentCategory
) {
86 return currentCategory
;
/**
 * NOTE(review): only the signature of this method is visible in this
 * excerpt; presumably it returns the "details" text of an article --
 * confirm against the full file.
 *
 * @param doc
 *            the parsed page
 * @param article
 *            the article block
 */
90 protected String
getArticleDetails(Document doc
, Element article
) {
/**
 * Read the URL of an article from its block.
 * <p>
 * Takes the first {@code a} element found under {@code article} and
 * returns the result of {@code absUrl("href")} on it.
 * <p>
 * NOTE(review): the path taken when the block has no link is not part of
 * this excerpt -- confirm the fallback value.
 *
 * @param doc
 *            the parsed page
 * @param article
 *            the article block
 *
 * @return the {@code href} of the first link, as resolved by
 *         {@code absUrl}, when there is one
 */
95 protected String
getArticleIntUrl(Document doc
, Element article
) {
96 Element urlElement
= article
.getElementsByTag("a").first();
97 if (urlElement
!= null) {
98 return urlElement
.absUrl("href");
/**
 * NOTE(review): only the signature of this method is visible in this
 * excerpt; presumably it returns an external URL for the article --
 * confirm against the full file.
 *
 * @param doc
 *            the parsed page
 * @param article
 *            the article block
 */
105 protected String
getArticleExtUrl(Document doc
, Element article
) {
/**
 * Read the short content (excerpt) of an article from its block.
 * <p>
 * Looks up the class {@code td-excerpt} under {@code article} and, when
 * something is found, returns {@code getArticleText(...)} for it.
 * <p>
 * NOTE(review): the end of the lookup statement (original line 112) and
 * the path taken when nothing is found are not part of this excerpt --
 * confirm both against the full file.
 *
 * @param doc
 *            the parsed page
 * @param article
 *            the article block
 *
 * @return the text extracted from the excerpt element when there is one
 */
110 protected String
getArticleContent(Document doc
, Element article
) {
111 Element contentElement
= article
.getElementsByClass("td-excerpt")
113 if (contentElement
!= null) {
114 return getArticleText(contentElement
);
/**
 * Locate the body of the article inside a full article page.
 * <p>
 * Starts from the first {@code article} element of {@code doc}; when it
 * exists, narrows it to the first element below it whose {@code itemprop}
 * attribute is {@code articleBody}.
 * <p>
 * NOTE(review): the return statement is not part of this excerpt --
 * presumably the narrowed element is returned; confirm.
 *
 * @param doc
 *            the parsed article page
 */
121 protected Element
getFullArticle(Document doc
) {
122 Element article
= doc
.getElementsByTag("article").first();
123 if (article
!= null) {
// the variable is reused: it now points at the article body, if any
124 article
= article
.getElementsByAttributeValue("itemprop",
125 "articleBody").first();
/**
 * List the top-level comment posts of a full article page.
 * <p>
 * Delegates to {@code getSubCommentElements}, starting from the elements
 * of {@code doc} that carry the class {@code comment-list}.
 * <p>
 * NOTE(review): the end of the call is not part of this excerpt --
 * confirm how a single element is selected from the lookup result.
 *
 * @param doc
 *            the parsed article page
 * @param intUrl
 *            the URL of the article (unused in the visible lines)
 */
132 protected List
<Element
> getFullArticleCommentPosts(Document doc
, URL intUrl
) {
133 return getSubCommentElements(doc
.getElementsByClass("comment-list")
/**
 * Create the processor used on the nodes of a full article.
 * <p>
 * The returned {@code BasicElementProcessor} overrides two hooks in the
 * visible lines:
 * <ul>
 * <li>{@code ignoreNode}: true for nodes whose {@code class} attribute
 * contains {@code chapo}</li>
 * <li>{@code isSubtitle}: for an {@code Element} whose tag name starts
 * with {@code h} and is exactly 2 characters long, returns the element
 * text</li>
 * </ul>
 * NOTE(review): what {@code isSubtitle} returns for any other node is not
 * part of this excerpt -- confirm.
 */
138 protected ElementProcessor
getElementProcessorFullArticle() {
139 return new BasicElementProcessor() {
141 public boolean ignoreNode(Node node
) {
142 return node
.attr("class").contains("chapo");
146 public String
isSubtitle(Node node
) {
147 if (node
instanceof Element
) {
148 Element element
= (Element
) node
;
// "h" + one more character: matches tag names such as h1 ... h6
149 if (element
.tagName().startsWith("h")
150 && element
.tagName().length() == 2) {
151 return element
.text();
/**
 * List the replies attached to a comment.
 * <p>
 * Delegates to {@code getSubCommentElements}, starting from the elements
 * of {@code container} that carry the class {@code children}.
 * <p>
 * NOTE(review): the end of the parameter list (which declares
 * {@code container}) and the end of the call are not part of this
 * excerpt -- confirm both against the full file.
 *
 * @param doc
 *            the parsed article page
 */
160 protected List
<Element
> getCommentCommentPosts(Document doc
,
162 return getSubCommentElements(container
.getElementsByClass("children")
167 protected String
getCommentId(Element post
) {
168 Element idE
= post
.getElementsByTag("a").first();
170 return idE
.attr("id");
/**
 * NOTE(review): only the signature and the first comment of this method
 * are visible in this excerpt; per that comment, comments have no title
 * of their own so author and title are switched -- confirm what is
 * actually returned here.
 *
 * @param post
 *            the comment post element
 */
177 protected String
getCommentAuthor(Element post
) {
178 // Since we have no title, we switch with author
/**
 * Give the title of a comment post.
 * <p>
 * Comments have no title, so the author name is used in its place: the
 * first {@code footer} element of the post is narrowed to its first
 * {@code cite} element, whose text is returned.
 * <p>
 * NOTE(review): the path taken when either lookup fails is not part of
 * this excerpt -- confirm the fallback value.
 *
 * @param post
 *            the comment post element
 *
 * @return the text of the {@code cite} element when it can be found
 */
183 protected String
getCommentTitle(Element post
) {
184 // Since we have no title, we switch with author
185 Element authorE
= post
.getElementsByTag("footer").first();
186 if (authorE
!= null) {
// the variable is reused: it now points at the cite inside the footer
187 authorE
= authorE
.getElementsByTag("cite").first();
189 if (authorE
!= null) {
190 return authorE
.text();
197 protected String
getCommentDate(Element post
) {
198 Element idE
= post
.getElementsByTag("a").first();
200 Element dateE
= idE
.getElementsByTag("span").first();
202 return dateE
.attr("data-epoch");
/**
 * Locate the element holding the content of a comment post.
 * <p>
 * Looks for the first element with the class {@code comment-content}
 * under {@code post}.
 * <p>
 * NOTE(review): the return statement is not part of this excerpt --
 * presumably that element is returned; confirm.
 *
 * @param post
 *            the comment post element
 */
210 protected Element
getCommentContentElement(Element post
) {
211 Element contentE
= post
.getElementsByClass("comment-content").first();
/**
 * Create the processor used on the nodes of a comment.
 * <p>
 * The returned {@code BasicElementProcessor} overrides {@code ignoreNode},
 * which singles out {@code Element} nodes whose tag name is {@code h4}.
 * <p>
 * NOTE(review): the value returned for {@code h4} elements and for any
 * other node is not part of this excerpt -- presumably {@code h4}
 * elements are ignored; confirm.
 */
216 protected ElementProcessor
getElementProcessorComment() {
217 return new BasicElementProcessor() {
219 public boolean ignoreNode(Node node
) {
220 if (node
instanceof Element
) {
221 Element el
= (Element
) node
;
// constant-first equals: safe even if tagName() were to return null
222 if ("h4".equals(el
.tagName())) {
232 private List
<Element
> getSubCommentElements(Element posts
) {
233 List
<Element
> commentElements
= new ArrayList
<Element
>();
235 for (Element possibleCommentElement
: posts
.children()) {
236 if (possibleCommentElement
.hasClass("comment")) {
237 commentElements
.add(possibleCommentElement
);
242 return commentElements
;