Commit | Line | Data |
---|---|---|
31755801 NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
3 | import java.io.IOException; | |
31755801 | 4 | import java.net.URL; |
3e62b034 | 5 | import java.util.AbstractMap; |
31755801 NR |
6 | import java.util.ArrayList; |
7 | import java.util.List; | |
3e62b034 | 8 | import java.util.Map.Entry; |
31755801 | 9 | |
31755801 NR |
10 | import org.jsoup.nodes.Document; |
11 | import org.jsoup.nodes.Element; | |
12 | import org.jsoup.nodes.Node; | |
31755801 NR |
13 | |
14 | /** | |
15 | * Support <a | |
16 | * href="https://www.erenumerique.fr/">https://www.erenumerique.fr/</a>. | |
17 | * | |
18 | * @author niki | |
19 | */ | |
20 | public class EreNumerique extends BasicSupport { | |
21 | @Override | |
22 | public String getDescription() { | |
23 | return "Ère Numérique.FR: faites le bon choix !"; | |
24 | } | |
25 | ||
26 | @Override | |
3e62b034 NR |
27 | protected List<Entry<URL, String>> getUrls() throws IOException { |
28 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
29 | for (String categ : new String[] { "Informatique" }) { | |
30 | URL url = new URL("https://www.erenumerique.fr/" | |
31 | + categ.toLowerCase()); | |
32 | urls.add(new AbstractMap.SimpleEntry<URL, String>(url, categ)); | |
33 | } | |
31755801 | 34 | |
3e62b034 NR |
35 | return urls; |
36 | } | |
31755801 | 37 | |
3e62b034 NR |
38 | @Override |
39 | protected List<Element> getArticles(Document doc) { | |
40 | return doc.getElementsByClass("item-details"); | |
41 | } | |
31755801 | 42 | |
3e62b034 NR |
43 | @Override |
44 | protected String getArticleId(Document doc, Element article) { | |
45 | return ""; // will use the date | |
46 | } | |
31755801 | 47 | |
3e62b034 NR |
48 | @Override |
49 | protected String getArticleTitle(Document doc, Element article) { | |
50 | Element titleElement = article.getElementsByTag("h2").first(); | |
51 | if (titleElement != null) { | |
52 | return titleElement.text(); | |
53 | } | |
31755801 | 54 | |
3e62b034 NR |
55 | return ""; |
56 | } | |
31755801 | 57 | |
3e62b034 NR |
58 | @Override |
59 | protected String getArticleAuthor(Document doc, Element article) { | |
60 | Element authorElement = article.getElementsByClass( | |
61 | "td-post-author-name").first(); | |
62 | if (authorElement != null) { | |
63 | authorElement = authorElement.getElementsByTag("a").first(); | |
64 | } | |
65 | if (authorElement != null) { | |
66 | return authorElement.text(); | |
31755801 NR |
67 | } |
68 | ||
3e62b034 | 69 | return ""; |
31755801 NR |
70 | } |
71 | ||
72 | @Override | |
3e62b034 NR |
73 | protected String getArticleDate(Document doc, Element article) { |
74 | Element dateElement = article // | |
75 | .getElementsByTag("time").first(); | |
76 | if (dateElement != null) { | |
77 | return dateElement.attr("datetime"); | |
78 | } | |
31755801 | 79 | |
3e62b034 NR |
80 | return ""; |
81 | } | |
31755801 | 82 | |
3e62b034 NR |
83 | @Override |
84 | protected String getArticleCategory(Document doc, Element article, | |
85 | String currentCategory) { | |
86 | return currentCategory; | |
87 | } | |
31755801 | 88 | |
3e62b034 NR |
89 | @Override |
90 | protected String getArticleDetails(Document doc, Element article) { | |
91 | return ""; | |
92 | } | |
31755801 | 93 | |
3e62b034 NR |
94 | @Override |
95 | protected String getArticleIntUrl(Document doc, Element article) { | |
96 | Element urlElement = article.getElementsByTag("a").first(); | |
97 | if (urlElement != null) { | |
98 | return urlElement.absUrl("href"); | |
31755801 | 99 | } |
3e62b034 NR |
100 | |
101 | return ""; | |
31755801 NR |
102 | } |
103 | ||
3e62b034 NR |
104 | @Override |
105 | protected String getArticleExtUrl(Document doc, Element article) { | |
106 | return ""; | |
107 | } | |
108 | ||
109 | @Override | |
110 | protected String getArticleContent(Document doc, Element article) { | |
111 | Element contentElement = article.getElementsByClass("td-excerpt") | |
112 | .first(); | |
113 | if (contentElement != null) { | |
e818d449 | 114 | return getArticleText(contentElement); |
3e62b034 | 115 | } |
31755801 | 116 | |
3e62b034 NR |
117 | return ""; |
118 | } | |
31755801 | 119 | |
3e62b034 NR |
120 | @Override |
121 | protected Element getFullArticle(Document doc) { | |
122 | Element article = doc.getElementsByTag("article").first(); | |
123 | if (article != null) { | |
124 | article = article.getElementsByAttributeValue("itemprop", | |
125 | "articleBody").first(); | |
126 | } | |
127 | ||
128 | return article; | |
129 | } | |
130 | ||
131 | @Override | |
132 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
133 | return getSubCommentElements(doc.getElementsByClass("comment-list") | |
134 | .first()); | |
135 | } | |
31755801 | 136 | |
3e62b034 NR |
137 | @Override |
138 | protected ElementProcessor getElementProcessorFullArticle() { | |
139 | return new BasicElementProcessor() { | |
140 | @Override | |
141 | public boolean ignoreNode(Node node) { | |
142 | return node.attr("class").contains("chapo"); | |
143 | } | |
144 | ||
145 | @Override | |
146 | public String isSubtitle(Node node) { | |
147 | if (node instanceof Element) { | |
148 | Element element = (Element) node; | |
149 | if (element.tagName().startsWith("h") | |
150 | && element.tagName().length() == 2) { | |
151 | return element.text(); | |
31755801 NR |
152 | } |
153 | } | |
3e62b034 NR |
154 | return null; |
155 | } | |
156 | }; | |
157 | } | |
158 | ||
159 | @Override | |
160 | protected List<Element> getCommentCommentPosts(Document doc, | |
161 | Element container) { | |
162 | return getSubCommentElements(container.getElementsByClass("children") | |
163 | .first()); | |
164 | } | |
165 | ||
166 | @Override | |
167 | protected String getCommentId(Element post) { | |
168 | Element idE = post.getElementsByTag("a").first(); | |
169 | if (idE != null) { | |
170 | return idE.attr("id"); | |
171 | } | |
172 | ||
173 | return ""; | |
174 | } | |
175 | ||
176 | @Override | |
177 | protected String getCommentAuthor(Element post) { | |
178 | // Since we have no title, we switch with author | |
179 | return ""; | |
180 | } | |
181 | ||
182 | @Override | |
183 | protected String getCommentTitle(Element post) { | |
184 | // Since we have no title, we switch with author | |
185 | Element authorE = post.getElementsByTag("footer").first(); | |
186 | if (authorE != null) { | |
187 | authorE = authorE.getElementsByTag("cite").first(); | |
188 | } | |
189 | if (authorE != null) { | |
190 | return authorE.text(); | |
191 | } | |
192 | ||
193 | return ""; | |
194 | } | |
195 | ||
196 | @Override | |
197 | protected String getCommentDate(Element post) { | |
198 | Element idE = post.getElementsByTag("a").first(); | |
199 | if (idE != null) { | |
200 | Element dateE = idE.getElementsByTag("span").first(); | |
201 | if (dateE != null) { | |
202 | return dateE.attr("data-epoch"); | |
203 | } | |
204 | } | |
31755801 | 205 | |
3e62b034 NR |
206 | return ""; |
207 | } | |
208 | ||
209 | @Override | |
210 | protected Element getCommentContentElement(Element post) { | |
211 | Element contentE = post.getElementsByClass("comment-content").first(); | |
212 | return contentE; | |
213 | } | |
214 | ||
215 | @Override | |
216 | protected ElementProcessor getElementProcessorComment() { | |
217 | return new BasicElementProcessor() { | |
218 | @Override | |
219 | public boolean ignoreNode(Node node) { | |
220 | if (node instanceof Element) { | |
221 | Element el = (Element) node; | |
222 | if ("h4".equals(el.tagName())) { | |
223 | return true; | |
31755801 NR |
224 | } |
225 | } | |
226 | ||
3e62b034 NR |
227 | return false; |
228 | } | |
229 | }; | |
230 | } | |
31755801 | 231 | |
3e62b034 NR |
232 | private List<Element> getSubCommentElements(Element posts) { |
233 | List<Element> commentElements = new ArrayList<Element>(); | |
234 | if (posts != null) { | |
235 | for (Element possibleCommentElement : posts.children()) { | |
236 | if (possibleCommentElement.hasClass("comment")) { | |
237 | commentElements.add(possibleCommentElement); | |
238 | } | |
31755801 NR |
239 | } |
240 | } | |
241 | ||
3e62b034 | 242 | return commentElements; |
31755801 NR |
243 | } |
244 | } |