1f7aea7d633eda4ee567f9a981dc0d961e36908d
1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
5 import java
.util
.AbstractMap
;
6 import java
.util
.ArrayList
;
8 import java
.util
.Map
.Entry
;
10 import org
.jsoup
.nodes
.Document
;
11 import org
.jsoup
.nodes
.Element
;
12 import org
.jsoup
.nodes
.Node
;
15 * Support for <a href="http://www.lemonde.fr/">http://www.lemonde.fr/</a>.
19 public class LeMonde
extends BasicSupport
{
/**
 * Returns the fixed, French-language description string used for the
 * Le Monde support.
 *
 * @return the constant description text
 */
21 public String
getDescription() {
22 return "Le Monde: Actualités et Infos en France et dans le monde";
/**
 * Builds the list of section pages to fetch, one entry per topic.
 * <p>
 * Each entry pairs the section URL
 * ({@code http://www.lemonde.fr/<topic>/1.html}, where the topic is
 * lower-cased and "é" is replaced by "e") with the topic's display name.
 *
 * @return the (URL, topic name) pairs, in the order the topics are listed
 *
 * @throws IOException
 *             if a section URL cannot be built (a malformed URL is reported
 *             as an IOException subclass)
 */
26 protected List
<Entry
<URL
, String
>> getUrls() throws IOException
{
27 List
<Entry
<URL
, String
>> urls
= new ArrayList
<Entry
<URL
, String
>>();
28 for (String topic
: new String
[] { "International", "Politique",
29 "Société", "Sciences" }) {
// Lower-case with Locale.ROOT: the default-locale variant would turn the
// "I" of "International" into a dotless "ı" under a Turkish/Azeri default
// locale and produce a wrong URL path.
30 URL url
= new URL("http://www.lemonde.fr/"
31 + topic
.toLowerCase(java.util.Locale.ROOT).replace("é", "e") + "/1.html");
32 urls
.add(new AbstractMap
.SimpleEntry
<URL
, String
>(url
, topic
));
/**
 * Selects the article entries of the given document: every element with the
 * {@code article} tag.
 *
 * @param doc
 *            the document to search
 *
 * @return the elements tagged {@code article}
 */
39 protected List
<Element
> getArticles(Document doc
) {
40 return doc
.getElementsByTag("article");
/**
 * Returns an empty ID for every article; per the inline note, the date is
 * used instead.
 *
 * @param doc
 *            the document containing the article (not read here)
 * @param article
 *            the article element (not read here)
 *
 * @return always the empty string
 */
44 protected String
getArticleId(Document doc
, Element article
) {
45 return ""; // will use the date
/**
 * Extracts the article title: the text of the first {@code h3} element found
 * inside the article element, when there is one.
 *
 * @param doc
 *            the document containing the article (not read here)
 * @param article
 *            the article element to search
 */
49 protected String
getArticleTitle(Document doc
, Element article
) {
// The headline is looked up as the first <h3> below the article element.
50 Element titleElement
= article
.getElementsByTag("h3").first();
51 if (titleElement
!= null) {
52 return titleElement
.text();
/**
 * Extracts the article author from the element carrying the
 * {@code signature} CSS class inside the article element, returning its text
 * when such an element is found.
 *
 * @param doc
 *            the document containing the article (not read here)
 * @param article
 *            the article element to search
 */
59 protected String
getArticleAuthor(Document doc
, Element article
) {
// The byline is looked up by CSS class, not by tag.
60 Element detailsElement
= article
.getElementsByClass("signature")
62 if (detailsElement
!= null) {
63 return detailsElement
.text();
/**
 * Extracts the article date: the raw value of the {@code datetime} attribute
 * of the first {@code time} element inside the article element, when there
 * is one.
 *
 * @param doc
 *            the document containing the article (not read here)
 * @param article
 *            the article element to search
 */
70 protected String
getArticleDate(Document doc
, Element article
) {
71 Element timeElement
= article
.getElementsByTag("time").first();
72 if (timeElement
!= null) {
// The attribute value is returned as-is; no date parsing happens here.
73 return timeElement
.attr("datetime");
/**
 * Returns the category of the article, which is simply the category passed
 * in by the caller.
 *
 * @param doc
 *            the document containing the article (not read here)
 * @param article
 *            the article element (not read here)
 * @param currentCategory
 *            the category currently being processed
 *
 * @return {@code currentCategory}, unchanged
 */
80 protected String
getArticleCategory(Document doc
, Element article
,
81 String currentCategory
) {
82 return currentCategory
;
86 protected String
getArticleDetails(Document doc
, Element article
) {
/**
 * Extracts the internal URL of the article: the {@code href} of the first
 * {@code a} element inside the article's first {@code h3}, resolved through
 * {@code absUrl}.
 *
 * @param doc
 *            the document containing the article (not read here)
 * @param article
 *            the article element to search
 */
91 protected String
getArticleIntUrl(Document doc
, Element article
) {
// Same <h3> lookup as the title; the link is searched inside it.
92 Element titleElement
= article
.getElementsByTag("h3").first();
93 if (titleElement
!= null) {
94 Element link
= titleElement
.getElementsByTag("a").first();
96 return link
.absUrl("href");
104 protected String
getArticleExtUrl(Document doc
, Element article
) {
/**
 * Extracts the article content shown in the listing: the text of the first
 * element carrying the {@code txt3} CSS class inside the article element,
 * when there is one.
 *
 * @param doc
 *            the document containing the article (not read here)
 * @param article
 *            the article element to search
 */
109 protected String
getArticleContent(Document doc
, Element article
) {
110 Element contentElement
= article
.getElementsByClass("txt3").first();
111 if (contentElement
!= null) {
112 return contentElement
.text();
/**
 * Locates the full article body in an article page: the element whose ID is
 * {@code articleBody}.
 *
 * @param doc
 *            the document to search
 *
 * @return the result of the ID lookup on {@code doc}
 */
119 protected Element
getFullArticle(Document doc
) {
120 return doc
.getElementById("articleBody");
124 protected List
<Element
> getFullArticleCommentPosts(Document doc
, URL intUrl
) {
/**
 * Creates the element processor used for full articles: an anonymous
 * {@code BasicElementProcessor} that overrides {@code ignoreNode} and
 * {@code isSubtitle}.
 * <ul>
 * <li>{@code ignoreNode} singles out elements carrying the {@code lire} CSS
 * class (NOTE(review): presumably so they are skipped; confirm against the
 * full method body).</li>
 * <li>{@code isSubtitle} returns the text of elements carrying the
 * {@code intertitre} CSS class.</li>
 * </ul>
 *
 * @return a new processor instance
 */
129 protected ElementProcessor
getElementProcessorFullArticle() {
130 return new BasicElementProcessor() {
132 public boolean ignoreNode(Node node
) {
// Only Element nodes can carry a CSS class, hence the instanceof check.
133 if (node
instanceof Element
) {
134 Element element
= (Element
) node
;
135 if (element
.hasClass("lire")) {
144 public String
isSubtitle(Node node
) {
145 if (node
instanceof Element
) {
146 Element element
= (Element
) node
;
// An "intertitre"-classed element supplies the subtitle text.
147 if (element
.hasClass("intertitre")) {
148 return element
.text();
156 // Comments are not supported for this site (horrible JavaScript system)
159 protected List
<Element
> getCommentCommentPosts(Document doc
,
165 protected String
getCommentId(Element post
) {
170 protected String
getCommentAuthor(Element post
) {
175 protected String
getCommentTitle(Element post
) {
180 protected String
getCommentDate(Element post
) {
185 protected Element
getCommentContentElement(Element post
) {
190 protected ElementProcessor
getElementProcessorComment() {