Fix layout issues in getContent() text
[gofetch.git] / src / be / nikiroo / gofetch / support / LeMonde.java
CommitLineData
100a8395
NR
1package be.nikiroo.gofetch.support;
2
3import java.io.IOException;
100a8395 4import java.net.URL;
3e62b034 5import java.util.AbstractMap;
100a8395
NR
6import java.util.ArrayList;
7import java.util.List;
3e62b034 8import java.util.Map.Entry;
100a8395 9
100a8395
NR
10import org.jsoup.nodes.Document;
11import org.jsoup.nodes.Element;
12import org.jsoup.nodes.Node;
100a8395 13
b34d1f35
NR
14/**
15 * Support <a href="http://www.lemonde.fr/">http://www.lemonde.fr/</a>.
16 *
17 * @author niki
18 */
100a8395
NR
19public class LeMonde extends BasicSupport {
20 @Override
21 public String getDescription() {
22 return "Le Monde: Actualités et Infos en France et dans le monde";
23 }
24
25 @Override
3e62b034
NR
26 protected List<Entry<URL, String>> getUrls() throws IOException {
27 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
28 for (String topic : new String[] { "International", "Politique",
29 "Société", "Sciences" }) {
30 URL url = new URL("http://www.lemonde.fr/"
31 + topic.toLowerCase().replace("é", "e") + "/1.html");
32 urls.add(new AbstractMap.SimpleEntry<URL, String>(url, topic));
33 }
100a8395 34
3e62b034
NR
35 return urls;
36 }
37
38 @Override
39 protected List<Element> getArticles(Document doc) {
40 return doc.getElementsByTag("article");
41 }
42
43 @Override
44 protected String getArticleId(Document doc, Element article) {
45 return ""; // will use the date
46 }
47
48 @Override
49 protected String getArticleTitle(Document doc, Element article) {
50 Element titleElement = article.getElementsByTag("h3").first();
51 if (titleElement != null) {
52 return titleElement.text();
53 }
54
55 return "";
56 }
57
58 @Override
59 protected String getArticleAuthor(Document doc, Element article) {
60 Element detailsElement = article.getElementsByClass("signature")
61 .first();
62 if (detailsElement != null) {
63 return detailsElement.text();
64 }
65
66 return "";
67 }
68
69 @Override
70 protected String getArticleDate(Document doc, Element article) {
71 Element timeElement = article.getElementsByTag("time").first();
72 if (timeElement != null) {
73 return timeElement.attr("datetime");
74 }
75
76 return "";
77 }
78
79 @Override
80 protected String getArticleCategory(Document doc, Element article,
81 String currentCategory) {
82 return currentCategory;
83 }
84
85 @Override
86 protected String getArticleDetails(Document doc, Element article) {
87 return "";
88 }
89
90 @Override
91 protected String getArticleIntUrl(Document doc, Element article) {
92 Element titleElement = article.getElementsByTag("h3").first();
93 if (titleElement != null) {
94 Element link = titleElement.getElementsByTag("a").first();
95 if (link != null) {
96 return link.absUrl("href");
100a8395
NR
97 }
98 }
99
3e62b034 100 return "";
100a8395
NR
101 }
102
103 @Override
3e62b034
NR
104 protected String getArticleExtUrl(Document doc, Element article) {
105 return "";
106 }
100a8395 107
3e62b034
NR
108 @Override
109 protected String getArticleContent(Document doc, Element article) {
110 Element contentElement = article.getElementsByClass("txt3").first();
111 if (contentElement != null) {
e818d449 112 return getArticleText(contentElement);
3e62b034 113 }
100a8395 114
3e62b034
NR
115 return "";
116 }
117
118 @Override
119 protected Element getFullArticle(Document doc) {
120 return doc.getElementById("articleBody");
121 }
122
123 @Override
124 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
125 return null;
126 }
100a8395 127
3e62b034
NR
128 @Override
129 protected ElementProcessor getElementProcessorFullArticle() {
130 return new BasicElementProcessor() {
131 @Override
132 public boolean ignoreNode(Node node) {
133 if (node instanceof Element) {
134 Element element = (Element) node;
135 if (element.hasClass("lire")) {
136 return true;
137 }
100a8395
NR
138 }
139
3e62b034
NR
140 return false;
141 }
142
143 @Override
144 public String isSubtitle(Node node) {
145 if (node instanceof Element) {
146 Element element = (Element) node;
147 if (element.hasClass("intertitre")) {
148 return element.text();
100a8395 149 }
100a8395 150 }
3e62b034 151 return null;
100a8395 152 }
3e62b034
NR
153 };
154 }
100a8395 155
3e62b034 156 // No comment on this site, horrible javascript system
100a8395 157
3e62b034
NR
158 @Override
159 protected List<Element> getCommentCommentPosts(Document doc,
160 Element container) {
161 return null;
162 }
163
164 @Override
165 protected String getCommentId(Element post) {
166 return null;
167 }
168
169 @Override
170 protected String getCommentAuthor(Element post) {
171 return null;
172 }
173
174 @Override
175 protected String getCommentTitle(Element post) {
176 return null;
177 }
178
179 @Override
180 protected String getCommentDate(Element post) {
181 return null;
182 }
183
184 @Override
185 protected Element getCommentContentElement(Element post) {
186 return null;
187 }
188
189 @Override
190 protected ElementProcessor getElementProcessorComment() {
191 return null;
100a8395
NR
192 }
193}