Commit | Line | Data |
---|---|---|
100a8395 NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
import java.io.IOException;
import java.net.URL;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map.Entry;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
100a8395 | 13 | |
b34d1f35 NR |
14 | /** |
15 | * Support <a href="http://www.lemonde.fr/">http://www.lemonde.fr/</a>. | |
16 | * | |
17 | * @author niki | |
18 | */ | |
100a8395 NR |
19 | public class LeMonde extends BasicSupport { |
20 | @Override | |
21 | public String getDescription() { | |
22 | return "Le Monde: Actualités et Infos en France et dans le monde"; | |
23 | } | |
24 | ||
25 | @Override | |
3e62b034 NR |
26 | protected List<Entry<URL, String>> getUrls() throws IOException { |
27 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
28 | for (String topic : new String[] { "International", "Politique", | |
29 | "Société", "Sciences" }) { | |
30 | URL url = new URL("http://www.lemonde.fr/" | |
31 | + topic.toLowerCase().replace("é", "e") + "/1.html"); | |
32 | urls.add(new AbstractMap.SimpleEntry<URL, String>(url, topic)); | |
33 | } | |
100a8395 | 34 | |
3e62b034 NR |
35 | return urls; |
36 | } | |
37 | ||
38 | @Override | |
39 | protected List<Element> getArticles(Document doc) { | |
40 | return doc.getElementsByTag("article"); | |
41 | } | |
42 | ||
43 | @Override | |
44 | protected String getArticleId(Document doc, Element article) { | |
45 | return ""; // will use the date | |
46 | } | |
47 | ||
48 | @Override | |
49 | protected String getArticleTitle(Document doc, Element article) { | |
50 | Element titleElement = article.getElementsByTag("h3").first(); | |
51 | if (titleElement != null) { | |
52 | return titleElement.text(); | |
53 | } | |
54 | ||
55 | return ""; | |
56 | } | |
57 | ||
58 | @Override | |
59 | protected String getArticleAuthor(Document doc, Element article) { | |
60 | Element detailsElement = article.getElementsByClass("signature") | |
61 | .first(); | |
62 | if (detailsElement != null) { | |
63 | return detailsElement.text(); | |
64 | } | |
65 | ||
66 | return ""; | |
67 | } | |
68 | ||
69 | @Override | |
70 | protected String getArticleDate(Document doc, Element article) { | |
71 | Element timeElement = article.getElementsByTag("time").first(); | |
72 | if (timeElement != null) { | |
73 | return timeElement.attr("datetime"); | |
74 | } | |
75 | ||
76 | return ""; | |
77 | } | |
78 | ||
79 | @Override | |
80 | protected String getArticleCategory(Document doc, Element article, | |
81 | String currentCategory) { | |
82 | return currentCategory; | |
83 | } | |
84 | ||
85 | @Override | |
86 | protected String getArticleDetails(Document doc, Element article) { | |
87 | return ""; | |
88 | } | |
89 | ||
90 | @Override | |
91 | protected String getArticleIntUrl(Document doc, Element article) { | |
92 | Element titleElement = article.getElementsByTag("h3").first(); | |
93 | if (titleElement != null) { | |
94 | Element link = titleElement.getElementsByTag("a").first(); | |
95 | if (link != null) { | |
96 | return link.absUrl("href"); | |
100a8395 NR |
97 | } |
98 | } | |
99 | ||
3e62b034 | 100 | return ""; |
100a8395 NR |
101 | } |
102 | ||
103 | @Override | |
3e62b034 NR |
104 | protected String getArticleExtUrl(Document doc, Element article) { |
105 | return ""; | |
106 | } | |
100a8395 | 107 | |
3e62b034 NR |
108 | @Override |
109 | protected String getArticleContent(Document doc, Element article) { | |
110 | Element contentElement = article.getElementsByClass("txt3").first(); | |
111 | if (contentElement != null) { | |
112 | return contentElement.text(); | |
113 | } | |
100a8395 | 114 | |
3e62b034 NR |
115 | return ""; |
116 | } | |
117 | ||
118 | @Override | |
119 | protected Element getFullArticle(Document doc) { | |
120 | return doc.getElementById("articleBody"); | |
121 | } | |
122 | ||
123 | @Override | |
124 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
125 | return null; | |
126 | } | |
100a8395 | 127 | |
3e62b034 NR |
128 | @Override |
129 | protected ElementProcessor getElementProcessorFullArticle() { | |
130 | return new BasicElementProcessor() { | |
131 | @Override | |
132 | public boolean ignoreNode(Node node) { | |
133 | if (node instanceof Element) { | |
134 | Element element = (Element) node; | |
135 | if (element.hasClass("lire")) { | |
136 | return true; | |
137 | } | |
100a8395 NR |
138 | } |
139 | ||
3e62b034 NR |
140 | return false; |
141 | } | |
142 | ||
143 | @Override | |
144 | public String isSubtitle(Node node) { | |
145 | if (node instanceof Element) { | |
146 | Element element = (Element) node; | |
147 | if (element.hasClass("intertitre")) { | |
148 | return element.text(); | |
100a8395 | 149 | } |
100a8395 | 150 | } |
3e62b034 | 151 | return null; |
100a8395 | 152 | } |
3e62b034 NR |
153 | }; |
154 | } | |
100a8395 | 155 | |
3e62b034 | 156 | // No comment on this site, horrible javascript system |
100a8395 | 157 | |
3e62b034 NR |
158 | @Override |
159 | protected List<Element> getCommentCommentPosts(Document doc, | |
160 | Element container) { | |
161 | return null; | |
162 | } | |
163 | ||
164 | @Override | |
165 | protected String getCommentId(Element post) { | |
166 | return null; | |
167 | } | |
168 | ||
169 | @Override | |
170 | protected String getCommentAuthor(Element post) { | |
171 | return null; | |
172 | } | |
173 | ||
174 | @Override | |
175 | protected String getCommentTitle(Element post) { | |
176 | return null; | |
177 | } | |
178 | ||
179 | @Override | |
180 | protected String getCommentDate(Element post) { | |
181 | return null; | |
182 | } | |
183 | ||
184 | @Override | |
185 | protected Element getCommentContentElement(Element post) { | |
186 | return null; | |
187 | } | |
188 | ||
189 | @Override | |
190 | protected ElementProcessor getElementProcessorComment() { | |
191 | return null; | |
100a8395 NR |
192 | } |
193 | } |