1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
6 import java
.util
.ArrayList
;
9 import org
.jsoup
.helper
.DataUtil
;
10 import org
.jsoup
.nodes
.Document
;
11 import org
.jsoup
.nodes
.Element
;
12 import org
.jsoup
.nodes
.Node
;
13 import org
.jsoup
.select
.Elements
;
15 import be
.nikiroo
.gofetch
.data
.Comment
;
16 import be
.nikiroo
.gofetch
.data
.Story
;
19 * Support <a href="http://www.lemonde.fr/">http://www.lemonde.fr/</a>.
23 public class LeMonde
extends BasicSupport
{
// Returns a constant, human-readable description string for the Le Monde
// site. The text is a hard-coded French literal and takes no input.
25 public String
getDescription() {
26 return "Le Monde: Actualités et Infos en France et dans le monde";
// Builds a list of Story objects by scraping one listing page per topic
// from http://www.lemonde.fr/.
//
// For each hard-coded topic, the page "<topic>/1.html" is opened through
// `downloader`, parsed as UTF-8 with jsoup, and every <article> element that
// has at least one <time>, one <h3> and one element with class "txt3" is
// turned into a Story and appended to the result.
//
// Declared to throw IOException (presumably from the download/parse calls —
// confirm against the helpers, which are not visible here).
30 public List
<Story
> list() throws IOException
{
// Accumulator for every story found across all topics.
31 List
<Story
> list
= new ArrayList
<Story
>();
// The set of topics is fixed; each one is a path segment of the site URL.
33 for (String topic
: new String
[] { "international", "politique",
34 "societe", "sciences" }) {
35 URL url
= new URL("http://www.lemonde.fr/" + topic
+ "/1.html");
// NOTE(review): no close() of this stream is visible in this view —
// confirm it is released (either here or by the parsing helper).
36 InputStream in
= downloader
.open(url
);
// The URL string is passed as the base URI so that absUrl() below can
// resolve relative links.
37 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
38 Elements articles
= doc
.getElementsByTag("article");
39 for (Element article
: articles
) {
40 Elements times
= article
.getElementsByTag("time");
41 Elements titleElements
= article
.getElementsByTag("h3");
42 Elements contentElements
= article
.getElementsByClass("txt3");
// Only articles that carry a time, a title and a text summary are kept;
// anything else is silently skipped.
43 if (times
.size() > 0 && titleElements
.size() > 0
44 && contentElements
.size() > 0) {
// The id is derived from the first <time>'s "datetime" attribute with
// ':' replaced by '_' (the end of this expression is not visible here).
45 String id
= times
.get(0).attr("datetime").replace(":", "_")
47 String title
= titleElements
.get(0).text();
// NOTE(review): the date is computed from the <h3> title text, not from
// the <time> element — confirm this is intended.
48 String date
= date(titleElements
.get(0).text());
49 String content
= contentElements
.get(0).text();
// Optional author: text of the first element with class "signature".
55 Elements detailsElements
= article
56 .getElementsByClass("signature");
57 if (detailsElements
.size() > 0) {
58 author
= detailsElements
.get(0).text();
// Internal URL: absolute href of the first <a> inside the title element.
61 Elements links
= titleElements
.get(0).getElementsByTag("a");
62 if (links
.size() > 0) {
63 intUrl
= links
.get(0).absUrl("href");
// author, details, intUrl and extUrl are used here; their declarations
// and initial values are not visible in this view.
64 list
.add(new Story(getType(), id
, title
, author
, date
,
65 topic
, details
, intUrl
, extUrl
, content
));
// Downloads the page at the story's internal URL and stores the resulting
// full text and comment list on the given Story.
//
// The full content starts from story.getContent(); if the page has an
// element with id "articleBody", each line returned by toLines(...) for that
// element is appended with a trailing newline. The text is then re-spaced
// (see below) and written back with setFullContent(); the comment list is
// written back with setComments().
//
// Declared to throw IOException (presumably from the download/parse calls —
// confirm against the helpers, which are not visible here).
75 public void fetch(Story story
) throws IOException
{
76 String fullContent
= story
.getContent();
// Nothing visible in this method adds to this list before it is stored.
77 List
<Comment
> comments
= new ArrayList
<Comment
>();
79 // Note: no comments on this site as far as I can see (or maybe with
80 // some javascript, I need to check...)
82 URL url
= new URL(story
.getUrlInternal());
// NOTE(review): no close() of this stream is visible in this view —
// confirm it is released.
83 InputStream in
= downloader
.open(url
);
84 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
// The article text is looked up by element id; a page without it adds no
// lines to the content.
85 Element article
= doc
.getElementById("articleBody");
86 if (article
!= null) {
// An anonymous BasicElementProcessor customises how toLines() walks the
// article's nodes.
87 for (String line
: toLines(article
, new BasicElementProcessor() {
// Tests whether the node is an Element with class "lire"; the values
// returned for each case are not visible in this view.
89 public boolean ignoreNode(Node node
) {
90 if (node
instanceof Element
) {
91 Element element
= (Element
) node
;
92 if (element
.hasClass("lire")) {
// For an Element with class "intertitre", returns its text as the
// subtitle; the result for other nodes is not visible in this view.
101 public String
isSubtitle(Node node
) {
102 if (node
instanceof Element
) {
103 Element element
= (Element
) node
;
104 if (element
.hasClass("intertitre")) {
105 return element
.text();
// Each extracted line is appended with a single trailing newline.
111 fullContent
+= line
+ "\n";
114 // Content is too tight with a single break per line:
// Every newline is doubled first, then runs of four newlines are collapsed
// back to two; the collapse is applied twice to shrink longer runs further
// (the end of this chain is not visible in this view).
115 fullContent
= fullContent
.replace("\n", "\n\n") //
116 .replace("\n\n\n\n", "\n\n") //
117 .replace("\n\n\n\n", "\n\n") //
// Write the results back onto the story passed in by the caller.
121 story
.setFullContent(fullContent
);
122 story
.setComments(comments
);