1 package be
.nikiroo
.gofetch
.support
;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;

18 public class LeMonde
extends BasicSupport
{
20 public String
getDescription() {
21 return "Le Monde: Actualités et Infos en France et dans le monde";
25 public List
<Story
> list() throws IOException
{
26 List
<Story
> list
= new ArrayList
<Story
>();
28 for (String topic
: new String
[] { "international", "politique",
29 "societe", "sciences" }) {
30 URL url
= new URL("http://www.lemonde.fr/" + topic
+ "/1.html");
31 InputStream in
= downloader
.open(url
);
32 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
33 Elements articles
= doc
.getElementsByTag("article");
34 for (Element article
: articles
) {
35 Elements times
= article
.getElementsByTag("time");
36 Elements titleElements
= article
.getElementsByTag("h3");
37 Elements contentElements
= article
.getElementsByClass("txt3");
38 if (times
.size() > 0 && titleElements
.size() > 0
39 && contentElements
.size() > 0) {
40 String id
= times
.get(0).attr("datetime").replace(":", "_")
42 String title
= "[" + topic
+ "] "
43 + titleElements
.get(0).text();
44 String content
= contentElements
.get(0).text();
49 Elements detailsElements
= article
50 .getElementsByClass("signature");
51 if (detailsElements
.size() > 0) {
52 details
= detailsElements
.get(0).text();
55 Elements links
= titleElements
.get(0).getElementsByTag("a");
56 if (links
.size() > 0) {
57 intUrl
= links
.get(0).absUrl("href");
58 list
.add(new Story(getType(), id
, title
, details
,
59 intUrl
, extUrl
, content
));
69 public void fetch(Story story
) throws IOException
{
70 String fullContent
= story
.getContent();
71 List
<Comment
> comments
= new ArrayList
<Comment
>();
73 // Note: no comments on this site as far as I can see (or maybe with
74 // some javascript, I need to check...)
76 URL url
= new URL(story
.getUrlInternal());
77 InputStream in
= downloader
.open(url
);
78 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
79 Element article
= doc
.getElementById("articleBody");
80 if (article
!= null) {
81 for (String line
: toLines(article
, new BasicElementProcessor() {
83 public boolean ignoreNode(Node node
) {
84 if (node
instanceof Element
) {
85 Element element
= (Element
) node
;
86 if (element
.hasClass("lire")) {
95 public String
manualProcessing(Node node
) {
96 if (node
instanceof Element
) {
97 Element element
= (Element
) node
;
98 if (element
.hasClass("intertitre")) {
99 return "\n[ " + element
.text() + " ]\n";
105 fullContent
+= line
+ "\n";
108 // Content is too tight with a single break per line:
109 fullContent
= fullContent
.replace("\n", "\n\n") //
110 .replace("\n\n\n\n", "\n\n") //
111 .replace("\n\n\n\n", "\n\n") //
115 story
.setFullContent(fullContent
);
116 story
.setComments(comments
);