4e22b4c0a9c8f6fdc93db8a09984f45b3020594a
1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
6 import java
.util
.ArrayList
;
9 import org
.jsoup
.helper
.DataUtil
;
10 import org
.jsoup
.nodes
.Document
;
11 import org
.jsoup
.nodes
.Element
;
12 import org
.jsoup
.nodes
.Node
;
13 import org
.jsoup
.select
.Elements
;
15 import be
.nikiroo
.gofetch
.data
.Comment
;
16 import be
.nikiroo
.gofetch
.data
.Story
;
18 public class LeMonde
extends BasicSupport
{
20 public String
getDescription() {
21 return "Le Monde: Actualités et Infos en France et dans le monde";
25 public List
<Story
> list() throws IOException
{
26 List
<Story
> list
= new ArrayList
<Story
>();
28 for (String topic
: new String
[] { "international", "politique",
29 "societe", "sciences" }) {
30 URL url
= new URL("http://www.lemonde.fr/" + topic
+ "/1.html");
31 InputStream in
= open(url
);
32 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
33 Elements articles
= doc
.getElementsByTag("article");
34 for (Element article
: articles
) {
35 Elements times
= article
.getElementsByTag("time");
36 Elements titleElements
= article
.getElementsByTag("h3");
37 Elements contentElements
= article
.getElementsByClass("txt3");
38 if (times
.size() > 0 && titleElements
.size() > 0
39 && contentElements
.size() > 0) {
40 String id
= times
.get(0).attr("datetime").replace(":", "_");
41 String title
= "[" + topic
+ "] "
42 + titleElements
.get(0).text();
43 String content
= contentElements
.get(0).text();
48 Elements detailsElements
= article
49 .getElementsByClass("signature");
50 if (detailsElements
.size() > 0) {
51 details
= detailsElements
.get(0).text();
54 Elements links
= titleElements
.get(0).getElementsByTag("a");
55 if (links
.size() > 0) {
56 intUrl
= links
.get(0).absUrl("href");
57 list
.add(new Story(getType(), id
, title
, details
,
58 intUrl
, extUrl
, content
));
68 public void fetch(Story story
) throws IOException
{
69 String fullContent
= story
.getContent();
70 List
<Comment
> comments
= new ArrayList
<Comment
>();
72 // Note: no comments on this site as far as I can see (or maybe with
73 // some javascript, I need to check...)
75 URL url
= new URL(story
.getUrlInternal());
76 InputStream in
= open(url
);
77 Document doc
= DataUtil
.load(in
, "UTF-8", url
.toString());
78 Element article
= doc
.getElementById("articleBody");
79 if (article
!= null) {
80 for (String line
: toLines(article
, new QuoteProcessor() {
82 public String
processText(String text
) {
87 public boolean ignoreNode(Node node
) {
88 if (node
instanceof Element
) {
89 Element element
= (Element
) node
;
90 if (element
.hasClass("lire")) {
99 public boolean detectQuote(Node node
) {
104 public String
manualProcessing(Node node
) {
105 if (node
instanceof Element
) {
106 Element element
= (Element
) node
;
107 if (element
.hasClass("intertitre")) {
108 return "\n[ " + element
.text() + " ]\n";
114 fullContent
+= line
+ "\n";
117 // Content is too tight with a single break per line:
118 fullContent
= fullContent
.replace("\n", "\n\n") //
119 .replace("\n\n\n\n", "\n\n") //
120 .replace("\n\n\n\n", "\n\n") //
124 story
.setFullContent(fullContent
);
125 story
.setComments(comments
);