Version 1.0.1: Fix Le Monde IDs in gopher
[gofetch.git] / src / be / nikiroo / gofetch / support / LeMonde.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.List;
8
9 import org.jsoup.helper.DataUtil;
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 import be.nikiroo.gofetch.data.Comment;
16 import be.nikiroo.gofetch.data.Story;
17
18 public class LeMonde extends BasicSupport {
19 @Override
20 public String getDescription() {
21 return "Le Monde: Actualités et Infos en France et dans le monde";
22 }
23
24 @Override
25 public List<Story> list() throws IOException {
26 List<Story> list = new ArrayList<Story>();
27
28 for (String topic : new String[] { "international", "politique",
29 "societe", "sciences" }) {
30 URL url = new URL("http://www.lemonde.fr/" + topic + "/1.html");
31 InputStream in = open(url);
32 Document doc = DataUtil.load(in, "UTF-8", url.toString());
33 Elements articles = doc.getElementsByTag("article");
34 for (Element article : articles) {
35 Elements times = article.getElementsByTag("time");
36 Elements titleElements = article.getElementsByTag("h3");
37 Elements contentElements = article.getElementsByClass("txt3");
38 if (times.size() > 0 && titleElements.size() > 0
39 && contentElements.size() > 0) {
40 String id = times.get(0).attr("datetime").replace(":", "_")
41 .replace("+", "_");
42 String title = "[" + topic + "] "
43 + titleElements.get(0).text();
44 String content = contentElements.get(0).text();
45 String intUrl = "";
46 String extUrl = "";
47 String details = "";
48
49 Elements detailsElements = article
50 .getElementsByClass("signature");
51 if (detailsElements.size() > 0) {
52 details = detailsElements.get(0).text();
53 }
54
55 Elements links = titleElements.get(0).getElementsByTag("a");
56 if (links.size() > 0) {
57 intUrl = links.get(0).absUrl("href");
58 list.add(new Story(getType(), id, title, details,
59 intUrl, extUrl, content));
60 }
61 }
62 }
63 }
64
65 return list;
66 }
67
68 @Override
69 public void fetch(Story story) throws IOException {
70 String fullContent = story.getContent();
71 List<Comment> comments = new ArrayList<Comment>();
72
73 // Note: no comments on this site as far as I can see (or maybe with
74 // some javascript, I need to check...)
75
76 URL url = new URL(story.getUrlInternal());
77 InputStream in = open(url);
78 Document doc = DataUtil.load(in, "UTF-8", url.toString());
79 Element article = doc.getElementById("articleBody");
80 if (article != null) {
81 for (String line : toLines(article, new QuoteProcessor() {
82 @Override
83 public String processText(String text) {
84 return text;
85 }
86
87 @Override
88 public boolean ignoreNode(Node node) {
89 if (node instanceof Element) {
90 Element element = (Element) node;
91 if (element.hasClass("lire")) {
92 return true;
93 }
94 }
95
96 return false;
97 }
98
99 @Override
100 public boolean detectQuote(Node node) {
101 return false;
102 }
103
104 @Override
105 public String manualProcessing(Node node) {
106 if (node instanceof Element) {
107 Element element = (Element) node;
108 if (element.hasClass("intertitre")) {
109 return "\n[ " + element.text() + " ]\n";
110 }
111 }
112 return null;
113 }
114 })) {
115 fullContent += line + "\n";
116 }
117
118 // Content is too tight with a single break per line:
119 fullContent = fullContent.replace("\n", "\n\n") //
120 .replace("\n\n\n\n", "\n\n") //
121 .replace("\n\n\n\n", "\n\n") //
122 .trim();
123 }
124
125 story.setFullContent(fullContent);
126 story.setComments(comments);
127 }
128 }