3b5edb8bb712ba7597cdd1636f0bfbe4163a473a
[gofetch.git] / src / be / nikiroo / gofetch / support / TooLinux.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.List;
8
9 import org.jsoup.helper.DataUtil;
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 import be.nikiroo.gofetch.data.Comment;
16 import be.nikiroo.gofetch.data.Story;
17 import be.nikiroo.utils.StringUtils;
18
19 public class TooLinux extends BasicSupport {
20 @Override
21 public String getDescription() {
22 return "Premier quotidien francophone d'actualité généraliste sur Linux, les logiciels libres et l'interopérabilité, depuis mars 2000.";
23 }
24
25 @Override
26 public List<Story> list() throws IOException {
27 List<Story> list = new ArrayList<Story>();
28
29 URL url = new URL("https://www.toolinux.com/");
30 InputStream in = downloader.open(url);
31 Document doc = DataUtil.load(in, "UTF-8", url.toString());
32 Elements articles = doc.getElementsByClass("hentry");
33 for (Element article : articles) {
34 String id = "";
35 String intUrl = "";
36 String extUrl = ""; // nope
37 String title = "";
38 String date = "";
39 String details = "";
40 String body = "";
41
42 Element urlElement = article.getElementsByTag("a").first();
43 if (urlElement != null) {
44 intUrl = urlElement.absUrl("href");
45 }
46
47 Element titleElement = article.getElementsByClass("entry-title")
48 .first();
49 if (titleElement != null) {
50 title = StringUtils.unhtml(titleElement.text()).trim();
51 }
52
53 Element dateElement = article.getElementsByClass("published")
54 .first();
55 if (dateElement != null) {
56 date = StringUtils.unhtml(dateElement.text()).trim();
57 id = dateElement.attr("title").trim();
58 }
59
60 if (id.isEmpty()) {
61 // fallback
62 id = intUrl.replace("/", "_");
63 }
64
65 Element detailsElement = article.getElementsByClass("introduction")
66 .first();
67 details = "(" + date + ") ";
68 if (detailsElement != null) {
69 details += StringUtils.unhtml(detailsElement.text()).trim();
70 }
71
72 list.add(new Story(getType(), id, title, details, intUrl, extUrl,
73 body));
74 }
75
76 return list;
77 }
78
79 @Override
80 public void fetch(Story story) throws IOException {
81 String fullContent = story.getContent();
82 List<Comment> comments = new ArrayList<Comment>();
83 story.setComments(comments);
84
85 URL url = new URL(story.getUrlInternal());
86 InputStream in = downloader.open(url);
87 try {
88 Document doc = DataUtil.load(in, "UTF-8", url.toString());
89 Element article = doc.getElementById("content");
90 if (article != null) {
91 for (String line : toLines(article,
92 new BasicElementProcessor() {
93 @Override
94 public boolean ignoreNode(Node node) {
95 if ("notes".equals(node.attr("class"))) {
96 return true;
97 }
98 return false;
99 }
100 })) {
101 fullContent += line + "\n";
102 }
103
104 // Content is too tight with a single break per line:
105 fullContent = fullContent.replace("\n", "\n\n") //
106 .replace("\n\n\n\n", "\n\n") //
107 .replace("\n\n\n\n", "\n\n") //
108 .trim();
109 }
110
111 story.setFullContent(fullContent);
112 } finally {
113 if (in != null) {
114 in.close();
115 }
116 }
117 }
118 }