Separate story details components
[gofetch.git] / src / be / nikiroo / gofetch / support / TooLinux.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.List;
8
9 import org.jsoup.helper.DataUtil;
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.Node;
13 import org.jsoup.select.Elements;
14
15 import be.nikiroo.gofetch.data.Comment;
16 import be.nikiroo.gofetch.data.Story;
17 import be.nikiroo.utils.StringUtils;
18
19 /**
20 * Support <a href="https://www.toolinux.com/">https://www.toolinux.com/</a>.
21 *
22 * @author niki
23 */
24 public class TooLinux extends BasicSupport {
25 @Override
26 public String getDescription() {
27 return "TooLinux: Actualité généraliste sur Linux et les logiciels libres";
28 }
29
30 @Override
31 public List<Story> list() throws IOException {
32 List<Story> list = new ArrayList<Story>();
33
34 URL url = new URL("https://www.toolinux.com/");
35 InputStream in = downloader.open(url);
36 Document doc = DataUtil.load(in, "UTF-8", url.toString());
37 Elements articles = doc.getElementsByClass("hentry");
38 for (Element article : articles) {
39 String id = "";
40 String intUrl = "";
41 String extUrl = ""; // nope
42 String title = "";
43 String date = "";
44 String details = "";
45 String body = "";
46 String author = ""; // nope
47 String categ = ""; // nope
48
49 Element urlElement = article.getElementsByTag("a").first();
50 if (urlElement != null) {
51 intUrl = urlElement.absUrl("href");
52 }
53
54 Element titleElement = article.getElementsByClass("entry-title")
55 .first();
56 if (titleElement != null) {
57 title = StringUtils.unhtml(titleElement.text()).trim();
58 }
59
60 Element dateElement = article.getElementsByClass("published")
61 .first();
62 if (dateElement != null) {
63 date = StringUtils.unhtml(dateElement.text()).trim();
64 id = dateElement.attr("title").trim();
65 }
66
67 if (id.isEmpty()) {
68 // fallback
69 id = intUrl.replace("/", "_");
70 }
71
72 Element detailsElement = article.getElementsByClass("introduction")
73 .first();
74 if (detailsElement != null) {
75 details += StringUtils.unhtml(detailsElement.text()).trim();
76 }
77
78 list.add(new Story(getType(), id, title, author, date, categ,
79 details, intUrl, extUrl, body));
80 }
81
82 return list;
83 }
84
85 @Override
86 public void fetch(Story story) throws IOException {
87 String fullContent = story.getContent();
88 List<Comment> comments = new ArrayList<Comment>();
89 story.setComments(comments);
90
91 URL url = new URL(story.getUrlInternal());
92 InputStream in = downloader.open(url);
93 try {
94 Document doc = DataUtil.load(in, "UTF-8", url.toString());
95 Element article = doc.getElementById("content");
96 if (article != null) {
97 for (String line : toLines(article,
98 new BasicElementProcessor() {
99 @Override
100 public boolean ignoreNode(Node node) {
101 if ("notes".equals(node.attr("class"))) {
102 return true;
103 }
104 return false;
105 }
106 })) {
107 fullContent += line + "\n";
108 }
109
110 // Content is too tight with a single break per line:
111 fullContent = fullContent.replace("\n", "\n\n") //
112 .replace("\n\n\n\n", "\n\n") //
113 .replace("\n\n\n\n", "\n\n") //
114 .trim();
115 }
116
117 story.setFullContent(fullContent);
118 } finally {
119 if (in != null) {
120 in.close();
121 }
122 }
123 }
124 }