New support: TooLinux
author Niki Roo <niki@nikiroo.be>
Thu, 22 Mar 2018 15:27:58 +0000 (16:27 +0100)
committer Niki Roo <niki@nikiroo.be>
Thu, 22 Mar 2018 15:27:58 +0000 (16:27 +0100)
src/be/nikiroo/gofetch/support/BasicSupport.java
src/be/nikiroo/gofetch/support/TooLinux.java [new file with mode: 0644]

index b7eaca3cfe8e6917e3507212cfbb24d29657be5b..1258e8918ad1ec8eb429849634aa5f950bdc674f 100644 (file)
@@ -19,7 +19,7 @@ public abstract class BasicSupport {
        protected static Downloader downloader = new Downloader("gofetcher");
 
        public enum Type {
        protected static Downloader downloader = new Downloader("gofetcher");
 
        public enum Type {
-               SLASHDOT, PIPEDOT, LWN, LEMONDE, REGISTER, 
+               SLASHDOT, PIPEDOT, LWN, LEMONDE, REGISTER, TOOLINUX,
        }
 
        /**
        }
 
        /**
@@ -181,6 +181,9 @@ public abstract class BasicSupport {
                        case REGISTER:
                                support = new TheRegister();
                                break;
                        case REGISTER:
                                support = new TheRegister();
                                break;
+                       case TOOLINUX:
+                               support = new TooLinux();
+                               break;
                        }
 
                        if (support != null) {
                        }
 
                        if (support != null) {
diff --git a/src/be/nikiroo/gofetch/support/TooLinux.java b/src/be/nikiroo/gofetch/support/TooLinux.java
new file mode 100644 (file)
index 0000000..3b5edb8
--- /dev/null
@@ -0,0 +1,118 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.select.Elements;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.utils.StringUtils;
+
+public class TooLinux extends BasicSupport {
+       @Override
+       public String getDescription() {
+               return "Premier quotidien francophone d'actualité généraliste sur Linux, les logiciels libres et l'interopérabilité, depuis mars 2000.";
+       }
+
+       @Override
+       public List<Story> list() throws IOException {
+               List<Story> list = new ArrayList<Story>();
+
+               URL url = new URL("https://www.toolinux.com/");
+               InputStream in = downloader.open(url);
+               Document doc = DataUtil.load(in, "UTF-8", url.toString());
+               Elements articles = doc.getElementsByClass("hentry");
+               for (Element article : articles) {
+                       String id = "";
+                       String intUrl = "";
+                       String extUrl = ""; // nope
+                       String title = "";
+                       String date = "";
+                       String details = "";
+                       String body = "";
+
+                       Element urlElement = article.getElementsByTag("a").first();
+                       if (urlElement != null) {
+                               intUrl = urlElement.absUrl("href");
+                       }
+
+                       Element titleElement = article.getElementsByClass("entry-title")
+                                       .first();
+                       if (titleElement != null) {
+                               title = StringUtils.unhtml(titleElement.text()).trim();
+                       }
+
+                       Element dateElement = article.getElementsByClass("published")
+                                       .first();
+                       if (dateElement != null) {
+                               date = StringUtils.unhtml(dateElement.text()).trim();
+                               id = dateElement.attr("title").trim();
+                       }
+
+                       if (id.isEmpty()) {
+                               // fallback
+                               id = intUrl.replace("/", "_");
+                       }
+
+                       Element detailsElement = article.getElementsByClass("introduction")
+                                       .first();
+                       details = "(" + date + ") ";
+                       if (detailsElement != null) {
+                               details += StringUtils.unhtml(detailsElement.text()).trim();
+                       }
+
+                       list.add(new Story(getType(), id, title, details, intUrl, extUrl,
+                                       body));
+               }
+
+               return list;
+       }
+
+       @Override
+       public void fetch(Story story) throws IOException {
+               String fullContent = story.getContent();
+               List<Comment> comments = new ArrayList<Comment>();
+               story.setComments(comments);
+
+               URL url = new URL(story.getUrlInternal());
+               InputStream in = downloader.open(url);
+               try {
+                       Document doc = DataUtil.load(in, "UTF-8", url.toString());
+                       Element article = doc.getElementById("content");
+                       if (article != null) {
+                               for (String line : toLines(article,
+                                               new BasicElementProcessor() {
+                                                       @Override
+                                                       public boolean ignoreNode(Node node) {
+                                                               if ("notes".equals(node.attr("class"))) {
+                                                                       return true;
+                                                               }
+                                                               return false;
+                                                       }
+                                               })) {
+                                       fullContent += line + "\n";
+                               }
+
+                               // Content is too tight with a single break per line:
+                               fullContent = fullContent.replace("\n", "\n\n") //
+                                               .replace("\n\n\n\n", "\n\n") //
+                                               .replace("\n\n\n\n", "\n\n") //
+                                               .trim();
+                       }
+
+                       story.setFullContent(fullContent);
+               } finally {
+                       if (in != null) {
+                               in.close();
+                       }
+               }
+       }
+}