From cd555a1e275ce7cabd2303ad96fcd8b33a0213fc Mon Sep 17 00:00:00 2001
From: Niki Roo
Date: Thu, 22 Mar 2018 16:27:58 +0100
Subject: [PATCH] New support: TooLinux

---
 .../nikiroo/gofetch/support/BasicSupport.java |   5 +-
 src/be/nikiroo/gofetch/support/TooLinux.java  | 156 ++++++++++++++++++
 2 files changed, 160 insertions(+), 1 deletion(-)
 create mode 100644 src/be/nikiroo/gofetch/support/TooLinux.java

diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
index b7eaca3..1258e89 100644
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -19,7 +19,7 @@ public abstract class BasicSupport {
 	protected static Downloader downloader = new Downloader("gofetcher");
 
 	public enum Type {
-		SLASHDOT, PIPEDOT, LWN, LEMONDE, REGISTER,
+		SLASHDOT, PIPEDOT, LWN, LEMONDE, REGISTER, TOOLINUX,
 	}
 
 	/**
@@ -181,6 +181,9 @@ public abstract class BasicSupport {
 		case REGISTER:
 			support = new TheRegister();
 			break;
+		case TOOLINUX:
+			support = new TooLinux();
+			break;
 		}
 
 		if (support != null) {
diff --git a/src/be/nikiroo/gofetch/support/TooLinux.java b/src/be/nikiroo/gofetch/support/TooLinux.java
new file mode 100644
index 0000000..3b5edb8
--- /dev/null
+++ b/src/be/nikiroo/gofetch/support/TooLinux.java
@@ -0,0 +1,156 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.select.Elements;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.utils.StringUtils;
+
+/**
+ * Support for the TooLinux website (https://www.toolinux.com/).
+ * <p>
+ * Stories are listed from the "hentry" elements of the front page; the full
+ * text of a story is read from the "content" element of its own page.
+ */
+public class TooLinux extends BasicSupport {
+	@Override
+	public String getDescription() {
+		return "Premier quotidien francophone d'actualité généraliste sur Linux, "
+				+ "les logiciels libres et l'interopérabilité, depuis mars 2000.";
+	}
+
+	/**
+	 * List the stories found on the front page.
+	 *
+	 * @return one {@link Story} per "hentry" element of the page
+	 *
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	@Override
+	public List<Story> list() throws IOException {
+		List<Story> list = new ArrayList<Story>();
+
+		URL url = new URL("https://www.toolinux.com/");
+		InputStream in = downloader.open(url);
+		try {
+			Document doc = DataUtil.load(in, "UTF-8", url.toString());
+			Elements articles = doc.getElementsByClass("hentry");
+			for (Element article : articles) {
+				String id = "";
+				String intUrl = "";
+				String extUrl = ""; // left empty: no external URL is extracted
+				String title = "";
+				String date = "";
+				String details = "";
+				String body = "";
+
+				Element urlElement = article.getElementsByTag("a").first();
+				if (urlElement != null) {
+					intUrl = urlElement.absUrl("href");
+				}
+
+				Element titleElement = article
+						.getElementsByClass("entry-title").first();
+				if (titleElement != null) {
+					title = StringUtils.unhtml(titleElement.text()).trim();
+				}
+
+				Element dateElement = article.getElementsByClass("published")
+						.first();
+				if (dateElement != null) {
+					date = StringUtils.unhtml(dateElement.text()).trim();
+					id = dateElement.attr("title").trim();
+				}
+
+				if (id.isEmpty()) {
+					// fallback: derive the id from the story URL
+					id = intUrl.replace("/", "_");
+				}
+
+				Element detailsElement = article
+						.getElementsByClass("introduction").first();
+				details = "(" + date + ") ";
+				if (detailsElement != null) {
+					details += StringUtils.unhtml(detailsElement.text())
+							.trim();
+				}
+
+				list.add(new Story(getType(), id, title, details, intUrl,
+						extUrl, body));
+			}
+		} finally {
+			// Always release the download stream, as fetch() does
+			if (in != null) {
+				in.close();
+			}
+		}
+
+		return list;
+	}
+
+	/**
+	 * Download the page of the given story and store its text as the story's
+	 * full content; the story also receives an empty list of comments.
+	 *
+	 * @param story
+	 *            the story to complete
+	 *
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	@Override
+	public void fetch(Story story) throws IOException {
+		String fullContent = story.getContent();
+		List<Comment> comments = new ArrayList<Comment>();
+		story.setComments(comments);
+
+		URL url = new URL(story.getUrlInternal());
+		InputStream in = downloader.open(url);
+		try {
+			Document doc = DataUtil.load(in, "UTF-8", url.toString());
+			Element article = doc.getElementById("content");
+			if (article != null) {
+				// Start from the existing content (a null content counts as
+				// empty) and append one line of text at a time
+				StringBuilder builder = new StringBuilder();
+				if (fullContent != null) {
+					builder.append(fullContent);
+				}
+
+				for (String line : toLines(article,
+						new BasicElementProcessor() {
+							@Override
+							public boolean ignoreNode(Node node) {
+								// Skip the nodes whose class is "notes"
+								return "notes".equals(node.attr("class"));
+							}
+						})) {
+					builder.append(line).append("\n");
+				}
+
+				// Content is too tight with a single break per line:
+				fullContent = builder.toString().replace("\n", "\n\n") //
+						.replace("\n\n\n\n", "\n\n") //
+						.replace("\n\n\n\n", "\n\n") //
+						.trim();
+			}
+
+			story.setFullContent(fullContent);
+		} finally {
+			if (in != null) {
+				in.close();
+			}
+		}
+	}
+}
-- 
2.27.0