[gofetch.git] / be / nikiroo / gofetch / support / TooLinux.java

package be.nikiroo.gofetch.support;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;
import be.nikiroo.utils.StringUtils;

/**
 * Support class for the TooLinux news website
 * (<tt>https://www.toolinux.com/</tt>).
 * <p>
 * It lists the stories found on the front page and can fetch the full text of
 * a given story from its own page.
 */
public class TooLinux extends BasicSupport {
	@Override
	public String getDescription() {
		return "TooLinux: Premier quotidien francophone d'actualité généraliste sur Linux, les logiciels libres et l'interopérabilité, depuis mars 2000.";
	}

	/**
	 * List the stories available on the front page.
	 * <p>
	 * Each element of class <tt>hentry</tt> is converted into a {@link Story}
	 * with an empty external URL and an empty body.
	 * 
	 * @return the stories found on the front page, in document order (can be
	 *         empty, never NULL)
	 * 
	 * @throws IOException
	 *             in case of I/O error while downloading or parsing the page
	 */
	@Override
	public List<Story> list() throws IOException {
		List<Story> list = new ArrayList<Story>();

		URL url = new URL("https://www.toolinux.com/");
		InputStream in = downloader.open(url);
		try {
			Document doc = DataUtil.load(in, "UTF-8", url.toString());
			Elements articles = doc.getElementsByClass("hentry");
			for (Element article : articles) {
				String id = "";
				String intUrl = "";
				String extUrl = ""; // this site has no external URL
				String title = "";
				String date = "";
				String body = "";

				// The first link of the entry is used as the story URL
				Element urlElement = article.getElementsByTag("a").first();
				if (urlElement != null) {
					intUrl = urlElement.absUrl("href");
				}

				Element titleElement = article.getElementsByClass(
						"entry-title").first();
				if (titleElement != null) {
					title = StringUtils.unhtml(titleElement.text()).trim();
				}

				// The "published" element gives both the displayed date
				// (its text) and the story ID (its "title" attribute)
				Element dateElement = article.getElementsByClass("published")
						.first();
				if (dateElement != null) {
					date = StringUtils.unhtml(dateElement.text()).trim();
					id = dateElement.attr("title").trim();
				}

				if (id.isEmpty()) {
					// fallback: derive an ID from the URL
					id = intUrl.replace("/", "_");
				}

				// The details always start with the date between
				// parentheses, followed by the introduction text if any
				String details = "(" + date + ") ";
				Element detailsElement = article.getElementsByClass(
						"introduction").first();
				if (detailsElement != null) {
					details += StringUtils.unhtml(detailsElement.text())
							.trim();
				}

				list.add(new Story(getType(), id, title, details, intUrl,
						extUrl, body));
			}
		} finally {
			// Always release the download stream, as fetch() does
			if (in != null) {
				in.close();
			}
		}

		return list;
	}

	/**
	 * Fetch the full content of the given story and store it in the story
	 * itself.
	 * <p>
	 * The comments of the story are reset to an empty list. The lines
	 * extracted from the element of ID <tt>content</tt> are appended to the
	 * current content of the story; if that element cannot be found, the
	 * current content is used as-is for the full content.
	 * 
	 * @param story
	 *            the story to complete
	 * 
	 * @throws IOException
	 *             in case of I/O error while downloading or parsing the page
	 */
	@Override
	public void fetch(Story story) throws IOException {
		String fullContent = story.getContent();
		List<Comment> comments = new ArrayList<Comment>();
		story.setComments(comments);

		URL url = new URL(story.getUrlInternal());
		InputStream in = downloader.open(url);
		try {
			Document doc = DataUtil.load(in, "UTF-8", url.toString());
			Element article = doc.getElementById("content");
			if (article != null) {
				StringBuilder builder = new StringBuilder();
				if (fullContent != null) {
					// A NULL content must not end up as the text "null"
					builder.append(fullContent);
				}

				for (String line : toLines(article,
						new BasicElementProcessor() {
							@Override
							public boolean ignoreNode(Node node) {
								// Skip the nodes whose class is exactly
								// "notes"
								return "notes".equals(node.attr("class"));
							}
						})) {
					builder.append(line).append("\n");
				}

				// Content is too tight with a single break per line:
				// double every break, then collapse the runs of 4 breaks
				// back into 2 (done twice to reduce longer runs further)
				fullContent = builder.toString().replace("\n", "\n\n") //
						.replace("\n\n\n\n", "\n\n") //
						.replace("\n\n\n\n", "\n\n") //
						.trim();
			}

			story.setFullContent(fullContent);
		} finally {
			if (in != null) {
				in.close();
			}
		}
	}
}
Commit	Line	Data
	1	package be.nikiroo.gofetch.support;
	2
	3	import java.io.IOException;
	4	import java.io.InputStream;
	5	import java.net.URL;
	6	import java.util.ArrayList;
	7	import java.util.List;
	8
	9	import org.jsoup.helper.DataUtil;
	10	import org.jsoup.nodes.Document;
	11	import org.jsoup.nodes.Element;
	12	import org.jsoup.nodes.Node;
	13	import org.jsoup.select.Elements;
	14
	15	import be.nikiroo.gofetch.data.Comment;
	16	import be.nikiroo.gofetch.data.Story;
	17	import be.nikiroo.utils.StringUtils;
	18
	19	public class TooLinux extends BasicSupport {
	20	@Override
	21	public String getDescription() {
	22	return "TooLinux: Premier quotidien francophone d'actualité généraliste sur Linux, les logiciels libres et l'interopérabilité, depuis mars 2000.";
	23	}
	24
	25	@Override
	26	public List<Story> list() throws IOException {
	27	List<Story> list = new ArrayList<Story>();
	28
	29	URL url = new URL("https://www.toolinux.com/");
	30	InputStream in = downloader.open(url);
	31	Document doc = DataUtil.load(in, "UTF-8", url.toString());
	32	Elements articles = doc.getElementsByClass("hentry");
	33	for (Element article : articles) {
	34	String id = "";
	35	String intUrl = "";
	36	String extUrl = ""; // nope
	37	String title = "";
	38	String date = "";
	39	String details = "";
	40	String body = "";
	41
	42	Element urlElement = article.getElementsByTag("a").first();
	43	if (urlElement != null) {
	44	intUrl = urlElement.absUrl("href");
	45	}
	46
	47	Element titleElement = article.getElementsByClass("entry-title")
	48	.first();
	49	if (titleElement != null) {
	50	title = StringUtils.unhtml(titleElement.text()).trim();
	51	}
	52
	53	Element dateElement = article.getElementsByClass("published")
	54	.first();
	55	if (dateElement != null) {
	56	date = StringUtils.unhtml(dateElement.text()).trim();
	57	id = dateElement.attr("title").trim();
	58	}
	59
	60	if (id.isEmpty()) {
	61	// fallback
	62	id = intUrl.replace("/", "_");
	63	}
	64
	65	Element detailsElement = article.getElementsByClass("introduction")
	66	.first();
	67	details = "(" + date + ") ";
	68	if (detailsElement != null) {
	69	details += StringUtils.unhtml(detailsElement.text()).trim();
	70	}
	71
	72	list.add(new Story(getType(), id, title, details, intUrl, extUrl,
	73	body));
	74	}
	75
	76	return list;
	77	}
	78
	79	@Override
	80	public void fetch(Story story) throws IOException {
	81	String fullContent = story.getContent();
	82	List<Comment> comments = new ArrayList<Comment>();
	83	story.setComments(comments);
	84
	85	URL url = new URL(story.getUrlInternal());
	86	InputStream in = downloader.open(url);
	87	try {
	88	Document doc = DataUtil.load(in, "UTF-8", url.toString());
	89	Element article = doc.getElementById("content");
	90	if (article != null) {
	91	for (String line : toLines(article,
	92	new BasicElementProcessor() {
	93	@Override
	94	public boolean ignoreNode(Node node) {
	95	if ("notes".equals(node.attr("class"))) {
	96	return true;
	97	}
	98	return false;
	99	}
	100	})) {
	101	fullContent += line + "\n";
	102	}
	103
	104	// Content is too tight with a single break per line:
	105	fullContent = fullContent.replace("\n", "\n\n") //
	106	.replace("\n\n\n\n", "\n\n") //
	107	.replace("\n\n\n\n", "\n\n") //
	108	.trim();
	109	}
	110
	111	story.setFullContent(fullContent);
	112	} finally {
	113	if (in != null) {
	114	in.close();
	115	}
	116	}
	117	}
	118	}