[gofetch.git] / src / be / nikiroo / gofetch / support / TooLinux.java

package be.nikiroo.gofetch.support;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;
import be.nikiroo.utils.StringUtils;

/**
 * Support <a href="https://www.toolinux.com/">https://www.toolinux.com/</a>.
 * <p>
 * {@link #list()} scrapes the front page for article summaries and
 * {@link #fetch(Story)} downloads the page of a single article to fill in its
 * full content.
 * 
 * @author niki
 */
public class TooLinux extends BasicSupport {
	@Override
	public String getDescription() {
		return "TooLinux: Actualité généraliste sur Linux et les logiciels libres";
	}

	/**
	 * Download the front page and create one {@link Story} for each element
	 * carrying the <tt>hentry</tt> class.
	 * <p>
	 * The story ID is taken from the <tt>title</tt> attribute of the
	 * <tt>published</tt> element; when that is missing or empty, the article
	 * URL (with <tt>/</tt> replaced by <tt>_</tt>) is used instead.
	 * 
	 * @return the stories found on the front page, in document order (can be
	 *         empty, never NULL)
	 * 
	 * @throws IOException
	 *             in case of I/O error while downloading or parsing the page
	 */
	@Override
	public List<Story> list() throws IOException {
		List<Story> list = new ArrayList<Story>();

		URL url = new URL("https://www.toolinux.com/");
		InputStream in = downloader.open(url);
		try {
			Document doc = DataUtil.load(in, "UTF-8", url.toString());
			Elements articles = doc.getElementsByClass("hentry");
			for (Element article : articles) {
				String id = "";
				String intUrl = "";
				String extUrl = ""; // not available on this site
				String title = "";
				String date = "";
				String details = "";
				String body = "";
				String author = ""; // not available on this site
				String categ = ""; // not available on this site

				// The first link of the entry is used as the article URL
				Element urlElement = article.getElementsByTag("a").first();
				if (urlElement != null) {
					intUrl = urlElement.absUrl("href");
				}

				Element titleElement = article.getElementsByClass(
						"entry-title").first();
				if (titleElement != null) {
					title = StringUtils.unhtml(titleElement.text()).trim();
				}

				Element dateElement = article.getElementsByClass("published")
						.first();
				if (dateElement != null) {
					date = StringUtils.unhtml(dateElement.text()).trim();
					id = dateElement.attr("title").trim();
				}

				if (id.isEmpty()) {
					// fallback: derive the ID from the article URL
					id = intUrl.replace("/", "_");
				}

				Element bodyElement = article.getElementsByClass(
						"introduction").first();
				if (bodyElement != null) {
					body = StringUtils.unhtml(bodyElement.text()).trim();
				}

				list.add(new Story(getType(), id, title, author, date, categ,
						details, intUrl, extUrl, body));
			}
		} finally {
			// Always release the download stream, as fetch(Story) does
			if (in != null) {
				in.close();
			}
		}

		return list;
	}

	/**
	 * Download the page of the given story and set its full content from the
	 * element whose ID is <tt>content</tt>.
	 * <p>
	 * The comments of the story are reset to an empty list. If the page has
	 * no <tt>content</tt> element, the full content is set to the current
	 * (short) content of the story, unchanged.
	 * 
	 * @param story
	 *            the story to complete (its internal URL is the page that
	 *            will be downloaded)
	 * 
	 * @throws IOException
	 *             in case of I/O error while downloading or parsing the page
	 */
	@Override
	public void fetch(Story story) throws IOException {
		String fullContent = story.getContent();
		List<Comment> comments = new ArrayList<Comment>();
		story.setComments(comments);

		URL url = new URL(story.getUrlInternal());
		InputStream in = downloader.open(url);
		try {
			Document doc = DataUtil.load(in, "UTF-8", url.toString());
			Element article = doc.getElementById("content");
			if (article != null) {
				StringBuilder builder = new StringBuilder();
				if (fullContent != null) {
					// a NULL content must not end up as the text "null"
					builder.append(fullContent);
				}

				for (String line : toLines(article,
						new BasicElementProcessor() {
							@Override
							public boolean ignoreNode(Node node) {
								// skip the nodes whose class is exactly "notes"
								return "notes".equals(node.attr("class"));
							}
						})) {
					builder.append(line).append("\n");
				}

				// Content is too tight with a single break per line: double
				// each break, then collapse the runs of 4 breaks that this
				// creates back to 2 (two passes).
				fullContent = builder.toString().replace("\n", "\n\n") //
						.replace("\n\n\n\n", "\n\n") //
						.replace("\n\n\n\n", "\n\n") //
						.trim();
			}

			story.setFullContent(fullContent);
		} finally {
			if (in != null) {
				in.close();
			}
		}
	}
}
Commit	Line	Data
	1	package be.nikiroo.gofetch.support;
	2
	3	import java.io.IOException;
	4	import java.io.InputStream;
	5	import java.net.URL;
	6	import java.util.ArrayList;
	7	import java.util.List;
	8
	9	import org.jsoup.helper.DataUtil;
	10	import org.jsoup.nodes.Document;
	11	import org.jsoup.nodes.Element;
	12	import org.jsoup.nodes.Node;
	13	import org.jsoup.select.Elements;
	14
	15	import be.nikiroo.gofetch.data.Comment;
	16	import be.nikiroo.gofetch.data.Story;
	17	import be.nikiroo.utils.StringUtils;
	18
	19	/**
	20	* Support <a href="https://www.toolinux.com/">https://www.toolinux.com/</a>.
	21	*
	22	* @author niki
	23	*/
	24	public class TooLinux extends BasicSupport {
	25	@Override
	26	public String getDescription() {
	27	return "TooLinux: Actualité généraliste sur Linux et les logiciels libres";
	28	}
	29
	30	@Override
	31	public List<Story> list() throws IOException {
	32	List<Story> list = new ArrayList<Story>();
	33
	34	URL url = new URL("https://www.toolinux.com/");
	35	InputStream in = downloader.open(url);
	36	Document doc = DataUtil.load(in, "UTF-8", url.toString());
	37	Elements articles = doc.getElementsByClass("hentry");
	38	for (Element article : articles) {
	39	String id = "";
	40	String intUrl = "";
	41	String extUrl = ""; // nope
	42	String title = "";
	43	String date = "";
	44	String details = "";
	45	String body = "";
	46	String author = ""; // nope
	47	String categ = ""; // nope
	48
	49	Element urlElement = article.getElementsByTag("a").first();
	50	if (urlElement != null) {
	51	intUrl = urlElement.absUrl("href");
	52	}
	53
	54	Element titleElement = article.getElementsByClass("entry-title")
	55	.first();
	56	if (titleElement != null) {
	57	title = StringUtils.unhtml(titleElement.text()).trim();
	58	}
	59
	60	Element dateElement = article.getElementsByClass("published")
	61	.first();
	62	if (dateElement != null) {
	63	date = StringUtils.unhtml(dateElement.text()).trim();
	64	id = dateElement.attr("title").trim();
	65	}
	66
	67	if (id.isEmpty()) {
	68	// fallback
	69	id = intUrl.replace("/", "_");
	70	}
	71
	72	Element bodyElement = article.getElementsByClass("introduction")
	73	.first();
	74	if (bodyElement != null) {
	75	body = StringUtils.unhtml(bodyElement.text()).trim();
	76	}
	77
	78	list.add(new Story(getType(), id, title, author, date, categ,
	79	details, intUrl, extUrl, body));
	80	}
	81
	82	return list;
	83	}
	84
	85	@Override
	86	public void fetch(Story story) throws IOException {
	87	String fullContent = story.getContent();
	88	List<Comment> comments = new ArrayList<Comment>();
	89	story.setComments(comments);
	90
	91	URL url = new URL(story.getUrlInternal());
	92	InputStream in = downloader.open(url);
	93	try {
	94	Document doc = DataUtil.load(in, "UTF-8", url.toString());
	95	Element article = doc.getElementById("content");
	96	if (article != null) {
	97	for (String line : toLines(article,
	98	new BasicElementProcessor() {
	99	@Override
	100	public boolean ignoreNode(Node node) {
	101	if ("notes".equals(node.attr("class"))) {
	102	return true;
	103	}
	104	return false;
	105	}
	106	})) {
	107	fullContent += line + "\n";
	108	}
	109
	110	// Content is too tight with a single break per line:
	111	fullContent = fullContent.replace("\n", "\n\n") //
	112	.replace("\n\n\n\n", "\n\n") //
	113	.replace("\n\n\n\n", "\n\n") //
	114	.trim();
	115	}
	116
	117	story.setFullContent(fullContent);
	118	} finally {
	119	if (in != null) {
	120	in.close();
	121	}
	122	}
	123	}
	124	}