[gofetch.git] / src / be / nikiroo / gofetch / support / LeMonde.java

package be.nikiroo.gofetch.support;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;

/**
 * Support <a href="http://www.lemonde.fr/">http://www.lemonde.fr/</a>.
 * <p>
 * Stories are listed from a fixed set of topic pages; the full text of a story
 * is extracted from the <tt>articleBody</tt> element of its article page.
 * 
 * @author niki
 */
public class LeMonde extends BasicSupport {
	/** Topic path segments of the site that are scanned for stories. */
	private static final String[] TOPICS = { "international", "politique",
			"societe", "sciences" };

	@Override
	public String getDescription() {
		return "Le Monde: Actualités et Infos en France et dans le monde";
	}

	/**
	 * List the stories found on the first page of each supported topic.
	 * <p>
	 * An <tt>article</tt> element is only kept when it has a <tt>time</tt>
	 * element, an <tt>h3</tt> title containing a link and a <tt>txt3</tt>
	 * summary.
	 * 
	 * @return the stories, in page order, topic after topic (never NULL)
	 * 
	 * @throws IOException
	 *             in case of I/O error while downloading a topic page
	 */
	@Override
	public List<Story> list() throws IOException {
		List<Story> list = new ArrayList<Story>();

		for (String topic : TOPICS) {
			URL url = new URL("http://www.lemonde.fr/" + topic + "/1.html");
			Document doc = parseLeMondePage(url);
			Elements articles = doc.getElementsByTag("article");
			for (Element article : articles) {
				Elements times = article.getElementsByTag("time");
				Elements titleElements = article.getElementsByTag("h3");
				Elements contentElements = article.getElementsByClass("txt3");
				if (times.isEmpty() || titleElements.isEmpty()
						|| contentElements.isEmpty()) {
					continue;
				}

				Element titleElement = titleElements.get(0);

				// The story is only usable if we can link to it
				Elements links = titleElement.getElementsByTag("a");
				if (links.isEmpty()) {
					continue;
				}

				// The timestamp is used both as ID (made file-name friendly)
				// and as the source of the story date
				String datetime = times.get(0).attr("datetime");
				String id = datetime.replace(":", "_").replace("+", "_");
				String title = titleElement.text();
				// Use the timestamp, not the headline text, as the date
				String date = date(datetime);
				String content = contentElements.get(0).text();
				String intUrl = links.get(0).absUrl("href");
				String extUrl = "";
				String author = "";
				String details = "";

				Elements signatures = article.getElementsByClass("signature");
				if (!signatures.isEmpty()) {
					author = signatures.get(0).text();
				}

				list.add(new Story(getType(), id, title, author, date, topic,
						details, intUrl, extUrl, content));
			}
		}

		return list;
	}

	/**
	 * Download the article page of the given story and set its full content
	 * (the story summary followed by the article body) and its comments
	 * (always empty for this site).
	 * 
	 * @param story
	 *            the story to complete
	 * 
	 * @throws IOException
	 *             in case of I/O error while downloading the article page
	 */
	@Override
	public void fetch(Story story) throws IOException {
		String fullContent = story.getContent();
		List<Comment> comments = new ArrayList<Comment>();

		// Note: no comments on this site as far as I can see (or maybe with
		// some javascript, I need to check...)

		URL url = new URL(story.getUrlInternal());
		Document doc = parseLeMondePage(url);
		Element article = doc.getElementById("articleBody");
		if (article != null) {
			StringBuilder builder = new StringBuilder();
			if (fullContent != null) {
				// Avoid the literal "null" a string concatenation would give
				builder.append(fullContent);
			}

			for (String line : toLines(article, new BasicElementProcessor() {
				@Override
				public boolean ignoreNode(Node node) {
					// Skip the "lire" (read also) blocks
					return node instanceof Element
							&& ((Element) node).hasClass("lire");
				}

				@Override
				public String isSubtitle(Node node) {
					if (node instanceof Element) {
						Element element = (Element) node;
						if (element.hasClass("intertitre")) {
							return element.text();
						}
					}

					return null;
				}
			})) {
				builder.append(line).append("\n");
			}

			// Content is too tight with a single break per line: separate
			// every line by exactly one blank line, whatever the number of
			// consecutive breaks we got
			fullContent = builder.toString().replaceAll("\n+", "\n\n").trim();
		}

		story.setFullContent(fullContent);
		story.setComments(comments);
	}

	/**
	 * Download the given page and parse it as an UTF-8 HTML document, making
	 * sure the download stream is closed afterwards.
	 * 
	 * @param url
	 *            the page to download (also used as base URI to resolve the
	 *            relative links of the document)
	 * 
	 * @return the parsed document
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	private Document parseLeMondePage(URL url) throws IOException {
		InputStream in = downloader.open(url);
		try {
			return DataUtil.load(in, "UTF-8", url.toString());
		} finally {
			if (in != null) {
				in.close();
			}
		}
	}
}
Commit	Line	Data
	1	package be.nikiroo.gofetch.support;
	2
	3	import java.io.IOException;
	4	import java.io.InputStream;
	5	import java.net.URL;
	6	import java.util.ArrayList;
	7	import java.util.List;
	8
	9	import org.jsoup.helper.DataUtil;
	10	import org.jsoup.nodes.Document;
	11	import org.jsoup.nodes.Element;
	12	import org.jsoup.nodes.Node;
	13	import org.jsoup.select.Elements;
	14
	15	import be.nikiroo.gofetch.data.Comment;
	16	import be.nikiroo.gofetch.data.Story;
	17
	18	/**
	19	* Support <a href="http://www.lemonde.fr/">http://www.lemonde.fr/</a>.
	20	*
	21	* @author niki
	22	*/
	23	public class LeMonde extends BasicSupport {
	24	@Override
	25	public String getDescription() {
	26	return "Le Monde: Actualités et Infos en France et dans le monde";
	27	}
	28
	29	@Override
	30	public List<Story> list() throws IOException {
	31	List<Story> list = new ArrayList<Story>();
	32
	33	for (String topic : new String[] { "international", "politique",
	34	"societe", "sciences" }) {
	35	URL url = new URL("http://www.lemonde.fr/" + topic + "/1.html");
	36	InputStream in = downloader.open(url);
	37	Document doc = DataUtil.load(in, "UTF-8", url.toString());
	38	Elements articles = doc.getElementsByTag("article");
	39	for (Element article : articles) {
	40	Elements times = article.getElementsByTag("time");
	41	Elements titleElements = article.getElementsByTag("h3");
	42	Elements contentElements = article.getElementsByClass("txt3");
	43	if (times.size() > 0 && titleElements.size() > 0
	44	&& contentElements.size() > 0) {
	45	String id = times.get(0).attr("datetime").replace(":", "_")
	46	.replace("+", "_");
	47	String title = titleElements.get(0).text();
	48	String date = date(titleElements.get(0).text());
	49	String content = contentElements.get(0).text();
	50	String intUrl = "";
	51	String extUrl = "";
	52	String author = "";
	53	String details = "";
	54
	55	Elements detailsElements = article
	56	.getElementsByClass("signature");
	57	if (detailsElements.size() > 0) {
	58	author = detailsElements.get(0).text();
	59	}
	60
	61	Elements links = titleElements.get(0).getElementsByTag("a");
	62	if (links.size() > 0) {
	63	intUrl = links.get(0).absUrl("href");
	64	list.add(new Story(getType(), id, title, author, date,
	65	topic, details, intUrl, extUrl, content));
	66	}
	67	}
	68	}
	69	}
	70
	71	return list;
	72	}
	73
	74	@Override
	75	public void fetch(Story story) throws IOException {
	76	String fullContent = story.getContent();
	77	List<Comment> comments = new ArrayList<Comment>();
	78
	79	// Note: no comments on this site as far as I can see (or maybe with
	80	// some javascript, I need to check...)
	81
	82	URL url = new URL(story.getUrlInternal());
	83	InputStream in = downloader.open(url);
	84	Document doc = DataUtil.load(in, "UTF-8", url.toString());
	85	Element article = doc.getElementById("articleBody");
	86	if (article != null) {
	87	for (String line : toLines(article, new BasicElementProcessor() {
	88	@Override
	89	public boolean ignoreNode(Node node) {
	90	if (node instanceof Element) {
	91	Element element = (Element) node;
	92	if (element.hasClass("lire")) {
	93	return true;
	94	}
	95	}
	96
	97	return false;
	98	}
	99
	100	@Override
	101	public String isSubtitle(Node node) {
	102	if (node instanceof Element) {
	103	Element element = (Element) node;
	104	if (element.hasClass("intertitre")) {
	105	return element.text();
	106	}
	107	}
	108	return null;
	109	}
	110	})) {
	111	fullContent += line + "\n";
	112	}
	113
	114	// Content is too tight with a single break per line:
	115	fullContent = fullContent.replace("\n", "\n\n") //
	116	.replace("\n\n\n\n", "\n\n") //
	117	.replace("\n\n\n\n", "\n\n") //
	118	.trim();
	119	}
	120
	121	story.setFullContent(fullContent);
	122	story.setComments(comments);
	123	}
	124	}