[gofetch.git] / nikiroo / gofetch / support / Slashdot.java

package be.nikiroo.gofetch.support;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;

/**
 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
 * <p>
 * Stories are scraped from the front page and comments from the page each
 * story links to, both parsed with jsoup.
 * 
 * @author niki
 */
public class Slashdot extends BasicSupport {
	@Override
	public String getDescription() {
		return "Slashdot: News for nerds, stuff that matters!";
	}

	/**
	 * List the stories currently present on the front page.
	 * <p>
	 * Every <tt>header</tt> element that contains an element of class
	 * <tt>story-title</tt> yields one {@link Story}; the other headers are
	 * ignored.
	 * 
	 * @return the stories, in document order (never NULL, can be empty)
	 * 
	 * @throws IOException
	 *             in case of I/O error while downloading or parsing the page
	 */
	@Override
	public List<Story> list() throws IOException {
		List<Story> list = new ArrayList<Story>();

		URL url = new URL("https://slashdot.org/");

		// The document is fully parsed in memory, so the stream can (and
		// must) be released as soon as the parsing is done, even on error.
		Document doc;
		InputStream in = downloader.open(url);
		try {
			doc = DataUtil.load(in, "UTF-8", url.toString());
		} finally {
			if (in != null) {
				in.close();
			}
		}

		Elements articles = doc.getElementsByTag("header");
		for (Element article : articles) {
			Elements titles = article.getElementsByClass("story-title");
			if (titles.isEmpty()) {
				// not a story header
				continue;
			}

			Element title = titles.get(0);

			// The story ID is the ID of the title element without its
			// "title-" prefix
			String id = "" + title.attr("id");
			if (id.startsWith("title-")) {
				id = id.substring("title-".length());
			}

			// First link of the title: internal URL (used by fetch(Story)),
			// second link, if any: external URL
			Elements links = title.getElementsByTag("a");
			String intUrl = null;
			String extUrl = null;
			if (links.size() > 0) {
				intUrl = links.get(0).absUrl("href");
			}
			if (links.size() > 1) {
				extUrl = links.get(1).absUrl("href");
			}

			String details = "";
			Elements detailsElements = article.getElementsByClass("details");
			if (detailsElements.size() > 0) {
				details = detailsElements.get(0).text();
			}

			// The body is not inside the header: it is looked up in the
			// whole document via its "text-" + ID identifier
			String body = "";
			Element bodyElement = doc.getElementById("text-" + id);
			if (bodyElement != null) {
				body = bodyElement.text();
			}

			list.add(new Story(getType(), id, title.text(), details, intUrl,
					extUrl, body));
		}

		return list;
	}

	/**
	 * Download the page behind the internal URL of the given story and set
	 * the comments found in it on the story.
	 * <p>
	 * The comments are read from the element of ID <tt>commentlisting</tt>;
	 * if the page has no such element, an empty list is set.
	 * 
	 * @param story
	 *            the story to fetch the comments of
	 * 
	 * @throws IOException
	 *             in case of I/O error while downloading or parsing the page
	 */
	@Override
	public void fetch(Story story) throws IOException {
		List<Comment> comments = new ArrayList<Comment>();

		URL url = new URL(story.getUrlInternal());

		// Same as in list(): always release the stream once parsed
		Document doc;
		InputStream in = downloader.open(url);
		try {
			doc = DataUtil.load(in, "UTF-8", url.toString());
		} finally {
			if (in != null) {
				in.close();
			}
		}

		Element listing = doc.getElementById("commentlisting");
		if (listing != null) {
			comments.addAll(getComments(listing));
		}

		story.setComments(comments);
	}

	/**
	 * Get the comments (recursively) from the given listing element.
	 * <p>
	 * Only the direct children of class <tt>comment</tt> are considered. A
	 * comment that also has the class <tt>hidden</tt> is not converted
	 * itself, but its sub-comments are still collected: they are attached to
	 * the last non-hidden comment seen so far in this listing or, if there is
	 * none yet, directly added to the returned list.
	 * 
	 * @param listing
	 *            the element whose children are the comments
	 * 
	 * @return the comments (never NULL, can be empty)
	 */
	private List<Comment> getComments(Element listing) {
		List<Comment> comments = new ArrayList<Comment>();
		// Kept across iterations on purpose: see the hidden comments handling
		// described above
		Comment lastComment = null;
		for (Element commentElement : listing.children()) {
			if (commentElement.hasClass("comment")) {
				if (!commentElement.hasClass("hidden")) {
					lastComment = getComment(commentElement);
					comments.add(lastComment);
				}

				// Sub-comments are in children whose ID contains "commtree_"
				List<Comment> subComments = new ArrayList<Comment>();
				for (Element child : commentElement.children()) {
					if (child.id().contains("commtree_")) {
						subComments.addAll(getComments(child));
					}
				}

				if (lastComment == null) {
					comments.addAll(subComments);
				} else {
					lastComment.addAll(subComments);
				}
			}
		}

		return comments;
	}

	/**
	 * Get a comment from the given element.
	 * <p>
	 * The title, author, date and content are respectively taken from the
	 * elements <tt>title</tt>, <tt>by</tt>, <tt>otherdetails</tt> and
	 * <tt>commentBody</tt> as resolved by <tt>firstOrEmpty</tt>; the ID of
	 * the comment is the ID of the element.
	 * 
	 * @param commentElement
	 *            the element to get the comment of.
	 * 
	 * @return the comment, <b>NOT</b> including sub-comments
	 */
	private Comment getComment(Element commentElement) {
		String title = firstOrEmpty(commentElement, "title").text();
		String author = firstOrEmpty(commentElement, "by").text();
		String date = firstOrEmpty(commentElement, "otherdetails").text();
		Element content = firstOrEmpty(commentElement, "commentBody");

		return new Comment(commentElement.id(), author, title, date,
				toLines(content));
	}

	/**
	 * Convert the content of the given element into lines of text, using
	 * Slashdot-specific rules for the quotes.
	 * 
	 * @param element
	 *            the element to convert
	 * 
	 * @return the lines
	 */
	private List<String> toLines(Element element) {
		return toLines(element, new BasicElementProcessor() {
			@Override
			public String processText(String text) {
				// Remove the leading ">" markers (quotes written as
				// one-liners) and the spaces around what remains
				while (text.startsWith(">")) {
					text = text.substring(1).trim();
				}

				return text;
			}

			@Override
			public boolean detectQuote(Node node) {
				// A quote is a <blockquote>, any element of class "quote",
				// or a <p> with a single text node that starts with ">"
				if (node instanceof Element) {
					Element elementNode = (Element) node;
					if (elementNode.tagName().equals("blockquote")
							|| elementNode.hasClass("quote")
							|| (elementNode.tagName().equals("p")
									&& elementNode.textNodes().size() == 1 && elementNode
									.textNodes().get(0).getWholeText()
									.startsWith(">"))) {
						return true;
					}
				}

				return false;
			}
		});
	}
}
Commit	Line	Data
	1	package be.nikiroo.gofetch.support;
	2
	3	import java.io.IOException;
	4	import java.io.InputStream;
	5	import java.net.URL;
	6	import java.util.ArrayList;
	7	import java.util.List;
	8
	9	import org.jsoup.helper.DataUtil;
	10	import org.jsoup.nodes.Document;
	11	import org.jsoup.nodes.Element;
	12	import org.jsoup.nodes.Node;
	13	import org.jsoup.select.Elements;
	14
	15	import be.nikiroo.gofetch.data.Comment;
	16	import be.nikiroo.gofetch.data.Story;
	17
	18	/**
	19	* Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
	20	*
	21	* @author niki
	22	*/
	23	public class Slashdot extends BasicSupport {
	24	@Override
	25	public String getDescription() {
	26	return "Slashdot: News for nerds, stuff that matters!";
	27	}
	28
	29	@Override
	30	public List<Story> list() throws IOException {
	31	List<Story> list = new ArrayList<Story>();
	32
	33	URL url = new URL("https://slashdot.org/");
	34	InputStream in = downloader.open(url);
	35	Document doc = DataUtil.load(in, "UTF-8", url.toString());
	36	Elements articles = doc.getElementsByTag("header");
	37	for (Element article : articles) {
	38	Elements titles = article.getElementsByClass("story-title");
	39	if (titles.size() == 0) {
	40	continue;
	41	}
	42
	43	Element title = titles.get(0);
	44
	45	String id = "" + title.attr("id");
	46	if (id.startsWith("title-")) {
	47	id = id.substring("title-".length());
	48	}
	49
	50	Elements links = title.getElementsByTag("a");
	51	String intUrl = null;
	52	String extUrl = null;
	53	if (links.size() > 0) {
	54	intUrl = links.get(0).absUrl("href");
	55	}
	56	if (links.size() > 1) {
	57	extUrl = links.get(1).absUrl("href");
	58	}
	59
	60	String details = "";
	61	Elements detailsElements = article.getElementsByClass("details");
	62	if (detailsElements.size() > 0) {
	63	details = detailsElements.get(0).text();
	64	}
	65
	66	String body = "";
	67	Element bodyElement = doc.getElementById("text-" + id);
	68	if (bodyElement != null) {
	69	body = bodyElement.text();
	70	}
	71
	72	list.add(new Story(getType(), id, title.text(), details, intUrl,
	73	extUrl, body));
	74	}
	75
	76	return list;
	77	}
	78
	79	@Override
	80	public void fetch(Story story) throws IOException {
	81	List<Comment> comments = new ArrayList<Comment>();
	82
	83	URL url = new URL(story.getUrlInternal());
	84	InputStream in = downloader.open(url);
	85	Document doc = DataUtil.load(in, "UTF-8", url.toString());
	86	Element listing = doc.getElementById("commentlisting");
	87	if (listing != null) {
	88	comments.addAll(getComments(listing));
	89	}
	90
	91	story.setComments(comments);
	92	}
	93
	94	private List<Comment> getComments(Element listing) {
	95	List<Comment> comments = new ArrayList<Comment>();
	96	Comment lastComment = null;
	97	for (Element commentElement : listing.children()) {
	98	if (commentElement.hasClass("comment")) {
	99	if (!commentElement.hasClass("hidden")) {
	100	lastComment = getComment(commentElement);
	101	comments.add(lastComment);
	102	}
	103
	104	List<Comment> subComments = new ArrayList<Comment>();
	105	for (Element child : commentElement.children()) {
	106	if (child.id().contains("commtree_")) {
	107	subComments.addAll(getComments(child));
	108	}
	109	}
	110
	111	if (lastComment == null) {
	112	comments.addAll(subComments);
	113	} else {
	114	lastComment.addAll(subComments);
	115	}
	116	}
	117	}
	118
	119	return comments;
	120	}
	121
	122	/**
	123	* Get a comment from the given element.
	124	*
	125	* @param commentElement
	126	* the element to get the comment of.
	127	*
	128	* @return the comment, <b>NOT</b> including sub-comments
	129	*/
	130	private Comment getComment(Element commentElement) {
	131	String title = firstOrEmpty(commentElement, "title").text();
	132	String author = firstOrEmpty(commentElement, "by").text();
	133	String date = firstOrEmpty(commentElement, "otherdetails").text();
	134	Element content = firstOrEmpty(commentElement, "commentBody");
	135
	136	return new Comment(commentElement.id(), author, title, date,
	137	toLines(content));
	138	}
	139
	140	private List<String> toLines(Element element) {
	141	return toLines(element, new BasicElementProcessor() {
	142	@Override
	143	public String processText(String text) {
	144	while (text.startsWith(">")) { // comment in one-liners
	145	text = text.substring(1).trim();
	146	}
	147
	148	return text;
	149	}
	150
	151	@Override
	152	public boolean detectQuote(Node node) {
	153	if (node instanceof Element) {
	154	Element elementNode = (Element) node;
	155	if (elementNode.tagName().equals("blockquote")
	156	|| elementNode.hasClass("quote")
	157	|| (elementNode.tagName().equals("p")
	158	&& elementNode.textNodes().size() == 1 && elementNode
	159	.textNodes().get(0).getWholeText()
	160	.startsWith(">"))) {
	161	return true;
	162	}
	163	}
	164
	165	return false;
	166	}
	167	});
	168	}
	169	}