[fanfix.git] / src / be / nikiroo / fanfix / supported / E621.java

package be.nikiroo.fanfix.supported;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.Map.Entry;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.utils.IOUtils;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;

/**
 * Support class for <a href="http://e621.net/">e621.net</a> and
 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
 * including some of MLP.
 * <p>
 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
 * comics, but it can be difficult to browse.
 * 
 * @author niki
 */
class E621 extends BasicSupport {
	/**
	 * Check if this support can handle the given URL: the host must be
	 * e621.net or e926.net (an optional "www." prefix is ignored) and the URL
	 * must be a pool, a search or a set.
	 * 
	 * @param url
	 *            the URL to check
	 * 
	 * @return TRUE if the URL is supported
	 */
	@Override
	protected boolean supports(URL url) {
		String host = url.getHost();
		if (host.startsWith("www.")) {
			host = host.substring("www.".length());
		}

		return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
	}

	@Override
	protected boolean isHtml() {
		return true;
	}

	/**
	 * Build the meta data of the story from the source document and URL.
	 * <p>
	 * The date and LUID are left empty, the language is fixed to "en" and the
	 * subject to "Furry"; the story is flagged as an image document with a
	 * fake cover.
	 * 
	 * @return the meta data
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	@Override
	protected MetaData getMeta() throws IOException {
		MetaData meta = new MetaData();

		meta.setTitle(getTitle());
		meta.setAuthor(getAuthor());
		meta.setDate("");
		meta.setTags(getTags());
		meta.setSource(getType().getSourceName());
		meta.setUrl(getSource().toString());
		meta.setPublisher(getType().getSourceName());
		meta.setUuid(getSource().toString());
		meta.setLuid("");
		meta.setLang("en");
		meta.setSubject("Furry");
		meta.setType(getType().toString());
		meta.setImageDocument(true);
		meta.setCover(getCover());
		meta.setFakeCover(true);

		return meta;
	}

	/**
	 * Return the description of the story.
	 * <p>
	 * For a search or a set, the description is generated (host, current time
	 * and tags); for a pool, it is the text of the element whose id is
	 * "description".
	 * 
	 * @return the description, or NULL if the source is a pool without a
	 *         description element (or is neither a pool nor a search/set)
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	@Override
	protected String getDesc() throws IOException {
		if (isSearchOrSet(getSource())) {
			StringBuilder builder = new StringBuilder();
			builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
					.append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
					.append("\tTags: ");//
			for (String tag : getTags()) {
				builder.append("\t\t").append(tag);
			}

			return builder.toString();
		}

		if (isPool(getSource())) {
			Element el = getSourceNode().getElementById("description");
			if (el != null) {
				return el.text();
			}
		}

		return null;
	}

	/**
	 * List the chapters of the story: one chapter per result page of the pool
	 * or search.
	 * 
	 * @param pg
	 *            the optional progress reporter (not used here)
	 * 
	 * @return the chapters (name and page URL), empty if the source is neither
	 *         a pool nor a search/set
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	@Override
	protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
		if (isPool(getSource())) {
			// getPath() already starts with "/", so do not add another one
			String baseUrl = "https://e621.net" + getSource().getPath() + "?page=";
			return getChapters(getSource(), pg, baseUrl, "");
		} else if (isSearchOrSet(getSource())) {
			String baseUrl = "https://e621.net/posts/?page=";
			String search = "&tags=" + getTagsFromUrl(getSource());
			return getChapters(getSource(), pg, baseUrl, search);
		}

		return new LinkedList<Entry<String, URL>>();
	}

	/**
	 * Probe the pages <tt>baseUrl + i + parameters</tt> for i = 1, 2, 3... and
	 * keep each of them as a chapter until a page contains the "Nobody here
	 * but us chickens!" marker or cannot be read.
	 * 
	 * @param source
	 *            the source URL (only used to detect the e926 host)
	 * @param pg
	 *            the optional progress reporter (not used here)
	 * @param baseUrl
	 *            the start of the page URL, up to and including "page="
	 * @param parameters
	 *            the query parameters to add after the page number (can be
	 *            empty)
	 * 
	 * @return the chapters, in reverse page order
	 * 
	 * @throws IOException
	 *             if a page URL is malformed
	 */
	private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
			throws IOException {
		List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();

		if (source.getHost().contains("e926")) {
			baseUrl = baseUrl.replace("e621", "e926");
		}

		for (int i = 1; true; i++) {
			URL url = new URL(baseUrl + i + parameters);
			try {
				InputStream pageI = Instance.getCache().open(url, this, false);
				try {
					if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
						break;
					}
					urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
				} finally {
					pageI.close();
				}
			} catch (Exception e) {
				// Best effort: keep the pages found so far, but do not hide
				// the failure
				Instance.getTraceHandler().error(e);
				break;
			}
		}

		// They are sorted in reverse order on the website
		Collections.reverse(urls);
		return urls;
	}

	/**
	 * Build the content of a chapter: one "[url]&lt;br/&gt;" entry per
	 * <tt>article</tt> tag of the page, where url is the value of its
	 * "data-file-url" attribute.
	 * 
	 * @param chapUrl
	 *            the URL of the page
	 * @param number
	 *            the chapter number (not used here)
	 * @param pg
	 *            the optional progress reporter (not used here)
	 * 
	 * @return the content
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	@Override
	protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
		StringBuilder builder = new StringBuilder();
		Document chapterNode = loadDocument(chapUrl);
		for (Element el : chapterNode.getElementsByTag("article")) {
			builder.append("[");
			builder.append(el.attr("data-file-url"));
			builder.append("]<br/>");
		}

		return builder.toString();
	}

	/**
	 * Return the canonical URL of the given source.
	 * <p>
	 * For a set ("/post_sets/..."), the set page is downloaded and the first
	 * non-empty link found inside an element of class "set-shortname" is used;
	 * in all the other cases (including on I/O error, which is reported to the
	 * trace handler), the default behaviour of the parent class applies.
	 * 
	 * @param source
	 *            the source URL
	 * 
	 * @return the canonical URL
	 */
	@Override
	protected URL getCanonicalUrl(URL source) {
		if (isSetOriginalUrl(source)) {
			try {
				InputStream in = Instance.getCache().open(source, this, false);
				try {
					Document doc = DataUtil.load(in, "UTF-8", source.toString());
					for (Element shortname : doc.getElementsByClass("set-shortname")) {
						for (Element el : shortname.getElementsByTag("a")) {
							if (!el.attr("href").isEmpty()) {
								return new URL(el.absUrl("href"));
							}
						}
					}
				} finally {
					// always release the stream, even on early return
					in.close();
				}
			} catch (IOException e) {
				Instance.getTraceHandler().error(e);
			}
		}

		return super.getCanonicalUrl(source);
	}

	/**
	 * Extract the raw (still URL-encoded) tags from the query of the given
	 * URL.
	 * 
	 * @param url
	 *            the URL (can be NULL)
	 * 
	 * @return "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query, or
	 *         an empty String if the URL is NULL, has no query or has no tags
	 */
	private String getTagsFromUrl(URL url) {
		// getQuery() returns NULL when the URL has no query part
		String query = url == null ? null : url.getQuery();
		if (query == null) {
			return "";
		}

		int pos = query.indexOf("tags=");
		if (pos < 0) {
			return "";
		}

		String tags = query.substring(pos + "tags=".length());

		// Cut at the next parameter (or path-like suffix), even if the tags
		// value itself is empty
		pos = tags.indexOf('&');
		if (pos >= 0) {
			tags = tags.substring(0, pos);
		}
		pos = tags.indexOf('/');
		if (pos >= 0) {
			tags = tags.substring(0, pos);
		}

		return tags;
	}

	/**
	 * Return the title of the story, from the <tt>title</tt> tag of the source
	 * document, without the "e621" and "-" decorations at its start and end.
	 * <p>
	 * For a search or a set, the title is prefixed by "[e621] " (or is simply
	 * "e621" if nothing is left).
	 * 
	 * @return the title (can be empty for a pool)
	 */
	private String getTitle() {
		String title = "";

		Element el = getSourceNode().getElementsByTag("title").first();
		if (el != null) {
			title = el.text().trim();
		}

		// "e621" is listed twice so it can be removed on both sides of a "-"
		for (String s : new String[] { "e621", "-", "e621" }) {
			if (title.startsWith(s)) {
				title = title.substring(s.length()).trim();
			}
			if (title.endsWith(s)) {
				title = title.substring(0, title.length() - s.length()).trim();
			}

		}

		if (isSearchOrSet(getSource())) {
			title = title.isEmpty() ? "e621" : "[e621] " + title;
		}
		return title;
	}

	/**
	 * Return the author(s) of the story.
	 * <p>
	 * For a search or a set, this is the comma-separated text of the
	 * "search-tag" elements whose "itemprop" attribute is "author"; for a
	 * pool, it is made of the (at most 5) first words that follow a leading
	 * "by:" or "by " in the description, stopping at the first link.
	 * 
	 * @return the author(s), or an empty String if none can be found
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	private String getAuthor() throws IOException {
		StringBuilder builder = new StringBuilder();

		if (isSearchOrSet(getSource())) {
			for (Element el : getSourceNode().getElementsByClass("search-tag")) {
				if (el.attr("itemprop").equals("author")) {
					if (builder.length() > 0) {
						builder.append(", ");
					}
					builder.append(el.text().trim());
				}
			}
		}

		if (isPool(getSource())) {
			// a pool without a description element has a NULL description
			String desc = getDesc();
			if (desc != null) {
				String descL = desc.toLowerCase();

				if (descL.startsWith("by:") || descL.startsWith("by ")) {
					desc = desc.substring(3).trim();
					desc = desc.split("\n")[0];

					String tab[] = desc.split(" ");
					for (int i = 0; i < Math.min(tab.length, 5); i++) {
						if (tab[i].startsWith("http")) {
							break;
						}

						// separator between words only, no leading space
						if (builder.length() > 0) {
							builder.append(" ");
						}
						builder.append(tab[i]);
					}
				}
			}
		}

		return builder.toString();
	}

	/**
	 * Return the (URL-decoded) tags of a search or set, taken from the source
	 * URL; empty tags are skipped.
	 * 
	 * @return the tags (always empty for pools: no tags for pools)
	 */
	private List<String> getTags() {
		List<String> tags = new ArrayList<String>();
		if (isSearchOrSet(getSource())) {
			String str = getTagsFromUrl(getSource());
			for (String tag : str.split("\\+")) {
				String decoded;
				try {
					decoded = URLDecoder.decode(tag.trim(), "UTF-8").trim();
				} catch (UnsupportedEncodingException e) {
					// should not happen with UTF-8; report and skip the tag
					Instance.getTraceHandler().error(e);
					continue;
				} catch (IllegalArgumentException e) {
					// malformed %-escape in the URL: keep the raw tag
					decoded = tag.trim();
				}

				// "".split() yields one empty item: do not keep it as a tag
				if (!decoded.isEmpty()) {
					tags.add(decoded);
				}
			}
		}

		return tags;
	}

	/**
	 * Return the cover image of the story, obtained by passing the URL of the
	 * first chapter to the image helper.
	 * 
	 * @return the cover, or NULL if there are no chapters
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	private Image getCover() throws IOException {
		Image image = null;
		List<Entry<String, URL>> chapters = getChapters(null);
		if (!chapters.isEmpty()) {
			URL url = chapters.get(0).getValue();
			image = bsImages.getImage(this, url);
		}

		return image;
	}

	/**
	 * Check if the given URL is a set ("/post_sets/...").
	 * <p>
	 * Note: will be removed at getCanonicalUrl().
	 * 
	 * @param originalUrl
	 *            the URL to check
	 * 
	 * @return TRUE if it is a set
	 */
	private boolean isSetOriginalUrl(URL originalUrl) {
		return originalUrl.getPath().startsWith("/post_sets/");
	}

	/**
	 * Check if the given URL is a pool ("/pools/...").
	 * 
	 * @param url
	 *            the URL to check
	 * 
	 * @return TRUE if it is a pool
	 */
	private boolean isPool(URL url) {
		return url.getPath().startsWith("/pools/");
	}

	/**
	 * Check if the given URL is a search ("/posts" with a "tags=" parameter)
	 * or a set.
	 * <p>
	 * Note: set will be renamed into search by canonical url.
	 * 
	 * @param url
	 *            the URL to check
	 * 
	 * @return TRUE if it is a search or a set
	 */
	private boolean isSearchOrSet(URL url) {
		// getQuery() returns NULL when the URL has no query part
		String query = url.getQuery();
		boolean search = url.getPath().equals("/posts") && query != null && query.contains("tags=");

		return search || isSetOriginalUrl(url);
	}
}
Commit	Line	Data
	1	package be.nikiroo.fanfix.supported;
	2
	3	import java.io.IOException;
	4	import java.io.InputStream;
	5	import java.io.UnsupportedEncodingException;
	6	import java.net.URL;
	7	import java.net.URLDecoder;
	8	import java.util.AbstractMap;
	9	import java.util.ArrayList;
	10	import java.util.Collections;
	11	import java.util.Date;
	12	import java.util.LinkedList;
	13	import java.util.List;
	14	import java.util.Map.Entry;
	15
	16	import org.jsoup.helper.DataUtil;
	17	import org.jsoup.nodes.Document;
	18	import org.jsoup.nodes.Element;
	19
	20	import be.nikiroo.fanfix.Instance;
	21	import be.nikiroo.fanfix.data.MetaData;
	22	import be.nikiroo.utils.IOUtils;
	23	import be.nikiroo.utils.Image;
	24	import be.nikiroo.utils.Progress;
	25	import be.nikiroo.utils.StringUtils;
	26
	27	/**
	28	* Support class for <a href="http://e621.net/">e621.net</a> and
	29	* <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
	30	* including some of MLP.
	31	* <p>
	32	* <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
	33	* comics, but it can be difficult to browse.
	34	*
	35	* @author niki
	36	*/
	37	class E621 extends BasicSupport {
	38	@Override
	39	protected boolean supports(URL url) {
	40	String host = url.getHost();
	41	if (host.startsWith("www.")) {
	42	host = host.substring("www.".length());
	43	}
	44
	45	return ("e621.net".equals(host) \|\| "e926.net".equals(host)) && (isPool(url) \|\| isSearchOrSet(url));
	46	}
	47
	48	@Override
	49	protected boolean isHtml() {
	50	return true;
	51	}
	52
	53	@Override
	54	protected MetaData getMeta() throws IOException {
	55	MetaData meta = new MetaData();
	56
	57	meta.setTitle(getTitle());
	58	meta.setAuthor(getAuthor());
	59	meta.setDate("");
	60	meta.setTags(getTags());
	61	meta.setSource(getType().getSourceName());
	62	meta.setUrl(getSource().toString());
	63	meta.setPublisher(getType().getSourceName());
	64	meta.setUuid(getSource().toString());
	65	meta.setLuid("");
	66	meta.setLang("en");
	67	meta.setSubject("Furry");
	68	meta.setType(getType().toString());
	69	meta.setImageDocument(true);
	70	meta.setCover(getCover());
	71	meta.setFakeCover(true);
	72
	73	return meta;
	74	}
	75
	76	@Override
	77	protected String getDesc() throws IOException {
	78	if (isSearchOrSet(getSource())) {
	79	StringBuilder builder = new StringBuilder();
	80	builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
	81	.append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
	82	.append("\tTags: ");//
	83	for (String tag : getTags()) {
	84	builder.append("\t\t").append(tag);
	85	}
	86
	87	return builder.toString();
	88	}
	89
	90	if (isPool(getSource())) {
	91	Element el = getSourceNode().getElementById("description");
	92	if (el != null) {
	93	return el.text();
	94	}
	95	}
	96
	97	return null;
	98	}
	99
	100	@Override
	101	protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
	102	if (isPool(getSource())) {
	103	String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
	104	return getChapters(getSource(), pg, baseUrl, "");
	105	} else if (isSearchOrSet(getSource())) {
	106	String baseUrl = "https://e621.net/posts/?page=";
	107	String search = "&tags=" + getTagsFromUrl(getSource());
	108	return getChapters(getSource(), pg, baseUrl, search);
	109	}
	110
	111	return new LinkedList<Entry<String, URL>>();
	112	}
	113
	114	private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
	115	throws IOException {
	116	List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
	117
	118	if (source.getHost().contains("e926")) {
	119	baseUrl = baseUrl.replace("e621", "e926");
	120	}
	121
	122	for (int i = 1; true; i++) {
	123	URL url = new URL(baseUrl + i + parameters);
	124	try {
	125	InputStream pageI = Instance.getCache().open(url, this, false);
	126	try {
	127	if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
	128	break;
	129	}
	130	urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
	131	} finally {
	132	pageI.close();
	133	}
	134	} catch (Exception e) {
	135	break;
	136	}
	137	}
	138
	139	// They are sorted in reverse order on the website
	140	Collections.reverse(urls);
	141	return urls;
	142	}
	143
	144	@Override
	145	protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
	146	StringBuilder builder = new StringBuilder();
	147	Document chapterNode = loadDocument(chapUrl);
	148	for (Element el : chapterNode.getElementsByTag("article")) {
	149	builder.append("[");
	150	builder.append(el.attr("data-file-url"));
	151	builder.append("]<br/>");
	152	}
	153
	154	return builder.toString();
	155	}
	156
	157	@Override
	158	protected URL getCanonicalUrl(URL source) {
	159	if (isSetOriginalUrl(source)) {
	160	try {
	161	Document doc = DataUtil.load(Instance.getCache().open(source, this, false), "UTF-8", source.toString());
	162	for (Element shortname : doc.getElementsByClass("set-shortname")) {
	163	for (Element el : shortname.getElementsByTag("a")) {
	164	if (!el.attr("href").isEmpty())
	165	return new URL(el.absUrl("href"));
	166	}
	167	}
	168	} catch (IOException e) {
	169	Instance.getTraceHandler().error(e);
	170	}
	171	}
	172
	173	return super.getCanonicalUrl(source);
	174	}
	175
	176	// returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
	177	private String getTagsFromUrl(URL url) {
	178	String tags = url == null ? "" : url.getQuery();
	179	int pos = tags.indexOf("tags=");
	180
	181	if (pos >= 0) {
	182	tags = tags.substring(pos).substring("tags=".length());
	183	} else {
	184	return "";
	185	}
	186
	187	pos = tags.indexOf('&');
	188	if (pos > 0) {
	189	tags = tags.substring(0, pos);
	190	}
	191	pos = tags.indexOf('/');
	192	if (pos > 0) {
	193	tags = tags.substring(0, pos);
	194	}
	195
	196	return tags;
	197	}
	198
	199	private String getTitle() {
	200	String title = "";
	201
	202	Element el = getSourceNode().getElementsByTag("title").first();
	203	if (el != null) {
	204	title = el.text().trim();
	205	}
	206
	207	for (String s : new String[] { "e621", "-", "e621" }) {
	208	if (title.startsWith(s)) {
	209	title = title.substring(s.length()).trim();
	210	}
	211	if (title.endsWith(s)) {
	212	title = title.substring(0, title.length() - s.length()).trim();
	213	}
	214
	215	}
	216
	217	if (isSearchOrSet(getSource())) {
	218	title = title.isEmpty() ? "e621" : "[e621] " + title;
	219	}
	220	return title;
	221	}
	222
	223	private String getAuthor() throws IOException {
	224	StringBuilder builder = new StringBuilder();
	225
	226	if (isSearchOrSet(getSource())) {
	227	for (Element el : getSourceNode().getElementsByClass("search-tag")) {
	228	if (el.attr("itemprop").equals("author")) {
	229	if (builder.length() > 0) {
	230	builder.append(", ");
	231	}
	232	builder.append(el.text().trim());
	233	}
	234	}
	235	}
	236
	237	if (isPool(getSource())) {
	238	String desc = getDesc();
	239	String descL = desc.toLowerCase();
	240
	241	if (descL.startsWith("by:") \|\| descL.startsWith("by ")) {
	242	desc = desc.substring(3).trim();
	243	desc = desc.split("\n")[0];
	244
	245	String tab[] = desc.split(" ");
	246	for (int i = 0; i < Math.min(tab.length, 5); i++) {
	247	if (tab[i].startsWith("http"))
	248	break;
	249	builder.append(" ").append(tab[i]);
	250	}
	251	}
	252	}
	253
	254	return builder.toString();
	255	}
	256
	257	// no tags for pools
	258	private List<String> getTags() {
	259	List<String> tags = new ArrayList<String>();
	260	if (isSearchOrSet(getSource())) {
	261	String str = getTagsFromUrl(getSource());
	262	for (String tag : str.split("\\+")) {
	263	try {
	264	tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
	265	} catch (UnsupportedEncodingException e) {
	266	}
	267	}
	268	}
	269
	270	return tags;
	271	}
	272
	273	private Image getCover() throws IOException {
	274	Image image = null;
	275	List<Entry<String, URL>> chapters = getChapters(null);
	276	if (!chapters.isEmpty()) {
	277	URL url = chapters.get(0).getValue();
	278	image = bsImages.getImage(this, url);
	279	}
	280
	281	return image;
	282	}
	283
	284	// note: will be removed at getCanonicalUrl()
	285	private boolean isSetOriginalUrl(URL originalUrl) {
	286	return originalUrl.getPath().startsWith("/post_sets/");
	287	}
	288
	289	private boolean isPool(URL url) {
	290	return url.getPath().startsWith("/pools/");
	291	}
	292
	293	// set will be renamed into search by canonical url
	294	private boolean isSearchOrSet(URL url) {
	295	return
	296	// search:
	297	(url.getPath().equals("/posts") && url.getQuery().contains("tags="))
	298	// or set:
	299	\|\| isSetOriginalUrl(url);
	300	}
	301	}