[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / MangaFox.java

package be.nikiroo.fanfix.supported;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;

class MangaFox extends BasicSupport {
	/**
	 * Always answer TRUE.
	 * <p>
	 * NOTE(review): presumably tells {@link BasicSupport} that the content
	 * produced by this support is HTML -- confirm against the parent class.
	 * 
	 * @return TRUE
	 */
	@Override
	protected boolean isHtml() {
		return true;
	}

	/**
	 * Build the {@link MetaData} of the manga from its source page.
	 * <p>
	 * The author, date and tags are read from the second row of the first
	 * table found under the element with id "title" (columns: Released,
	 * Authors, Artists, Genres); they are left unset when that element, that
	 * table, that row or one of those 4 columns is missing.
	 * 
	 * @return the meta data (never NULL)
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	@Override
	protected MetaData getMeta() throws IOException {
		MetaData meta = new MetaData();
		Element doc = getSourceNode();

		// The data cells, or NULL when the page does not have the expected
		// layout (kept apart from the table lookup so that a half-matching
		// page can never be mistaken for a valid row)
		Elements cells = null;

		Element title = doc.getElementById("title");
		if (title != null) {
			Element table = title.getElementsByTag("table").first();
			if (table != null) {
				// Rows: header, data
				Elements rows = table.getElementsByTag("tr");
				if (rows.size() > 1) {
					// Columns: Released, Authors, Artists, Genres
					Elements columns = rows.get(1).getElementsByTag("td");
					if (columns.size() >= 4) {
						cells = columns;
					}
				}
			}
		}

		meta.setTitle(getTitle());
		if (cells != null) {
			// authors and artists are merged into a single author list
			meta.setAuthor(getAuthors(cells.get(1).text() + ","
					+ cells.get(2).text()));

			meta.setDate(StringUtils.unhtml(cells.get(0).text()).trim());
			meta.setTags(explode(cells.get(3).text()));
		}
		meta.setSource(getType().getSourceName());
		meta.setUrl(getSource().toString());
		meta.setPublisher(getType().getSourceName());
		meta.setUuid(getSource().toString());
		meta.setLuid("");
		meta.setLang("en");
		meta.setSubject("manga");
		meta.setType(getType().toString());
		meta.setImageDocument(true);
		meta.setCover(getCover());

		return meta;
	}

	/**
	 * Return the title of the manga: the text of the first "h1" found under
	 * the element with id "title" of the source page.
	 * 
	 * @return the title, or NULL if the page has no such element
	 */
	private String getTitle() {
		Element doc = getSourceNode();

		Element title = doc.getElementById("title");
		if (title == null) {
			// no "title" element on this page: nothing to read the title from
			return null;
		}

		Element h1 = title.getElementsByTag("h1").first();
		if (h1 != null) {
			return StringUtils.unhtml(h1.text()).trim();
		}

		return null;
	}

	/**
	 * Turn a comma-separated list of authors into a clean display string:
	 * the values are exploded via {@link MangaFox#explode(String)} and joined
	 * back with ", ".
	 * 
	 * @param authorList
	 *            the comma-separated authors
	 * 
	 * @return the joined authors (an empty {@link String} if there are none)
	 */
	private String getAuthors(String authorList) {
		StringBuilder joined = new StringBuilder();
		for (String name : explode(authorList)) {
			if (joined.length() > 0) {
				joined.append(", ");
			}
			joined.append(name);
		}

		return joined.toString();
	}

	/**
	 * Return the description of the manga: the text of the first element of
	 * class "summary" in the source page.
	 * 
	 * @return the description, or NULL if there is no such element
	 */
	@Override
	protected String getDesc() {
		Element summary = getSourceNode().getElementsByClass("summary")
				.first();
		if (summary == null) {
			return null;
		}

		String text = summary.text();
		return StringUtils.unhtml(text).trim();
	}

	/**
	 * Download the cover of the manga: the first "img" found under the first
	 * element of class "cover" in the source page.
	 * <p>
	 * An I/O failure is reported to the trace handler and not propagated.
	 * 
	 * @return the cover, or NULL if it is not found or cannot be retrieved
	 */
	private Image getCover() {
		Element container = getSourceNode().getElementsByClass("cover")
				.first();

		Element img = null;
		if (container != null) {
			img = container.getElementsByTag("img").first();
		}
		if (img == null) {
			return null;
		}

		String coverUrl = img.absUrl("src");
		try {
			InputStream coverIn = openEx(coverUrl);
			try {
				return new Image(coverIn);
			} finally {
				coverIn.close();
			}
		} catch (IOException e) {
			Instance.getTraceHandler().error(e);
		}

		return null;
	}

	/**
	 * List the chapters of the manga.
	 * <p>
	 * Each "li" of the source page that holds a link inside an "h4" (or,
	 * failing that, an "h3") yields one chapter: the name is the text of that
	 * heading and the {@link URL} is the link without its trailing "1.html",
	 * always ending with a "/".
	 * <p>
	 * When all the link texts share the same prefix followed by a number, the
	 * chapters are renamed "Tome &lt;number&gt;[: name]" and sorted by that
	 * number; otherwise (or if a number cannot be parsed) the page order is
	 * simply reversed.
	 * 
	 * @param pg
	 *            the optional progress reporter (not used by this
	 *            implementation)
	 * 
	 * @return the chapters (never NULL)
	 */
	@Override
	protected List<Entry<String, URL>> getChapters(Progress pg) {
		List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();

		String prefix = null; // each chapter starts with this prefix, then a
								// chapter number (including "x.5"), then name
								// (NULL: not known yet, empty: no common
								// prefix)

		Element doc = getSourceNode();
		for (Element li : doc.getElementsByTag("li")) {
			Element el = li.getElementsByTag("h4").first();
			if (el == null) {
				el = li.getElementsByTag("h3").first();
			}
			if (el == null) {
				continue;
			}

			Element a = el.getElementsByTag("a").first();
			if (a == null) {
				continue;
			}

			String title = StringUtils.unhtml(el.text()).trim();
			try {
				// The link points to the first page of the chapter, we want
				// the base URL of the chapter
				String url = a.absUrl("href");
				if (url.endsWith("1.html")) {
					url = url.substring(0, url.length() - "1.html".length());
				}
				if (!url.endsWith("/")) {
					url += "/";
				}

				if (prefix == null || !prefix.isEmpty()) {
					// Strip the trailing digits, dots and spaces (the number)
					// to get the prefix used by this link
					StringBuilder possiblePrefix = new StringBuilder(
							StringUtils.unhtml(a.text()).trim());
					while (possiblePrefix.length() > 0) {
						char car = possiblePrefix.charAt(possiblePrefix
								.length() - 1);
						boolean punctuation = (car == '.' || car == ' ');
						boolean digit = (car >= '0' && car <= '9');
						if (!punctuation && !digit) {
							break;
						}

						possiblePrefix.setLength(possiblePrefix.length() - 1);
					}

					if (prefix == null) {
						prefix = possiblePrefix.toString();
					}

					if (!prefix.equalsIgnoreCase(possiblePrefix.toString())) {
						prefix = ""; // prefix not ok
					}
				}

				urls.add(new AbstractMap.SimpleEntry<String, URL>(title,
						new URL(url)));
			} catch (Exception e) {
				Instance.getTraceHandler().error(e);
			}
		}

		if (prefix != null && !prefix.isEmpty()) {
			try {
				// We found a prefix, so everything should be sortable
				// NOTE: chapters sharing the same number share the same key,
				// so only the last one put in the map is kept
				SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
				for (Entry<String, URL> entry : urls) {
					String title = entry.getKey();

					// Skip the prefix and its separator, but never read past
					// the end of a title shorter than expected: it then gives
					// an empty number, which is handled as any other
					// unparsable number below
					int start = Math.min(prefix.length() + 1, title.length());
					String num = title.substring(start).trim();

					String name = "";
					int pos = num.indexOf(' ');
					if (pos >= 0) {
						name = num.substring(pos).trim();
						num = num.substring(0, pos).trim();
					}

					if (!name.isEmpty()) {
						name = "Tome " + num + ": " + name;
					} else {
						name = "Tome " + num;
					}

					double key = Double.parseDouble(num);

					map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
							entry.getValue()));
				}
				urls = new ArrayList<Entry<String, URL>>(map.values());
			} catch (NumberFormatException e) {
				Instance.getTraceHandler()
						.error(new IOException(
								"Cannot find a tome number, revert to default sorting",
								e));
				// by default, the chapters are in reversed order
				Collections.reverse(urls);
			}
		} else {
			// by default, the chapters are in reversed order
			Collections.reverse(urls);
		}

		return urls;
	}

	/**
	 * Build the content of a chapter: one "[image-url]&lt;br/&gt;" entry per
	 * page of the chapter.
	 * <p>
	 * The pages are the documents "1.html", "2.html"... under the chapter
	 * {@link URL}; the first one is also used to count the pages (the number
	 * of "option" entries under the first element of class "m", minus the
	 * last one). A page whose image cannot be retrieved is reported to the
	 * trace handler and skipped.
	 * 
	 * @param chapUrl
	 *            the base {@link URL} of the chapter (expected to end with a
	 *            "/", as produced by {@link MangaFox#getChapters(Progress)})
	 * @param number
	 *            the chapter number (not used by this implementation)
	 * @param pg
	 *            the optional progress reporter (can be NULL)
	 * 
	 * @return the content of the chapter
	 * 
	 * @throws IOException
	 *             if the first page cannot be retrieved or does not contain
	 *             the list of pages
	 */
	@Override
	protected String getChapterContent(URL chapUrl, int number, Progress pg)
			throws IOException {
		if (pg == null) {
			pg = new Progress();
		}

		StringBuilder builder = new StringBuilder();

		String url = chapUrl.toString();

		// 1. find out how many images there are
		// note: when used, the base URL can be an ad-page
		Element imageDoc;
		try {
			imageDoc = loadImagePage(url + "1.html");
		} catch (IOException e) {
			// nothing can be done without the first page
			throw new IOException("Cannot get image " + 1 + " of manga", e);
		}

		Element select = imageDoc.getElementsByClass("m").first();
		if (select == null) {
			throw new IOException("Cannot find the list of pages in: " + url
					+ "1.html");
		}

		Elements options = select.getElementsByTag("option");
		int size = options.size() - 1; // last is "Comments"
		if (size < 0) {
			size = 0;
		}

		pg.setMinMax(0, size);

		// 2. list them
		for (int i = 1; i <= size; i++) {
			try {
				if (i > 1) { // because first one was opened for size
					imageDoc = loadImagePage(url + i + ".html");
				}

				Element image = imageDoc.getElementById("image");
				// absUrl gives an empty String (not NULL) when it cannot
				// build an absolute URL
				String linkImage = (image == null) ? "" : image.absUrl("src");

				// to help with the retry and the originalUrl, part 1
				URL originalUrl = withoutQuery(linkImage);
				if (originalUrl == null) {
					throw new IOException("No usable image link in: " + url
							+ i + ".html");
				}

				builder.append("[");
				builder.append(originalUrl);
				builder.append("]<br/>");

				// to help with the retry and the originalUrl, part 2
				refresh(linkImage);
			} catch (IOException e) {
				Instance.getTraceHandler().error(
						new IOException("Cannot get image " + i + " of manga",
								e));
			}
		}

		return builder.toString();
	}

	/**
	 * Download and parse one page of a chapter, closing the stream in all
	 * cases.
	 * 
	 * @param pageUrl
	 *            the URL of the page
	 * 
	 * @return the parsed page
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	private Element loadImagePage(String pageUrl) throws IOException {
		InputStream in = openEx(pageUrl);
		try {
			return DataUtil.load(in, "UTF-8", pageUrl);
		} finally {
			if (in != null) {
				in.close();
			}
		}
	}

	/**
	 * Refresh the given URL: open it via {@link MangaFox#openEx(String)} and
	 * close the resulting stream at once.
	 * <p>
	 * Any failure is silently turned into a FALSE result.
	 * 
	 * @param url
	 *            the URL to refresh
	 * 
	 * @return TRUE if it was refreshed, FALSE if anything went wrong
	 */
	private boolean refresh(String url) {
		boolean refreshed = false;
		try {
			InputStream in = openEx(url);
			in.close();
			refreshed = true;
		} catch (Exception e) {
			// best effort only: the failure is reported via the return value
		}

		return refreshed;
	}

	/**
	 * Open the URL through the cache, but: retry a second time after 100ms if
	 * it fails, and pass the URL without its query part (see
	 * {@link MangaFox#withoutQuery(String)}) along with the real one (so it
	 * can be recalled later).
	 * <p>
	 * A malformed URL fails immediately, without the retry. If the thread is
	 * interrupted during the wait, the second attempt is done at once and the
	 * interrupted status of the thread is preserved.
	 * 
	 * @param url
	 *            the {@link URL}
	 * 
	 * @return the resource
	 * 
	 * @throws IOException
	 *             in case of I/O error (including a malformed URL)
	 */
	private InputStream openEx(String url) throws IOException {
		// Done once, out of the retry: waiting cannot fix a malformed URL
		URL target = new URL(url);
		URL originalUrl = withoutQuery(url);

		try {
			return Instance.getCache().open(target, originalUrl, this, true);
		} catch (Exception e) {
			// second chance
			try {
				Thread.sleep(100);
			} catch (InterruptedException ee) {
				// do not hide the interruption from the callers
				Thread.currentThread().interrupt();
			}

			return Instance.getCache().open(target, originalUrl, this, true);
		}
	}

	/**
	 * Return the same input {@link URL} but without the query part (only the
	 * protocol, host, explicit port if any, and path are kept).
	 * 
	 * @param url
	 *            the input {@link URL} as a {@link String}
	 * 
	 * @return the input {@link URL} without query, or NULL if the input is
	 *         not a valid {@link URL}
	 */
	private URL withoutQuery(String url) {
		try {
			// Remove the query from the original URL, so it can be cached
			// correctly
			URL o = new URL(url);

			StringBuilder stripped = new StringBuilder();
			stripped.append(o.getProtocol()).append("://").append(o.getHost());
			// getPort() is -1 when the URL does not name a port; an explicit
			// port must be kept or the result points to another server
			if (o.getPort() >= 0) {
				stripped.append(':').append(o.getPort());
			}
			stripped.append(o.getPath());

			return new URL(stripped.toString());
		} catch (MalformedURLException e) {
			return null;
		}
	}

	/**
	 * Split an HTML comma-separated list of values into a {@link List} of
	 * text values: each value is converted from HTML and trimmed, and the
	 * empty ones as well as the duplicates are dropped (the first occurrence
	 * is kept, in order).
	 * 
	 * @param values
	 *            the comma-separated values in HTML format (can be NULL)
	 * 
	 * @return the full list with no duplicate in text format (never NULL)
	 */
	private List<String> explode(String values) {
		List<String> result = new ArrayList<String>();
		if (values == null || values.isEmpty()) {
			return result;
		}

		String[] parts = values.split(",");
		for (int i = 0; i < parts.length; i++) {
			String text = StringUtils.unhtml(parts[i]).trim();
			if (text.isEmpty() || result.contains(text)) {
				continue;
			}

			result.add(text);
		}

		return result;
	}

	/**
	 * Check if the given {@link URL} belongs to one of the hosts handled by
	 * this support: "mangafox.me" or "fanfox.net", with or without "www.".
	 * <p>
	 * The comparison ignores the case, as host names are case-insensitive.
	 * 
	 * @param url
	 *            the {@link URL} to check
	 * 
	 * @return TRUE if the host of the {@link URL} is supported
	 */
	@Override
	protected boolean supports(URL url) {
		String host = url.getHost();
		return "mangafox.me".equalsIgnoreCase(host)
				|| "www.mangafox.me".equalsIgnoreCase(host)
				|| "fanfox.net".equalsIgnoreCase(host)
				|| "www.fanfox.net".equalsIgnoreCase(host);
	}
}
Commit	Line	Data
	1	package be.nikiroo.fanfix.supported;
	2
	3	import java.io.IOException;
	4	import java.io.InputStream;
	5	import java.net.MalformedURLException;
	6	import java.net.URL;
	7	import java.util.AbstractMap;
	8	import java.util.ArrayList;
	9	import java.util.Collections;
	10	import java.util.List;
	11	import java.util.Map.Entry;
	12	import java.util.SortedMap;
	13	import java.util.TreeMap;
	14
	15	import org.jsoup.helper.DataUtil;
	16	import org.jsoup.nodes.Element;
	17	import org.jsoup.select.Elements;
	18
	19	import be.nikiroo.fanfix.Instance;
	20	import be.nikiroo.fanfix.data.MetaData;
	21	import be.nikiroo.utils.Image;
	22	import be.nikiroo.utils.Progress;
	23	import be.nikiroo.utils.StringUtils;
	24
	25	class MangaFox extends BasicSupport {
	26	@Override
	27	protected boolean isHtml() {
	28	return true;
	29	}
	30
	31	@Override
	32	protected MetaData getMeta() throws IOException {
	33	MetaData meta = new MetaData();
	34	Element doc = getSourceNode();
	35
	36	Element title = doc.getElementById("title");
	37	Elements table = null;
	38	if (title != null) {
	39	table = title.getElementsByTag("table");
	40	}
	41	if (table != null) {
	42	// Rows: header, data
	43	Elements rows = table.first().getElementsByTag("tr");
	44	if (rows.size() > 1) {
	45	table = rows.get(1).getElementsByTag("td");
	46	// Columns: Realeased, Authors, Artists, Genres
	47	if (table.size() < 4) {
	48	table = null;
	49	}
	50	}
	51	}
	52
	53	meta.setTitle(getTitle());
	54	if (table != null) {
	55	meta.setAuthor(getAuthors(table.get(1).text() + ","
	56	+ table.get(2).text()));
	57
	58	meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
	59	meta.setTags(explode(table.get(3).text()));
	60	}
	61	meta.setSource(getType().getSourceName());
	62	meta.setUrl(getSource().toString());
	63	meta.setPublisher(getType().getSourceName());
	64	meta.setUuid(getSource().toString());
	65	meta.setLuid("");
	66	meta.setLang("en");
	67	meta.setSubject("manga");
	68	meta.setType(getType().toString());
	69	meta.setImageDocument(true);
	70	meta.setCover(getCover());
	71
	72	return meta;
	73	}
	74
	75	private String getTitle() {
	76	Element doc = getSourceNode();
	77
	78	Element title = doc.getElementById("title");
	79	Element h1 = title.getElementsByTag("h1").first();
	80	if (h1 != null) {
	81	return StringUtils.unhtml(h1.text()).trim();
	82	}
	83
	84	return null;
	85	}
	86
	87	private String getAuthors(String authorList) {
	88	String author = "";
	89	for (String auth : explode(authorList)) {
	90	if (!author.isEmpty()) {
	91	author = author + ", ";
	92	}
	93	author += auth;
	94	}
	95
	96	return author;
	97	}
	98
	99	@Override
	100	protected String getDesc() {
	101	Element doc = getSourceNode();
	102	Element title = doc.getElementsByClass("summary").first();
	103	if (title != null) {
	104	return StringUtils.unhtml(title.text()).trim();
	105	}
	106
	107	return null;
	108	}
	109
	110	private Image getCover() {
	111	Element doc = getSourceNode();
	112	Element cover = doc.getElementsByClass("cover").first();
	113	if (cover != null) {
	114	cover = cover.getElementsByTag("img").first();
	115	}
	116
	117	if (cover != null) {
	118	String coverUrl = cover.absUrl("src");
	119
	120	InputStream coverIn;
	121	try {
	122	coverIn = openEx(coverUrl);
	123	try {
	124	return new Image(coverIn);
	125	} finally {
	126	coverIn.close();
	127	}
	128	} catch (IOException e) {
	129	Instance.getTraceHandler().error(e);
	130	}
	131	}
	132
	133	return null;
	134	}
	135
	136	@Override
	137	protected List<Entry<String, URL>> getChapters(Progress pg) {
	138	List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
	139
	140	String prefix = null; // each chapter starts with this prefix, then a
	141	// chapter number (including "x.5"), then name
	142
	143	Element doc = getSourceNode();
	144	for (Element li : doc.getElementsByTag("li")) {
	145	Element el = li.getElementsByTag("h4").first();
	146	if (el == null) {
	147	el = li.getElementsByTag("h3").first();
	148	}
	149	if (el != null) {
	150	Element a = el.getElementsByTag("a").first();
	151	if (a != null) {
	152	String title = StringUtils.unhtml(el.text()).trim();
	153	try {
	154	String url = a.absUrl("href");
	155	if (url.endsWith("1.html")) {
	156	url = url.substring(0,
	157	url.length() - "1.html".length());
	158	}
	159	if (!url.endsWith("/")) {
	160	url += "/";
	161	}
	162
	163	if (prefix == null \|\| !prefix.isEmpty()) {
	164	StringBuilder possiblePrefix = new StringBuilder(
	165	StringUtils.unhtml(a.text()).trim());
	166	while (possiblePrefix.length() > 0) {
	167	char car = possiblePrefix.charAt(possiblePrefix
	168	.length() - 1);
	169	boolean punctuation = (car == '.' \|\| car == ' ');
	170	boolean digit = (car >= '0' && car <= '9');
	171	if (!punctuation && !digit) {
	172	break;
	173	}
	174
	175	possiblePrefix.setLength(possiblePrefix
	176	.length() - 1);
	177	}
	178
	179	if (prefix == null) {
	180	prefix = possiblePrefix.toString();
	181	}
	182
	183	if (!prefix.equalsIgnoreCase(possiblePrefix
	184	.toString())) {
	185	prefix = ""; // prefix not ok
	186	}
	187	}
	188
	189	urls.add(new AbstractMap.SimpleEntry<String, URL>(
	190	title, new URL(url)));
	191	} catch (Exception e) {
	192	Instance.getTraceHandler().error(e);
	193	}
	194	}
	195	}
	196	}
	197
	198	if (prefix != null && !prefix.isEmpty()) {
	199	try {
	200	// We found a prefix, so everything should be sortable
	201	SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
	202	for (Entry<String, URL> entry : urls) {
	203	String num = entry.getKey().substring(prefix.length() + 1)
	204	.trim();
	205	String name = "";
	206	int pos = num.indexOf(' ');
	207	if (pos >= 0) {
	208	name = num.substring(pos).trim();
	209	num = num.substring(0, pos).trim();
	210	}
	211
	212	if (!name.isEmpty()) {
	213	name = "Tome " + num + ": " + name;
	214	} else {
	215	name = "Tome " + num;
	216	}
	217
	218	double key = Double.parseDouble(num);
	219
	220	map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
	221	entry.getValue()));
	222	}
	223	urls = new ArrayList<Entry<String, URL>>(map.values());
	224	} catch (NumberFormatException e) {
	225	Instance.getTraceHandler()
	226	.error(new IOException(
	227	"Cannot find a tome number, revert to default sorting",
	228	e));
	229	// by default, the chapters are in reversed order
	230	Collections.reverse(urls);
	231	}
	232	} else {
	233	// by default, the chapters are in reversed order
	234	Collections.reverse(urls);
	235	}
	236
	237	return urls;
	238	}
	239
	240	@Override
	241	protected String getChapterContent(URL chapUrl, int number, Progress pg)
	242	throws IOException {
	243	if (pg == null) {
	244	pg = new Progress();
	245	}
	246
	247	StringBuilder builder = new StringBuilder();
	248
	249	String url = chapUrl.toString();
	250	InputStream imageIn = null;
	251	Element imageDoc = null;
	252
	253	// 1. find out how many images there are
	254	int size;
	255	try {
	256	// note: when used, the base URL can be an ad-page
	257	imageIn = openEx(url + "1.html");
	258	imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
	259	} catch (IOException e) {
	260	Instance.getTraceHandler().error(
	261	new IOException("Cannot get image " + 1 + " of manga", e));
	262	} finally {
	263	if (imageIn != null) {
	264	imageIn.close();
	265	}
	266	}
	267	Element select = imageDoc.getElementsByClass("m").first();
	268	Elements options = select.getElementsByTag("option");
	269	size = options.size() - 1; // last is "Comments"
	270
	271	pg.setMinMax(0, size);
	272
	273	// 2. list them
	274	for (int i = 1; i <= size; i++) {
	275	if (i > 1) { // because first one was opened for size
	276	try {
	277	imageIn = openEx(url + i + ".html");
	278	imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
	279	+ ".html");
	280
	281	String linkImage = imageDoc.getElementById("image").absUrl(
	282	"src");
	283	if (linkImage != null) {
	284	builder.append("[");
	285	// to help with the retry and the originalUrl, part 1
	286	builder.append(withoutQuery(linkImage));
	287	builder.append("]<br/>");
	288	}
	289
	290	// to help with the retry and the originalUrl, part 2
	291	refresh(linkImage);
	292	} catch (IOException e) {
	293	Instance.getTraceHandler().error(
	294	new IOException("Cannot get image " + i
	295	+ " of manga", e));
	296	} finally {
	297	if (imageIn != null) {
	298	imageIn.close();
	299	}
	300	}
	301	}
	302	}
	303
	304	return builder.toString();
	305	}
	306
	307	/**
	308	* Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
	309	*
	310	* @param url
	311	* the URL to refresh
	312	*
	313	* @return TRUE if it was refreshed
	314	*/
	315	private boolean refresh(String url) {
	316	try {
	317	openEx(url).close();
	318	return true;
	319	} catch (Exception e) {
	320	return false;
	321	}
	322	}
	323
	324	/**
	325	* Open the URL through the cache, but: retry a second time after 100ms if
	326	* it fails, remove the query part of the {@link URL} before saving it to
	327	* the cache (so it can be recalled later).
	328	*
	329	* @param url
	330	* the {@link URL}
	331	*
	332	* @return the resource
	333	*
	334	* @throws IOException
	335	* in case of I/O error
	336	*/
	337	private InputStream openEx(String url) throws IOException {
	338	try {
	339	return Instance.getCache().open(new URL(url), withoutQuery(url),
	340	this, true);
	341	} catch (Exception e) {
	342	// second chance
	343	try {
	344	Thread.sleep(100);
	345	} catch (InterruptedException ee) {
	346	}
	347
	348	return Instance.getCache().open(new URL(url), withoutQuery(url),
	349	this, true);
	350	}
	351	}
	352
	353	/**
	354	* Return the same input {@link URL} but without the query part.
	355	*
	356	* @param url
	357	* the inpiut {@link URL} as a {@link String}
	358	*
	359	* @return the input {@link URL} without query
	360	*/
	361	private URL withoutQuery(String url) {
	362	URL o = null;
	363	try {
	364	// Remove the query from o (originalUrl), so it can be cached
	365	// correctly
	366	o = new URL(url);
	367	o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
	368
	369	return o;
	370	} catch (MalformedURLException e) {
	371	return null;
	372	}
	373	}
	374
	375	/**
	376	* Explode an HTML comma-separated list of values into a non-duplicate text
	377	* {@link List} .
	378	*
	379	* @param values
	380	* the comma-separated values in HTML format
	381	*
	382	* @return the full list with no duplicate in text format
	383	*/
	384	private List<String> explode(String values) {
	385	List<String> list = new ArrayList<String>();
	386	if (values != null && !values.isEmpty()) {
	387	for (String auth : values.split(",")) {
	388	String a = StringUtils.unhtml(auth).trim();
	389	if (!a.isEmpty() && !list.contains(a.trim())) {
	390	list.add(a);
	391	}
	392	}
	393	}
	394
	395	return list;
	396	}
	397
	398	@Override
	399	protected boolean supports(URL url) {
	400	return "mangafox.me".equals(url.getHost())
	401	\|\| "www.mangafox.me".equals(url.getHost())
	402	\|\| "fanfox.net".equals(url.getHost())
	403	\|\| "www.fanfox.net".equals(url.getHost());
	404	}
	405	}