[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / MangaFox.java

package be.nikiroo.fanfix.supported;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;

/**
 * Support class for the MangaFox website (hosts {@code mangafox.me} and
 * {@code fanfox.net}, with or without the {@code www.} prefix).
 * <p>
 * The stories are image documents: each chapter content is a list of image
 * URLs written as {@code [url]<br/>}.
 */
class MangaFox extends BasicSupport {
	@Override
	protected boolean isHtml() {
		return true;
	}

	@Override
	public String getSourceName() {
		return "MangaFox.me";
	}

	/**
	 * Build the meta data from the source node.
	 * <p>
	 * Author, date and tags are only set when the element with id
	 * <tt>title</tt> contains a table whose second row has at least 4 cells
	 * (Released, Authors, Artists, Genres).
	 * 
	 * @return the meta data
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	@Override
	protected MetaData getMeta() throws IOException {
		MetaData meta = new MetaData();
		Element doc = getSourceNode();

		// The data cells (Released, Authors, Artists, Genres), or NULL if the
		// page does not have the expected layout
		Elements cells = null;
		Element title = doc.getElementById("title");
		if (title != null) {
			Element table = title.getElementsByTag("table").first();
			if (table != null) {
				// Rows: header, data
				Elements rows = table.getElementsByTag("tr");
				if (rows.size() > 1) {
					Elements tds = rows.get(1).getElementsByTag("td");
					// Columns: Released, Authors, Artists, Genres
					if (tds.size() >= 4) {
						cells = tds;
					}
				}
			}
		}

		meta.setTitle(getTitle());
		if (cells != null) {
			meta.setAuthor(getAuthors(cells.get(1).text() + ","
					+ cells.get(2).text()));

			meta.setDate(StringUtils.unhtml(cells.get(0).text()).trim());
			meta.setTags(explode(cells.get(3).text()));
		}
		meta.setSource(getSourceName());
		meta.setUrl(getSource().toString());
		meta.setPublisher(getSourceName());
		meta.setUuid(getSource().toString());
		meta.setLuid("");
		meta.setLang("en");
		meta.setSubject("manga");
		meta.setType(getType().toString());
		meta.setImageDocument(true);
		meta.setCover(getCover());

		return meta;
	}

	/**
	 * Return the title of the story: the text of the first <tt>h1</tt> found
	 * in the element with id <tt>title</tt>.
	 * 
	 * @return the title, or NULL if it cannot be found
	 */
	private String getTitle() {
		Element doc = getSourceNode();

		Element title = doc.getElementById("title");
		if (title == null) {
			// same guard as in getMeta(): the page may not have this element
			return null;
		}

		Element h1 = title.getElementsByTag("h1").first();
		if (h1 != null) {
			return StringUtils.unhtml(h1.text()).trim();
		}

		return null;
	}

	/**
	 * Convert a comma-separated list of authors into a cleaned-up,
	 * duplicate-free list separated by ", ".
	 * 
	 * @param authorList
	 *            the comma-separated list of authors
	 * 
	 * @return the authors (can be empty, never NULL)
	 */
	private String getAuthors(String authorList) {
		StringBuilder authors = new StringBuilder();
		for (String auth : explode(authorList)) {
			if (authors.length() > 0) {
				authors.append(", ");
			}
			authors.append(auth);
		}

		return authors.toString();
	}

	/**
	 * Return the description of the story: the text of the first element of
	 * class <tt>summary</tt>.
	 * 
	 * @return the description, or NULL if not found
	 */
	@Override
	protected String getDesc() {
		Element doc = getSourceNode();
		Element summary = doc.getElementsByClass("summary").first();
		if (summary != null) {
			return StringUtils.unhtml(summary.text()).trim();
		}

		return null;
	}

	/**
	 * Download the cover: the first <tt>img</tt> found in the first element
	 * of class <tt>cover</tt>.
	 * 
	 * @return the cover, or NULL if not found or if it cannot be downloaded
	 *         (the error is then sent to the trace handler)
	 */
	private Image getCover() {
		Element doc = getSourceNode();
		Element cover = doc.getElementsByClass("cover").first();
		if (cover != null) {
			cover = cover.getElementsByTag("img").first();
		}

		if (cover != null) {
			String coverUrl = cover.absUrl("src");

			try {
				InputStream coverIn = openEx(coverUrl);
				try {
					return new Image(coverIn);
				} finally {
					coverIn.close();
				}
			} catch (IOException e) {
				Instance.getTraceHandler().error(e);
			}
		}

		return null;
	}

	/**
	 * List the chapters: one per <tt>li</tt> that contains a <tt>h4</tt> (or,
	 * failing that, a <tt>h3</tt>) with a link inside.
	 * <p>
	 * If all the link texts share the same prefix (the text minus the
	 * trailing number), the chapters are renamed "Tome x[: name]" and sorted
	 * by number; if not, the page order is simply reversed.
	 * 
	 * @param pg
	 *            the optional progress reporter (not used here)
	 * 
	 * @return the chapters (name, base URL ending with "/")
	 */
	@Override
	protected List<Entry<String, URL>> getChapters(Progress pg) {
		List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();

		String prefix = null; // each chapter starts with this prefix, then a
								// chapter number (including "x.5"), then name

		Element doc = getSourceNode();
		for (Element li : doc.getElementsByTag("li")) {
			Element el = li.getElementsByTag("h4").first();
			if (el == null) {
				el = li.getElementsByTag("h3").first();
			}
			if (el == null) {
				continue;
			}

			Element a = el.getElementsByTag("a").first();
			if (a == null) {
				continue;
			}

			String title = StringUtils.unhtml(el.text()).trim();
			try {
				String url = a.absUrl("href");
				// Only strip the first page itself: a plain "1.html" check
				// would also match "11.html", "21.html"...
				if (url.endsWith("/1.html")) {
					url = url.substring(0, url.length() - "1.html".length());
				}
				if (!url.endsWith("/")) {
					url += "/";
				}

				// An empty prefix means we already know there is no common
				// prefix, so there is nothing more to check
				if (prefix == null || !prefix.isEmpty()) {
					String possiblePrefix = getPossiblePrefix(StringUtils
							.unhtml(a.text()).trim());

					if (prefix == null) {
						prefix = possiblePrefix;
					}

					if (!prefix.equalsIgnoreCase(possiblePrefix)) {
						prefix = ""; // prefix not ok
					}
				}

				urls.add(new AbstractMap.SimpleEntry<String, URL>(title,
						new URL(url)));
			} catch (Exception e) {
				Instance.getTraceHandler().error(e);
			}
		}

		boolean sorted = false;
		if (prefix != null && !prefix.isEmpty()) {
			try {
				// We found a prefix, so everything should be sortable
				urls = sortByTomeNumber(urls, prefix);
				sorted = true;
			} catch (NumberFormatException e) {
				Instance.getTraceHandler()
						.error(new IOException(
								"Cannot find a tome number, revert to default sorting",
								e));
			}
		}

		if (!sorted) {
			// by default, the chapters are in reversed order
			Collections.reverse(urls);
		}

		return urls;
	}

	/**
	 * Remove the trailing digits, dots and spaces from the given text.
	 * 
	 * @param text
	 *            the text of a chapter link
	 * 
	 * @return the text without its trailing number (can be empty)
	 */
	private String getPossiblePrefix(String text) {
		int end = text.length();
		while (end > 0) {
			char car = text.charAt(end - 1);
			boolean punctuation = (car == '.' || car == ' ');
			boolean digit = (car >= '0' && car <= '9');
			if (!punctuation && !digit) {
				break;
			}

			end--;
		}

		return text.substring(0, end);
	}

	/**
	 * Rename the chapters to "Tome x[: name]" and sort them by number.
	 * <p>
	 * Chapters sharing the same number are all kept, in reverse input order
	 * (the input being in reversed order by default).
	 * 
	 * @param urls
	 *            the chapters, the names being "prefix number [name]"
	 * @param prefix
	 *            the common prefix of all the chapter names
	 * 
	 * @return a new, sorted list
	 * 
	 * @throws NumberFormatException
	 *             if a chapter has no usable number after the prefix
	 */
	private List<Entry<String, URL>> sortByTomeNumber(
			List<Entry<String, URL>> urls, String prefix) {
		SortedMap<Double, List<Entry<String, URL>>> map = new TreeMap<Double, List<Entry<String, URL>>>();
		for (Entry<String, URL> entry : urls) {
			String title = entry.getKey();
			if (title.length() <= prefix.length()) {
				// the title comes from the whole header and the prefix from
				// the link only, so the title can be too short to hold both
				throw new NumberFormatException("No tome number in: " + title);
			}

			// + 1: skip the separator between the prefix and the number
			String num = title.substring(prefix.length() + 1).trim();
			String name = "";
			int pos = num.indexOf(' ');
			if (pos >= 0) {
				name = num.substring(pos).trim();
				num = num.substring(0, pos).trim();
			}

			double key = Double.parseDouble(num);

			String tome = "Tome " + num;
			if (!name.isEmpty()) {
				tome = tome + ": " + name;
			}

			List<Entry<String, URL>> sameNumber = map.get(key);
			if (sameNumber == null) {
				sameNumber = new ArrayList<Entry<String, URL>>();
				map.put(key, sameNumber);
			}
			sameNumber.add(0, new AbstractMap.SimpleEntry<String, URL>(tome,
					entry.getValue()));
		}

		List<Entry<String, URL>> sorted = new ArrayList<Entry<String, URL>>(
				urls.size());
		for (List<Entry<String, URL>> sameNumber : map.values()) {
			sorted.addAll(sameNumber);
		}

		return sorted;
	}

	/**
	 * Return the content of a chapter: one {@code [url]<br/>} entry per image
	 * of the chapter, the URL being stripped of its query part.
	 * <p>
	 * An image that cannot be retrieved is skipped (the error is sent to the
	 * trace handler), but the first page is required since it contains the
	 * list of pages.
	 * 
	 * @param chapUrl
	 *            the base URL of the chapter (ending with "/")
	 * @param number
	 *            the chapter number (not used here)
	 * @param pg
	 *            the optional progress reporter (only its min/max values are
	 *            set here)
	 * 
	 * @return the content of the chapter
	 * 
	 * @throws IOException
	 *             if the first page of the chapter cannot be retrieved or
	 *             does not contain the list of pages
	 */
	@Override
	protected String getChapterContent(URL chapUrl, int number, Progress pg)
			throws IOException {
		if (pg == null) {
			pg = new Progress();
		}

		StringBuilder builder = new StringBuilder();

		String url = chapUrl.toString();

		// 1. find out how many images there are
		// note: when used, the base URL can be an ad-page
		Element firstPage;
		try {
			firstPage = loadImagePage(url, 1);
		} catch (IOException e) {
			throw new IOException("Cannot get image " + 1 + " of manga", e);
		}

		Element select = firstPage.getElementsByClass("m").first();
		if (select == null) {
			throw new IOException("Cannot find the list of images of manga: "
					+ url);
		}

		Elements options = select.getElementsByTag("option");
		int size = Math.max(0, options.size() - 1); // last is "Comments"

		pg.setMinMax(0, size);

		// 2. list them
		for (int i = 1; i <= size; i++) {
			Element page = firstPage; // first one was opened for size
			if (i > 1) {
				try {
					page = loadImagePage(url, i);
				} catch (IOException e) {
					Instance.getTraceHandler().error(
							new IOException("Cannot get image " + i
									+ " of manga", e));
					continue;
				}
			}

			appendImage(builder, page, i);
		}

		return builder.toString();
	}

	/**
	 * Download and parse the page that displays the given image.
	 * 
	 * @param url
	 *            the base URL of the chapter (ending with "/")
	 * @param i
	 *            the image number (starts at 1)
	 * 
	 * @return the parsed page
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	private Element loadImagePage(String url, int i) throws IOException {
		String pageUrl = url + i + ".html";
		InputStream in = openEx(pageUrl);
		try {
			return DataUtil.load(in, "UTF-8", pageUrl);
		} finally {
			in.close();
		}
	}

	/**
	 * Append the image displayed on the given page to the chapter content.
	 * <p>
	 * If the page has no usable image, nothing is appended and an error is
	 * sent to the trace handler.
	 * 
	 * @param builder
	 *            where to append the {@code [url]<br/>} entry
	 * @param page
	 *            the parsed page
	 * @param i
	 *            the image number (for the error message)
	 */
	private void appendImage(StringBuilder builder, Element page, int i) {
		Element image = page.getElementById("image");
		// absUrl gives an empty String when no absolute URL can be made
		String linkImage = (image == null) ? "" : image.absUrl("src");
		URL originalUrl = linkImage.isEmpty() ? null
				: withoutQuery(linkImage);
		if (originalUrl == null) {
			Instance.getTraceHandler().error(
					new IOException("Cannot get image " + i + " of manga"));
			return;
		}

		builder.append("[");
		// to help with the retry and the originalUrl, part 1
		builder.append(originalUrl);
		builder.append("]<br/>");

		// to help with the retry and the originalUrl, part 2
		refresh(linkImage);
	}

	/**
	 * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
	 * 
	 * @param url
	 *            the URL to refresh
	 * 
	 * @return TRUE if it was refreshed
	 */
	private boolean refresh(String url) {
		try {
			openEx(url).close();
			return true;
		} catch (Exception e) {
			return false;
		}
	}

	/**
	 * Open the URL through the cache, but: retry a second time after 100ms if
	 * it fails, remove the query part of the {@link URL} before saving it to
	 * the cache (so it can be recalled later).
	 * 
	 * @param url
	 *            the {@link URL}
	 * 
	 * @return the resource
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	private InputStream openEx(String url) throws IOException {
		try {
			return Instance.getCache().open(new URL(url), this, true,
					withoutQuery(url));
		} catch (Exception e) {
			// second chance
			try {
				Thread.sleep(100);
			} catch (InterruptedException ee) {
				// do not lose the interruption: let the caller see it
				Thread.currentThread().interrupt();
			}

			return Instance.getCache().open(new URL(url), this, true,
					withoutQuery(url));
		}
	}

	/**
	 * Return the same input {@link URL} but without the query part (the
	 * protocol, host, port and path are kept).
	 * 
	 * @param url
	 *            the input {@link URL} as a {@link String}
	 * 
	 * @return the input {@link URL} without query, or NULL if the input is
	 *         not a valid {@link URL}
	 */
	private URL withoutQuery(String url) {
		try {
			// Remove the query from the original URL, so it can be cached
			// correctly (a port of -1 means "default port")
			URL o = new URL(url);
			return new URL(o.getProtocol(), o.getHost(), o.getPort(),
					o.getPath());
		} catch (MalformedURLException e) {
			return null;
		}
	}

	/**
	 * Explode an HTML comma-separated list of values into a non-duplicate text
	 * {@link List} .
	 * 
	 * @param values
	 *            the comma-separated values in HTML format
	 * 
	 * @return the full list with no duplicate in text format
	 */
	private List<String> explode(String values) {
		List<String> list = new ArrayList<String>();
		if (values != null && !values.isEmpty()) {
			for (String value : values.split(",")) {
				String text = StringUtils.unhtml(value).trim();
				if (!text.isEmpty() && !list.contains(text)) {
					list.add(text);
				}
			}
		}

		return list;
	}

	@Override
	protected boolean supports(URL url) {
		String host = url.getHost();
		return "mangafox.me".equals(host) || "www.mangafox.me".equals(host)
				|| "fanfox.net".equals(host)
				|| "www.fanfox.net".equals(host);
	}
}
Commit	Line	Data
	1	package be.nikiroo.fanfix.supported;
	2
	3	import java.io.IOException;
	4	import java.io.InputStream;
	5	import java.net.MalformedURLException;
	6	import java.net.URL;
	7	import java.util.AbstractMap;
	8	import java.util.ArrayList;
	9	import java.util.Collections;
	10	import java.util.List;
	11	import java.util.Map.Entry;
	12	import java.util.SortedMap;
	13	import java.util.TreeMap;
	14
	15	import org.jsoup.helper.DataUtil;
	16	import org.jsoup.nodes.Element;
	17	import org.jsoup.select.Elements;
	18
	19	import be.nikiroo.fanfix.Instance;
	20	import be.nikiroo.fanfix.data.MetaData;
	21	import be.nikiroo.utils.Image;
	22	import be.nikiroo.utils.Progress;
	23	import be.nikiroo.utils.StringUtils;
	24
	25	class MangaFox extends BasicSupport {
	26	@Override
	27	protected boolean isHtml() {
	28	return true;
	29	}
	30
	31	@Override
	32	public String getSourceName() {
	33	return "MangaFox.me";
	34	}
	35
	36	@Override
	37	protected MetaData getMeta() throws IOException {
	38	MetaData meta = new MetaData();
	39	Element doc = getSourceNode();
	40
	41	Element title = doc.getElementById("title");
	42	Elements table = null;
	43	if (title != null) {
	44	table = title.getElementsByTag("table");
	45	}
	46	if (table != null) {
	47	// Rows: header, data
	48	Elements rows = table.first().getElementsByTag("tr");
	49	if (rows.size() > 1) {
	50	table = rows.get(1).getElementsByTag("td");
	51	// Columns: Realeased, Authors, Artists, Genres
	52	if (table.size() < 4) {
	53	table = null;
	54	}
	55	}
	56	}
	57
	58	meta.setTitle(getTitle());
	59	if (table != null) {
	60	meta.setAuthor(getAuthors(table.get(1).text() + ","
	61	+ table.get(2).text()));
	62
	63	meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
	64	meta.setTags(explode(table.get(3).text()));
	65	}
	66	meta.setSource(getSourceName());
	67	meta.setUrl(getSource().toString());
	68	meta.setPublisher(getSourceName());
	69	meta.setUuid(getSource().toString());
	70	meta.setLuid("");
	71	meta.setLang("en");
	72	meta.setSubject("manga");
	73	meta.setType(getType().toString());
	74	meta.setImageDocument(true);
	75	meta.setCover(getCover());
	76
	77	return meta;
	78	}
	79
	80	private String getTitle() {
	81	Element doc = getSourceNode();
	82
	83	Element title = doc.getElementById("title");
	84	Element h1 = title.getElementsByTag("h1").first();
	85	if (h1 != null) {
	86	return StringUtils.unhtml(h1.text()).trim();
	87	}
	88
	89	return null;
	90	}
	91
	92	private String getAuthors(String authorList) {
	93	String author = "";
	94	for (String auth : explode(authorList)) {
	95	if (!author.isEmpty()) {
	96	author = author + ", ";
	97	}
	98	author += auth;
	99	}
	100
	101	return author;
	102	}
	103
	104	@Override
	105	protected String getDesc() {
	106	Element doc = getSourceNode();
	107	Element title = doc.getElementsByClass("summary").first();
	108	if (title != null) {
	109	return StringUtils.unhtml(title.text()).trim();
	110	}
	111
	112	return null;
	113	}
	114
	115	private Image getCover() {
	116	Element doc = getSourceNode();
	117	Element cover = doc.getElementsByClass("cover").first();
	118	if (cover != null) {
	119	cover = cover.getElementsByTag("img").first();
	120	}
	121
	122	if (cover != null) {
	123	String coverUrl = cover.absUrl("src");
	124
	125	InputStream coverIn;
	126	try {
	127	coverIn = openEx(coverUrl);
	128	try {
	129	return new Image(coverIn);
	130	} finally {
	131	coverIn.close();
	132	}
	133	} catch (IOException e) {
	134	Instance.getTraceHandler().error(e);
	135	}
	136	}
	137
	138	return null;
	139	}
	140
	141	@Override
	142	protected List<Entry<String, URL>> getChapters(Progress pg) {
	143	List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
	144
	145	String prefix = null; // each chapter starts with this prefix, then a
	146	// chapter number (including "x.5"), then name
	147
	148	Element doc = getSourceNode();
	149	for (Element li : doc.getElementsByTag("li")) {
	150	Element el = li.getElementsByTag("h4").first();
	151	if (el == null) {
	152	el = li.getElementsByTag("h3").first();
	153	}
	154	if (el != null) {
	155	Element a = el.getElementsByTag("a").first();
	156	if (a != null) {
	157	String title = StringUtils.unhtml(el.text()).trim();
	158	try {
	159	String url = a.absUrl("href");
	160	if (url.endsWith("1.html")) {
	161	url = url.substring(0,
	162	url.length() - "1.html".length());
	163	}
	164	if (!url.endsWith("/")) {
	165	url += "/";
	166	}
	167
	168	if (prefix == null || !prefix.isEmpty()) {
	169	StringBuilder possiblePrefix = new StringBuilder(
	170	StringUtils.unhtml(a.text()).trim());
	171	while (possiblePrefix.length() > 0) {
	172	char car = possiblePrefix.charAt(possiblePrefix
	173	.length() - 1);
	174	boolean punctuation = (car == '.' || car == ' ');
	175	boolean digit = (car >= '0' && car <= '9');
	176	if (!punctuation && !digit) {
	177	break;
	178	}
	179
	180	possiblePrefix.setLength(possiblePrefix
	181	.length() - 1);
	182	}
	183
	184	if (prefix == null) {
	185	prefix = possiblePrefix.toString();
	186	}
	187
	188	if (!prefix.equalsIgnoreCase(possiblePrefix
	189	.toString())) {
	190	prefix = ""; // prefix not ok
	191	}
	192	}
	193
	194	urls.add(new AbstractMap.SimpleEntry<String, URL>(
	195	title, new URL(url)));
	196	} catch (Exception e) {
	197	Instance.getTraceHandler().error(e);
	198	}
	199	}
	200	}
	201	}
	202
	203	if (prefix != null && !prefix.isEmpty()) {
	204	try {
	205	// We found a prefix, so everything should be sortable
	206	SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
	207	for (Entry<String, URL> entry : urls) {
	208	String num = entry.getKey().substring(prefix.length() + 1)
	209	.trim();
	210	String name = "";
	211	int pos = num.indexOf(' ');
	212	if (pos >= 0) {
	213	name = num.substring(pos).trim();
	214	num = num.substring(0, pos).trim();
	215	}
	216
	217	if (!name.isEmpty()) {
	218	name = "Tome " + num + ": " + name;
	219	} else {
	220	name = "Tome " + num;
	221	}
	222
	223	double key = Double.parseDouble(num);
	224
	225	map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
	226	entry.getValue()));
	227	}
	228	urls = new ArrayList<Entry<String, URL>>(map.values());
	229	} catch (NumberFormatException e) {
	230	Instance.getTraceHandler()
	231	.error(new IOException(
	232	"Cannot find a tome number, revert to default sorting",
	233	e));
	234	// by default, the chapters are in reversed order
	235	Collections.reverse(urls);
	236	}
	237	} else {
	238	// by default, the chapters are in reversed order
	239	Collections.reverse(urls);
	240	}
	241
	242	return urls;
	243	}
	244
	245	@Override
	246	protected String getChapterContent(URL chapUrl, int number, Progress pg)
	247	throws IOException {
	248	if (pg == null) {
	249	pg = new Progress();
	250	}
	251
	252	StringBuilder builder = new StringBuilder();
	253
	254	String url = chapUrl.toString();
	255	InputStream imageIn = null;
	256	Element imageDoc = null;
	257
	258	// 1. find out how many images there are
	259	int size;
	260	try {
	261	// note: when used, the base URL can be an ad-page
	262	imageIn = openEx(url + "1.html");
	263	imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
	264	} catch (IOException e) {
	265	Instance.getTraceHandler().error(
	266	new IOException("Cannot get image " + 1 + " of manga", e));
	267	} finally {
	268	if (imageIn != null) {
	269	imageIn.close();
	270	}
	271	}
	272	Element select = imageDoc.getElementsByClass("m").first();
	273	Elements options = select.getElementsByTag("option");
	274	size = options.size() - 1; // last is "Comments"
	275
	276	pg.setMinMax(0, size);
	277
	278	// 2. list them
	279	for (int i = 1; i <= size; i++) {
	280	if (i > 1) { // because first one was opened for size
	281	try {
	282	imageIn = openEx(url + i + ".html");
	283	imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
	284	+ ".html");
	285
	286	String linkImage = imageDoc.getElementById("image").absUrl(
	287	"src");
	288	if (linkImage != null) {
	289	builder.append("[");
	290	// to help with the retry and the originalUrl, part 1
	291	builder.append(withoutQuery(linkImage));
	292	builder.append("]<br/>");
	293	}
	294
	295	// to help with the retry and the originalUrl, part 2
	296	refresh(linkImage);
	297	} catch (IOException e) {
	298	Instance.getTraceHandler().error(
	299	new IOException("Cannot get image " + i
	300	+ " of manga", e));
	301	} finally {
	302	if (imageIn != null) {
	303	imageIn.close();
	304	}
	305	}
	306	}
	307	}
	308
	309	return builder.toString();
	310	}
	311
	312	/**
	313	* Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
	314	*
	315	* @param url
	316	* the URL to refresh
	317	*
	318	* @return TRUE if it was refreshed
	319	*/
	320	private boolean refresh(String url) {
	321	try {
	322	openEx(url).close();
	323	return true;
	324	} catch (Exception e) {
	325	return false;
	326	}
	327	}
	328
	329	/**
	330	* Open the URL through the cache, but: retry a second time after 100ms if
	331	* it fails, remove the query part of the {@link URL} before saving it to
	332	* the cache (so it can be recalled later).
	333	*
	334	* @param url
	335	* the {@link URL}
	336	*
	337	* @return the resource
	338	*
	339	* @throws IOException
	340	* in case of I/O error
	341	*/
	342	private InputStream openEx(String url) throws IOException {
	343	try {
	344	return Instance.getCache().open(new URL(url), this, true,
	345	withoutQuery(url));
	346	} catch (Exception e) {
	347	// second chance
	348	try {
	349	Thread.sleep(100);
	350	} catch (InterruptedException ee) {
	351	}
	352
	353	return Instance.getCache().open(new URL(url), this, true,
	354	withoutQuery(url));
	355	}
	356	}
	357
	358	/**
	359	* Return the same input {@link URL} but without the query part.
	360	*
	361	* @param url
	362	* the inpiut {@link URL} as a {@link String}
	363	*
	364	* @return the input {@link URL} without query
	365	*/
	366	private URL withoutQuery(String url) {
	367	URL o = null;
	368	try {
	369	// Remove the query from o (originalUrl), so it can be cached
	370	// correctly
	371	o = new URL(url);
	372	o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
	373
	374	return o;
	375	} catch (MalformedURLException e) {
	376	return null;
	377	}
	378	}
	379
	380	/**
	381	* Explode an HTML comma-separated list of values into a non-duplicate text
	382	* {@link List} .
	383	*
	384	* @param values
	385	* the comma-separated values in HTML format
	386	*
	387	* @return the full list with no duplicate in text format
	388	*/
	389	private List<String> explode(String values) {
	390	List<String> list = new ArrayList<String>();
	391	if (values != null && !values.isEmpty()) {
	392	for (String auth : values.split(",")) {
	393	String a = StringUtils.unhtml(auth).trim();
	394	if (!a.isEmpty() && !list.contains(a.trim())) {
	395	list.add(a);
	396	}
	397	}
	398	}
	399
	400	return list;
	401	}
	402
	403	@Override
	404	protected boolean supports(URL url) {
	405	return "mangafox.me".equals(url.getHost())
	406	|| "www.mangafox.me".equals(url.getHost())
	407	|| "fanfox.net".equals(url.getHost())
	408	|| "www.fanfox.net".equals(url.getHost());
	409	}
	410	}