[fanfix.git] / src / be / nikiroo / fanfix / supported / MangaFox.java

package be.nikiroo.fanfix.supported;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;

/**
 * Support class for the manga pages served by the hosts accepted by
 * {@link MangaFox#supports(URL)} (mangafox.me and fanfox.net).
 * <p>
 * The stories are flagged as image documents: the content of a chapter is a
 * list of image links, one per page of the chapter.
 */
class MangaFox extends BasicSupport {
	@Override
	protected boolean isHtml() {
		return true;
	}

	@Override
	public String getSourceName() {
		return "MangaFox.me";
	}

	/**
	 * Build the meta data of the story from the source page.
	 * <p>
	 * The author, date and tags are only filled when the information table
	 * can be found (see {@link MangaFox#getInfoCells(Element)}).
	 * 
	 * @return the meta data
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	@Override
	protected MetaData getMeta() throws IOException {
		MetaData meta = new MetaData();
		Elements cells = getInfoCells(getSourceNode());

		meta.setTitle(getTitle());
		if (cells != null) {
			// Columns: Released, Authors, Artists, Genres
			meta.setAuthor(getAuthors(cells.get(1).text() + ","
					+ cells.get(2).text()));

			meta.setDate(StringUtils.unhtml(cells.get(0).text()).trim());
			meta.setTags(explode(cells.get(3).text()));
		}
		meta.setSource(getSourceName());
		meta.setUrl(getSource().toString());
		meta.setPublisher(getSourceName());
		meta.setUuid(getSource().toString());
		meta.setLuid("");
		meta.setLang("en");
		meta.setSubject("manga");
		meta.setType(getType().toString());
		meta.setImageDocument(true);
		meta.setCover(getCover());

		return meta;
	}

	/**
	 * Locate the data cells of the information table found under the element
	 * with the id "title".
	 * <p>
	 * The table is expected to have a header row followed by a data row with
	 * (at least) 4 columns: Released, Authors, Artists, Genres.
	 * 
	 * @param doc
	 *            the source document
	 * 
	 * @return the cells of the data row, or NULL if the table, the data row or
	 *         one of the 4 columns is missing
	 */
	private Elements getInfoCells(Element doc) {
		Element title = doc.getElementById("title");
		if (title == null) {
			return null;
		}

		// first() is NULL when there is no table at all
		Element table = title.getElementsByTag("table").first();
		if (table == null) {
			return null;
		}

		// Rows: header, data
		Elements rows = table.getElementsByTag("tr");
		if (rows.size() < 2) {
			return null;
		}

		Elements cells = rows.get(1).getElementsByTag("td");
		if (cells.size() < 4) {
			return null;
		}

		return cells;
	}

	/**
	 * Return the title of the story (the first "h1" under the element with the
	 * id "title").
	 * 
	 * @return the title in text format, or NULL if not found
	 */
	private String getTitle() {
		Element doc = getSourceNode();

		Element title = doc.getElementById("title");
		if (title == null) {
			return null;
		}

		Element h1 = title.getElementsByTag("h1").first();
		if (h1 != null) {
			return StringUtils.unhtml(h1.text()).trim();
		}

		return null;
	}

	/**
	 * Convert a comma-separated list of authors into a ", "-separated list
	 * without empty or duplicate names.
	 * 
	 * @param authorList
	 *            the comma-separated authors in HTML format
	 * 
	 * @return the authors in text format (can be empty, never NULL)
	 */
	private String getAuthors(String authorList) {
		StringBuilder author = new StringBuilder();
		for (String auth : explode(authorList)) {
			if (author.length() > 0) {
				author.append(", ");
			}
			author.append(auth);
		}

		return author.toString();
	}

	/**
	 * Return the description of the story (the text of the first element with
	 * the class "summary").
	 * 
	 * @return the description in text format, or NULL if not found
	 */
	@Override
	protected String getDesc() {
		Element doc = getSourceNode();
		Element summary = doc.getElementsByClass("summary").first();
		if (summary != null) {
			return StringUtils.unhtml(summary.text()).trim();
		}

		return null;
	}

	/**
	 * Download the cover image (the first "img" under the first element with
	 * the class "cover").
	 * <p>
	 * Download errors are reported to the trace handler, they do not abort the
	 * process.
	 * 
	 * @return the cover, or NULL if not found or not downloadable
	 */
	private Image getCover() {
		Element doc = getSourceNode();
		Element cover = doc.getElementsByClass("cover").first();
		if (cover != null) {
			cover = cover.getElementsByTag("img").first();
		}

		if (cover == null) {
			return null;
		}

		// absUrl returns an empty String when it cannot resolve the link
		String coverUrl = cover.absUrl("src");
		if (coverUrl.isEmpty()) {
			return null;
		}

		try {
			InputStream coverIn = openEx(coverUrl);
			try {
				return new Image(coverIn);
			} finally {
				coverIn.close();
			}
		} catch (IOException e) {
			Instance.getTraceHandler().error(e);
		}

		return null;
	}

	/**
	 * List the chapters of the story (name and base URL, the base URL always
	 * ending with a "/").
	 * <p>
	 * A chapter is any "li" element that contains a "h4" (or, by default, a
	 * "h3") with a link in it.
	 * <p>
	 * If all the chapter links share the same text prefix before their number,
	 * the chapters are sorted by this number and renamed "Tome x[: name]";
	 * if not, the order of the page is simply reversed.
	 * 
	 * @param pg
	 *            the optional progress reporter (not used here)
	 * 
	 * @return the chapters
	 */
	@Override
	protected List<Entry<String, URL>> getChapters(Progress pg) {
		List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();

		String prefix = null; // each chapter starts with this prefix, then a
								// chapter number (including "x.5"), then name
								// (NULL: not known yet, empty: no common
								// prefix)

		Element doc = getSourceNode();
		for (Element li : doc.getElementsByTag("li")) {
			Element el = li.getElementsByTag("h4").first();
			if (el == null) {
				el = li.getElementsByTag("h3").first();
			}
			if (el == null) {
				continue;
			}

			Element a = el.getElementsByTag("a").first();
			if (a == null) {
				continue;
			}

			String title = StringUtils.unhtml(el.text()).trim();
			try {
				// We want the base URL of the chapter, not its first page
				String url = a.absUrl("href");
				if (url.endsWith("1.html")) {
					url = url.substring(0, url.length() - "1.html".length());
				}
				if (!url.endsWith("/")) {
					url += "/";
				}

				if (prefix == null || !prefix.isEmpty()) {
					String possiblePrefix = stripChapterNumber(StringUtils
							.unhtml(a.text()).trim());

					if (prefix == null) {
						prefix = possiblePrefix;
					}

					if (!prefix.equalsIgnoreCase(possiblePrefix)) {
						prefix = ""; // prefix not ok
					}
				}

				urls.add(new AbstractMap.SimpleEntry<String, URL>(title,
						new URL(url)));
			} catch (Exception e) {
				Instance.getTraceHandler().error(e);
			}
		}

		if (prefix != null && !prefix.isEmpty()) {
			try {
				// We found a prefix, so everything should be sortable
				urls = sortByTomeNumber(urls, prefix);
			} catch (NumberFormatException e) {
				Instance.getTraceHandler()
						.error(new IOException(
								"Cannot find a tome number, revert to default sorting",
								e));
				// by default, the chapters are in reversed order
				Collections.reverse(urls);
			}
		} else {
			// by default, the chapters are in reversed order
			Collections.reverse(urls);
		}

		return urls;
	}

	/**
	 * Remove the trailing digits, dots and spaces of the given text, so only
	 * what comes before the chapter number is kept.
	 * 
	 * @param text
	 *            the text of a chapter link
	 * 
	 * @return the text without its trailing number (can be empty)
	 */
	private String stripChapterNumber(String text) {
		int end = text.length();
		while (end > 0) {
			char car = text.charAt(end - 1);
			boolean punctuation = (car == '.' || car == ' ');
			boolean digit = (car >= '0' && car <= '9');
			if (!punctuation && !digit) {
				break;
			}

			end--;
		}

		return text.substring(0, end);
	}

	/**
	 * Sort the chapters by the number that follows the common prefix in their
	 * names, and rename them "Tome x" or "Tome x: name".
	 * <p>
	 * Note that the chapters are keyed by their number: if several chapters
	 * share the same number, only the last one listed is kept.
	 * 
	 * @param urls
	 *            the chapters to sort (this list is not modified)
	 * @param prefix
	 *            the common prefix of all the chapter names
	 * 
	 * @return a new, sorted list
	 * 
	 * @throws NumberFormatException
	 *             if a chapter has no usable number after the prefix
	 */
	private List<Entry<String, URL>> sortByTomeNumber(
			List<Entry<String, URL>> urls, String prefix) {
		SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
		for (Entry<String, URL> entry : urls) {
			String title = entry.getKey();
			if (title.length() <= prefix.length()) {
				// nothing after the prefix: report it like any other bad
				// number instead of failing on the substring below
				throw new NumberFormatException("No tome number in: "
						+ title);
			}

			// + 1: skip the separator between the prefix and the number
			String num = title.substring(prefix.length() + 1).trim();
			String name = "";
			int pos = num.indexOf(' ');
			if (pos >= 0) {
				name = num.substring(pos).trim();
				num = num.substring(0, pos).trim();
			}

			if (!name.isEmpty()) {
				name = "Tome " + num + ": " + name;
			} else {
				name = "Tome " + num;
			}

			double key = Double.parseDouble(num);

			map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
					entry.getValue()));
		}

		return new ArrayList<Entry<String, URL>>(map.values());
	}

	/**
	 * Return the content of the chapter: one "[image-url]&lt;br/&gt;" entry
	 * per page, the image URLs being stripped of their query part.
	 * <p>
	 * Each image is also opened once through
	 * {@link MangaFox#openEx(String)}, so it is available in the cache under
	 * its query-less URL.
	 * 
	 * @param chapUrl
	 *            the base URL of the chapter (the pages are "1.html",
	 *            "2.html"... under this URL)
	 * @param number
	 *            the chapter number (not used here)
	 * @param pg
	 *            the optional progress reporter
	 * 
	 * @return the content of the chapter
	 * 
	 * @throws IOException
	 *             in case of I/O error, or if a page does not have the
	 *             expected structure
	 */
	@Override
	protected String getChapterContent(URL chapUrl, int number, Progress pg)
			throws IOException {
		if (pg == null) {
			pg = new Progress();
		}

		StringBuilder builder = new StringBuilder();

		String url = chapUrl.toString();

		// 1. find out how many images there are
		// note: when used, the base URL can be an ad-page
		Element imageDoc = loadPage(url + "1.html");
		Element select = imageDoc.getElementsByClass("m").first();
		if (select == null) {
			throw new IOException("Cannot find the page selector in: " + url
					+ "1.html");
		}

		Elements options = select.getElementsByTag("option");
		// last is "Comments" (and an image count cannot be negative)
		int size = Math.max(0, options.size() - 1);

		pg.setMinMax(0, size);

		// 2. list them
		for (int i = 1; i <= size; i++) {
			if (i > 1) { // because first one was opened for size
				imageDoc = loadPage(url + i + ".html");
			}

			Element image = imageDoc.getElementById("image");
			if (image == null) {
				throw new IOException("Cannot find the image in: " + url + i
						+ ".html");
			}

			// absUrl returns an empty String (never NULL) when it cannot
			// resolve the link
			String linkImage = image.absUrl("src");
			if (!linkImage.isEmpty()) {
				builder.append("[");
				// to help with the retry and the originalUrl, part 1
				builder.append(withoutQuery(linkImage));
				builder.append("]<br/>");

				// to help with the retry and the originalUrl, part 2
				refresh(linkImage);
			}
		}

		return builder.toString();
	}

	/**
	 * Download (via {@link MangaFox#openEx(String)}) and parse the given HTML
	 * page as UTF-8.
	 * 
	 * @param pageUrl
	 *            the URL of the page
	 * 
	 * @return the parsed page
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	private Element loadPage(String pageUrl) throws IOException {
		// opened outside of the try: there is nothing to close if it fails
		InputStream in = openEx(pageUrl);
		try {
			return DataUtil.load(in, "UTF-8", pageUrl);
		} finally {
			in.close();
		}
	}

	/**
	 * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
	 * 
	 * @param url
	 *            the URL to refresh
	 * 
	 * @return TRUE if it was refreshed
	 */
	private boolean refresh(String url) {
		try {
			openEx(url).close();
			return true;
		} catch (Exception e) {
			return false;
		}
	}

	/**
	 * Open the URL through the cache, but: retry a second time after 100ms if
	 * it fails, remove the query part of the {@link URL} before saving it to
	 * the cache (so it can be recalled later).
	 * 
	 * @param url
	 *            the {@link URL}
	 * 
	 * @return the resource
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	private InputStream openEx(String url) throws IOException {
		try {
			return Instance.getCache().open(new URL(url), this, true,
					withoutQuery(url));
		} catch (Exception e) {
			// second chance
			try {
				Thread.sleep(100);
			} catch (InterruptedException ie) {
				// do not hide the interruption from the calling code
				Thread.currentThread().interrupt();
			}

			return Instance.getCache().open(new URL(url), this, true,
					withoutQuery(url));
		}
	}

	/**
	 * Return the same input {@link URL} but without the query part (nor the
	 * fragment); the protocol, host, port and path are kept.
	 * 
	 * @param url
	 *            the input {@link URL} as a {@link String}
	 * 
	 * @return the input {@link URL} without query, or NULL if the input is not
	 *         a valid {@link URL}
	 */
	private URL withoutQuery(String url) {
		try {
			// Remove the query from the original URL, so it can be cached
			// correctly (a port of -1 means "default port" and is not
			// printed)
			URL o = new URL(url);
			return new URL(o.getProtocol(), o.getHost(), o.getPort(),
					o.getPath());
		} catch (MalformedURLException e) {
			return null;
		}
	}

	/**
	 * Explode an HTML comma-separated list of values into a non-duplicate text
	 * {@link List} .
	 * 
	 * @param values
	 *            the comma-separated values in HTML format (can be NULL)
	 * 
	 * @return the full list with no duplicate in text format, in order of
	 *         first appearance (can be empty, never NULL)
	 */
	private List<String> explode(String values) {
		List<String> list = new ArrayList<String>();
		if (values != null && !values.isEmpty()) {
			for (String value : values.split(",")) {
				String text = StringUtils.unhtml(value).trim();
				if (!text.isEmpty() && !list.contains(text)) {
					list.add(text);
				}
			}
		}

		return list;
	}

	/**
	 * Check if the given {@link URL} points to one of the supported hosts
	 * (the host name is compared without regard to case).
	 * 
	 * @param url
	 *            the {@link URL} to check
	 * 
	 * @return TRUE if it is supported
	 */
	@Override
	protected boolean supports(URL url) {
		String host = url.getHost();
		return "mangafox.me".equalsIgnoreCase(host)
				|| "www.mangafox.me".equalsIgnoreCase(host)
				|| "fanfox.net".equalsIgnoreCase(host)
				|| "www.fanfox.net".equalsIgnoreCase(host);
	}
}
Commit	Line	Data
08fe2e33 NR	1	package be.nikiroo.fanfix.supported;
	2
	3	import java.io.IOException;
	4	import java.io.InputStream;
	5	import java.net.MalformedURLException;
	6	import java.net.URL;
cb554033	7	import java.util.AbstractMap;
08fe2e33 NR	8	import java.util.ArrayList;
	9	import java.util.Collections;
	10	import java.util.List;
	11	import java.util.Map.Entry;
41c3bba7 NR	12	import java.util.SortedMap;
41c3bba7 NR	13	import java.util.TreeMap;
cb554033 NR	14
	15	import org.jsoup.helper.DataUtil;
	16	import org.jsoup.nodes.Element;
	17	import org.jsoup.select.Elements;
08fe2e33 NR	18
08fe2e33 NR	19	import be.nikiroo.fanfix.Instance;
68686a37	20	import be.nikiroo.fanfix.data.MetaData;
16a81ef7	21	import be.nikiroo.utils.Image;
ed08c171	22	import be.nikiroo.utils.Progress;
08fe2e33 NR	23	import be.nikiroo.utils.StringUtils;
08fe2e33 NR	24
cb554033	25	class MangaFox extends BasicSupport {
08fe2e33 NR	26	@Override
	27	protected boolean isHtml() {
	28	return true;
	29	}
	30
	31	@Override
	32	public String getSourceName() {
d3c15421	33	return "MangaFox.me";
08fe2e33 NR	34	}
	35
	36	@Override
cb554033	37	protected MetaData getMeta() throws IOException {
68686a37	38	MetaData meta = new MetaData();
cb554033 NR	39	Element doc = getSourceNode();
	40
	41	Element title = doc.getElementById("title");
	42	Elements table = null;
	43	if (title != null) {
	44	table = title.getElementsByTag("table");
	45	}
	46	if (table != null) {
	47	// Rows: header, data
	48	Elements rows = table.first().getElementsByTag("tr");
	49	if (rows.size() > 1) {
	50	table = rows.get(1).getElementsByTag("td");
	51	// Columns: Realeased, Authors, Artists, Genres
	52	if (table.size() < 4) {
	53	table = null;
	54	}
	55	}
	56	}
68686a37	57
cb554033 NR	58	meta.setTitle(getTitle());
	59	if (table != null) {
	60	meta.setAuthor(getAuthors(table.get(1).text() + ","
	61	+ table.get(2).text()));
	62
	63	meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
	64	meta.setTags(explode(table.get(3).text()));
	65	}
68686a37	66	meta.setSource(getSourceName());
cb554033	67	meta.setUrl(getSource().toString());
68686a37	68	meta.setPublisher(getSourceName());
cb554033	69	meta.setUuid(getSource().toString());
68686a37	70	meta.setLuid("");
276f95c6	71	meta.setLang("en");
68686a37 NR	72	meta.setSubject("manga");
	73	meta.setType(getType().toString());
	74	meta.setImageDocument(true);
cb554033	75	meta.setCover(getCover());
68686a37 NR	76
68686a37 NR	77	return meta;
08fe2e33 NR	78	}
08fe2e33 NR	79
cb554033 NR	80	private String getTitle() {
cb554033 NR	81	Element doc = getSourceNode();
08fe2e33	82
cb554033 NR	83	Element title = doc.getElementById("title");
	84	Element h1 = title.getElementsByTag("h1").first();
	85	if (h1 != null) {
	86	return StringUtils.unhtml(h1.text()).trim();
08fe2e33 NR	87	}
	88
	89	return null;
	90	}
	91
cb554033 NR	92	private String getAuthors(String authorList) {
	93	String author = "";
	94	for (String auth : explode(authorList)) {
	95	if (!author.isEmpty()) {
	96	author = author + ", ";
08fe2e33	97	}
cb554033	98	author += auth;
08fe2e33 NR	99	}
08fe2e33 NR	100
cb554033	101	return author;
08fe2e33 NR	102	}
	103
	104	@Override
cb554033 NR	105	protected String getDesc() {
	106	Element doc = getSourceNode();
	107	Element title = doc.getElementsByClass("summary").first();
	108	if (title != null) {
	109	StringUtils.unhtml(title.text()).trim();
08fe2e33 NR	110	}
	111
	112	return null;
	113	}
	114
cb554033 NR	115	private Image getCover() {
	116	Element doc = getSourceNode();
	117	Element cover = doc.getElementsByClass("cover").first();
	118	if (cover != null) {
	119	cover = cover.getElementsByTag("img").first();
08fe2e33 NR	120	}
	121
	122	if (cover != null) {
cb554033 NR	123	String coverUrl = cover.absUrl("src");
cb554033 NR	124
68686a37	125	InputStream coverIn;
08fe2e33	126	try {
cb554033	127	coverIn = openEx(coverUrl);
68686a37	128	try {
16a81ef7	129	return new Image(coverIn);
68686a37 NR	130	} finally {
	131	coverIn.close();
	132	}
	133	} catch (IOException e) {
cb554033	134	Instance.getTraceHandler().error(e);
08fe2e33 NR	135	}
	136	}
	137
	138	return null;
	139	}
	140
	141	@Override
cb554033	142	protected List<Entry<String, URL>> getChapters(Progress pg) {
08fe2e33 NR	143	List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
08fe2e33 NR	144
41c3bba7 NR	145	String prefix = null; // each chapter starts with this prefix, then a
	146	// chapter number (including "x.5"), then name
	147
cb554033 NR	148	Element doc = getSourceNode();
	149	for (Element li : doc.getElementsByTag("li")) {
	150	Element el = li.getElementsByTag("h4").first();
	151	if (el == null) {
	152	el = li.getElementsByTag("h3").first();
08fe2e33	153	}
cb554033 NR	154	if (el != null) {
	155	Element a = el.getElementsByTag("a").first();
	156	if (a != null) {
	157	String title = StringUtils.unhtml(el.text()).trim();
	158	try {
	159	String url = a.absUrl("href");
	160	if (url.endsWith("1.html")) {
	161	url = url.substring(0,
	162	url.length() - "1.html".length());
08fe2e33	163	}
cb554033 NR	164	if (!url.endsWith("/")) {
cb554033 NR	165	url += "/";
08fe2e33 NR	166	}
08fe2e33 NR	167
41c3bba7 NR	168	if (prefix == null \|\| !prefix.isEmpty()) {
	169	StringBuilder possiblePrefix = new StringBuilder(
	170	StringUtils.unhtml(a.text()).trim());
	171	while (possiblePrefix.length() > 0) {
	172	char car = possiblePrefix.charAt(possiblePrefix
	173	.length() - 1);
	174	boolean punctuation = (car == '.' \|\| car == ' ');
	175	boolean digit = (car >= '0' && car <= '9');
	176	if (!punctuation && !digit) {
	177	break;
	178	}
	179
	180	possiblePrefix.setLength(possiblePrefix
	181	.length() - 1);
	182	}
	183
	184	if (prefix == null) {
	185	prefix = possiblePrefix.toString();
	186	}
	187
	188	if (!prefix.equalsIgnoreCase(possiblePrefix
	189	.toString())) {
	190	prefix = ""; // prefix not ok
	191	}
	192	}
	193
cb554033 NR	194	urls.add(new AbstractMap.SimpleEntry<String, URL>(
	195	title, new URL(url)));
	196	} catch (Exception e) {
	197	Instance.getTraceHandler().error(e);
	198	}
08fe2e33 NR	199	}
	200	}
	201	}
	202
41c3bba7 NR	203	if (prefix != null && !prefix.isEmpty()) {
	204	try {
	205	// We found a prefix, so everything should be sortable
	206	SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
	207	for (Entry<String, URL> entry : urls) {
	208	String num = entry.getKey().substring(prefix.length() + 1)
	209	.trim();
	210	String name = "";
	211	int pos = num.indexOf(' ');
	212	if (pos >= 0) {
	213	name = num.substring(pos).trim();
	214	num = num.substring(0, pos).trim();
	215	}
	216
	217	if (!name.isEmpty()) {
	218	name = "Tome " + num + ": " + name;
	219	} else {
	220	name = "Tome " + num;
	221	}
	222
	223	double key = Double.parseDouble(num);
	224
	225	map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
	226	entry.getValue()));
	227	}
	228	urls = new ArrayList<Entry<String, URL>>(map.values());
	229	} catch (NumberFormatException e) {
	230	Instance.getTraceHandler()
	231	.error(new IOException(
	232	"Cannot find a tome number, revert to default sorting",
	233	e));
	234	// by default, the chapters are in reversed order
	235	Collections.reverse(urls);
	236	}
	237	} else {
	238	// by default, the chapters are in reversed order
	239	Collections.reverse(urls);
	240	}
08fe2e33 NR	241
	242	return urls;
	243	}
	244
	245	@Override
cb554033 NR	246	protected String getChapterContent(URL chapUrl, int number, Progress pg)
cb554033 NR	247	throws IOException {
ed08c171 NR	248	if (pg == null) {
ed08c171 NR	249	pg = new Progress();
ed08c171 NR	250	}
ed08c171 NR	251
08fe2e33	252	StringBuilder builder = new StringBuilder();
08fe2e33	253
cb554033 NR	254	String url = chapUrl.toString();
	255	InputStream imageIn = null;
	256	Element imageDoc = null;
	257
	258	// 1. find out how many images there are
	259	int size;
	260	try {
	261	// note: when used, the base URL can be an ad-page
	262	imageIn = openEx(url + "1.html");
	263	imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
	264	} finally {
	265	imageIn.close();
	266	}
	267	Element select = imageDoc.getElementsByClass("m").first();
	268	Elements options = select.getElementsByTag("option");
	269	size = options.size() - 1; // last is "Comments"
	270
	271	pg.setMinMax(0, size);
	272
	273	// 2. list them
	274	for (int i = 1; i <= size; i++) {
	275	if (i > 1) { // because fist one was opened for size
	276	try {
	277	imageIn = openEx(url + i + ".html");
	278	imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
	279	+ ".html");
	280	} finally {
	281	imageIn.close();
08fe2e33 NR	282	}
	283	}
	284
cb554033	285	String linkImage = imageDoc.getElementById("image").absUrl("src");
08fe2e33 NR	286	if (linkImage != null) {
	287	builder.append("[");
	288	// to help with the retry and the originalUrl, part 1
	289	builder.append(withoutQuery(linkImage));
406447a4	290	builder.append("]<br/>");
08fe2e33 NR	291	}
	292
	293	// to help with the retry and the originalUrl, part 2
	294	refresh(linkImage);
08fe2e33 NR	295	}
08fe2e33 NR	296
08fe2e33 NR	297	return builder.toString();
	298	}
	299
08fe2e33	300	/**
41c3bba7	301	* Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
08fe2e33 NR	302	*
	303	* @param url
	304	* the URL to refresh
	305	*
	306	* @return TRUE if it was refreshed
	307	*/
	308	private boolean refresh(String url) {
	309	try {
	310	openEx(url).close();
	311	return true;
	312	} catch (Exception e) {
	313	return false;
	314	}
	315	}
	316
	317	/**
	318	* Open the URL through the cache, but: retry a second time after 100ms if
	319	* it fails, remove the query part of the {@link URL} before saving it to
	320	* the cache (so it can be recalled later).
	321	*
	322	* @param url
	323	* the {@link URL}
	324	*
	325	* @return the resource
	326	*
	327	* @throws IOException
	328	* in case of I/O error
	329	*/
	330	private InputStream openEx(String url) throws IOException {
	331	try {
	332	return Instance.getCache().open(new URL(url), this, true,
	333	withoutQuery(url));
	334	} catch (Exception e) {
	335	// second chance
	336	try {
	337	Thread.sleep(100);
	338	} catch (InterruptedException ee) {
	339	}
	340
	341	return Instance.getCache().open(new URL(url), this, true,
	342	withoutQuery(url));
	343	}
	344	}
	345
	346	/**
	347	* Return the same input {@link URL} but without the query part.
	348	*
	349	* @param url
	350	* the inpiut {@link URL} as a {@link String}
	351	*
	352	* @return the input {@link URL} without query
	353	*/
	354	private URL withoutQuery(String url) {
	355	URL o = null;
	356	try {
	357	// Remove the query from o (originalUrl), so it can be cached
	358	// correctly
	359	o = new URL(url);
	360	o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
	361
	362	return o;
	363	} catch (MalformedURLException e) {
	364	return null;
	365	}
366	}
cb554033 NR	367
	368	/**
	369	* Explode an HTML comma-separated list of values into a non-duplicate text
	370	* {@link List} .
	371	*
	372	* @param values
	373	* the comma-separated values in HTML format
	374	*
	375	* @return the full list with no duplicate in text format
	376	*/
	377	private List<String> explode(String values) {
	378	List<String> list = new ArrayList<String>();
	379	if (values != null && !values.isEmpty()) {
	380	for (String auth : values.split(",")) {
	381	String a = StringUtils.unhtml(auth).trim();
	382	if (!a.isEmpty() && !list.contains(a.trim())) {
	383	list.add(a);
	384	}
	385	}
	386	}
	387
	388	return list;
	389	}
	390
	391	@Override
	392	protected boolean supports(URL url) {
	393	return "mangafox.me".equals(url.getHost())
	394	\|\| "www.mangafox.me".equals(url.getHost())
	395	\|\| "fanfox.net".equals(url.getHost())
	396	\|\| "www.fanfox.net".equals(url.getHost());
	397	}
08fe2e33	398	}