From: Niki Roo <niki@nikiroo.be>
Date: Fri, 3 Apr 2020 13:14:05 +0000 (+0200)
Subject: mangafox: fix mangafox, but site is too full of javascript and obvious anti-copy... 
X-Git-Url: http://git.nikiroo.be/?p=fanfix.git;a=commitdiff_plain;h=f3ce1b69a14003f0a6067e501b36a9051bceb34a

mangafox: fix mangafox, but site is too full of javascript and obvious anti-copy, will be removed soon
---

diff --git a/src/be/nikiroo/fanfix/supported/MangaFox.java b/src/be/nikiroo/fanfix/supported/MangaFox.java
index dae2d31..a9db419 100644
--- a/src/be/nikiroo/fanfix/supported/MangaFox.java
+++ b/src/be/nikiroo/fanfix/supported/MangaFox.java
@@ -9,12 +9,10 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map.Entry;
-import java.util.SortedMap;
-import java.util.TreeMap;
 
 import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 
 import be.nikiroo.fanfix.Instance;
 import be.nikiroo.fanfix.data.MetaData;
@@ -31,33 +29,12 @@ class MangaFox extends BasicSupport {
 	@Override
 	protected MetaData getMeta() throws IOException {
 		MetaData meta = new MetaData();
-		Element doc = getSourceNode();
-
-		Element title = doc.getElementById("title");
-		Elements table = null;
-		if (title != null) {
-			table = title.getElementsByTag("table");
-		}
-		if (table != null) {
-			// Rows: header, data
-			Elements rows = table.first().getElementsByTag("tr");
-			if (rows.size() > 1) {
-				table = rows.get(1).getElementsByTag("td");
-				// Columns: Realeased, Authors, Artists, Genres
-				if (table.size() < 4) {
-					table = null;
-				}
-			}
-		}
 
 		meta.setTitle(getTitle());
-		if (table != null) {
-			meta.setAuthor(getAuthors(table.get(1).text() + ","
-					+ table.get(2).text()));
-
-			meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
-			meta.setTags(explode(table.get(3).text()));
-		}
+		// No date anymore on mangafox
+		// meta.setDate();
+		meta.setAuthor(getAuthor());
+		meta.setTags(getTags());
 		meta.setSource(getType().getSourceName());
 		meta.setUrl(getSource().toString());
 		meta.setPublisher(getType().getSourceName());
@@ -75,31 +52,47 @@ class MangaFox extends BasicSupport {
 	private String getTitle() {
 		Element doc = getSourceNode();
 
-		Element title = doc.getElementById("title");
-		Element h1 = title.getElementsByTag("h1").first();
-		if (h1 != null) {
-			return StringUtils.unhtml(h1.text()).trim();
+		Element el = doc.getElementsByClass("detail-info-right-title-font").first();
+		if (el != null) {
+			return StringUtils.unhtml(el.text()).trim();
 		}
 
 		return null;
 	}
 
-	private String getAuthors(String authorList) {
-		String author = "";
-		for (String auth : explode(authorList)) {
-			if (!author.isEmpty()) {
-				author = author + ", ";
+	private String getAuthor() {
+		StringBuilder builder = new StringBuilder();
+		for (String author : getListA("detail-info-right-say")) {
+			if (builder.length() > 0)
+				builder.append(", ");
+			builder.append(author);
+		}
+
+		return builder.toString();
+	}
+
+	private List<String> getTags() {
+		return getListA("detail-info-right-tag-list");
+	}
+
+	private List<String> getListA(String uniqueClass) {
+		List<String> list = new ArrayList<String>();
+
+		Element doc = getSourceNode();
+		Element el = doc.getElementsByClass(uniqueClass).first();
+		if (el != null) {
+			for (Element valueA : el.getElementsByTag("a")) {
+				list.add(StringUtils.unhtml(valueA.text()).trim());
 			}
-			author += auth;
 		}
 
-		return author;
+		return list;
 	}
 
 	@Override
 	protected String getDesc() {
 		Element doc = getSourceNode();
-		Element title = doc.getElementsByClass("summary").first();
+		Element title = doc.getElementsByClass("fullcontent").first();
 		if (title != null) {
 			return StringUtils.unhtml(title.text()).trim();
 		}
@@ -109,11 +102,7 @@ class MangaFox extends BasicSupport {
 
 	private Image getCover() {
 		Element doc = getSourceNode();
-		Element cover = doc.getElementsByClass("cover").first();
-		if (cover != null) {
-			cover = cover.getElementsByTag("img").first();
-		}
-
+		Element cover = doc.getElementsByClass("detail-info-cover-img").first();
 		if (cover != null) {
 			String coverUrl = cover.absUrl("src");
 
@@ -137,207 +126,144 @@ class MangaFox extends BasicSupport {
 	protected List<Entry<String, URL>> getChapters(Progress pg) {
 		List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 
-		String prefix = null; // each chapter starts with this prefix, then a
-								// chapter number (including "x.5"), then name
+		String prefix = getTitle(); // each chapter starts with this prefix, then a
+		// chapter number (including "x.5"), then name
 
+		// normally, only one list...
 		Element doc = getSourceNode();
-		for (Element li : doc.getElementsByTag("li")) {
-			Element el = li.getElementsByTag("h4").first();
-			if (el == null) {
-				el = li.getElementsByTag("h3").first();
-			}
-			if (el != null) {
-				Element a = el.getElementsByTag("a").first();
-				if (a != null) {
-					String title = StringUtils.unhtml(el.text()).trim();
-					try {
-						String url = a.absUrl("href");
-						if (url.endsWith("1.html")) {
-							url = url.substring(0,
-									url.length() - "1.html".length());
-						}
-						if (!url.endsWith("/")) {
-							url += "/";
-						}
-
-						if (prefix == null || !prefix.isEmpty()) {
-							StringBuilder possiblePrefix = new StringBuilder(
-									StringUtils.unhtml(a.text()).trim());
-							while (possiblePrefix.length() > 0) {
-								char car = possiblePrefix.charAt(possiblePrefix
-										.length() - 1);
-								boolean punctuation = (car == '.' || car == ' ');
-								boolean digit = (car >= '0' && car <= '9');
-								if (!punctuation && !digit) {
-									break;
-								}
-
-								possiblePrefix.setLength(possiblePrefix
-										.length() - 1);
-							}
-
-							if (prefix == null) {
-								prefix = possiblePrefix.toString();
-							}
-
-							if (!prefix.equalsIgnoreCase(possiblePrefix
-									.toString())) {
-								prefix = ""; // prefix not ok
-							}
-						}
-
-						urls.add(new AbstractMap.SimpleEntry<String, URL>(
-								title, new URL(url)));
-					} catch (Exception e) {
-						Instance.getTraceHandler().error(e);
-					}
+		for (Element list : doc.getElementsByClass("detail-main-list")) {
+			for (Element el : list.getElementsByTag("a")) {
+				String title = el.attr("title");
+				if (prefix != null && title.startsWith(prefix)) { // getTitle() yields null if the title element is absent
+					title = title.substring(prefix.length()).trim();
 				}
-			}
-		}
 
-		if (prefix != null && !prefix.isEmpty()) {
-			try {
-				// We found a prefix, so everything should be sortable
-				SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
-				for (Entry<String, URL> entry : urls) {
-					String num = entry.getKey().substring(prefix.length() + 1)
-							.trim();
-					String name = "";
-					int pos = num.indexOf(' ');
-					if (pos >= 0) {
-						name = num.substring(pos).trim();
-						num = num.substring(0, pos).trim();
-					}
-
-					if (!name.isEmpty()) {
-						name = "Tome " + num + ": " + name;
-					} else {
-						name = "Tome " + num;
-					}
-
-					double key = Double.parseDouble(num);
-
-					map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
-							entry.getValue()));
+				String url = el.absUrl("href");
+
+				try {
+					urls.add(new AbstractMap.SimpleEntry<String, URL>(title, new URL(url)));
+				} catch (Exception e) {
+					Instance.getTraceHandler().error(e);
 				}
-				urls = new ArrayList<Entry<String, URL>>(map.values());
-			} catch (NumberFormatException e) {
-				Instance.getTraceHandler()
-						.error(new IOException(
-								"Cannot find a tome number, revert to default sorting",
-								e));
-				// by default, the chapters are in reversed order
-				Collections.reverse(urls);
 			}
-		} else {
-			// by default, the chapters are in reversed order
-			Collections.reverse(urls);
 		}
 
+		// by default, the chapters are in reversed order
+		Collections.reverse(urls);
+
 		return urls;
 	}
 
 	@Override
-	protected String getChapterContent(URL chapUrl, int number, Progress pg)
-			throws IOException {
+	protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
 		if (pg == null) {
 			pg = new Progress();
 		}
 
 		StringBuilder builder = new StringBuilder();
 
-		String url = chapUrl.toString();
-		InputStream imageIn = null;
-		Element imageDoc = null;
-
-		// 1. find out how many images there are
-		int size;
-		try {
-			// note: when used, the base URL can be an ad-page
-			imageIn = openEx(url + "1.html");
-			imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
-		} catch (IOException e) {
-			Instance.getTraceHandler().error(
-					new IOException("Cannot get image " + 1 + " of manga", e));
-		} finally {
-			if (imageIn != null) {
-				imageIn.close();
-			}
+		Document chapDoc = DataUtil.load(Instance.getCache().open(chapUrl, this, false), "UTF-8", chapUrl.toString());
+
+		// Example of what we want:
+		// URL: http://fanfox.net/manga/solo_leveling/c110.5/1.html#ipg1
+		// IMAGE, not working:
+		// http://s.fanfox.net/store/manga/29037/110.5/compressed/s034.jpg?token=f630767b0c96f6cc793fc8f1fc177c0ae9342eb1&amp;ttl=1585929600
+		// IMAGE, working:
+		// http://s.fanfox.net/store/manga/29037/000.0/compressed/m2018110o_143554_925.jpg?token=7d74569986335d49651ef1040f7dcb9dbd559b1b&ttl=1585929600
+		// NOTE: (c110.5 -> 110.5, c000 -> 000.0)
+		// NOTE: image key: m2018110o_143554_925 can be found in the script, but not
+		// sorted
+
+		// 0. Get the javascript content
+		StringBuilder javascript = new StringBuilder();
+		for (Element script : chapDoc.getElementsByTag("script")) {
+			javascript.append(script.html());
+			javascript.append("\n");
 		}
-		Element select = imageDoc.getElementsByClass("m").first();
-		Elements options = select.getElementsByTag("option");
-		size = options.size() - 1; // last is "Comments"
 
-		pg.setMinMax(0, size);
+		// 1. Get the chapter url part
+		String chap = chapUrl.getPath();
+		chap = chap.split("#")[0];
+		if (chap.endsWith("/1.html")) {
+			chap = chap.substring(0, chap.length() - "/1.html".length());
+		}
+		int pos = chap.lastIndexOf("/");
+		chap = chap.substring(pos + 1);
+		if (!chap.contains(".")) {
+			chap = chap + ".0";
+		}
+		if (chap.startsWith("c")) {
+			chap = chap.substring(1);
+		}
 
-		// 2. list them
-		for (int i = 1; i <= size; i++) {
-			if (i > 1) { // because first one was opened for size
-				try {
-					imageIn = openEx(url + i + ".html");
-					imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
-							+ ".html");
-
-					String linkImage = imageDoc.getElementById("image").absUrl(
-							"src");
-					if (linkImage != null) {
-						builder.append("[");
-						// to help with the retry and the originalUrl, part 1
-						builder.append(withoutQuery(linkImage));
-						builder.append("]<br/>");
-					}
-
-					// to help with the retry and the originalUrl, part 2
-					refresh(linkImage);
-				} catch (IOException e) {
-					Instance.getTraceHandler().error(
-							new IOException("Cannot get image " + i
-									+ " of manga", e));
-				} finally {
-					if (imageIn != null) {
-						imageIn.close();
-					}
-				}
-			}
+		// 2. Token:
+		// <meta name="og:image"
+		// content="http://fmcdn.fanfox.net/store/manga/29037/cover.jpg?token=4b2056d83973716c715f2404940822dff942a7b4&ttl=1585998000&v=1584582495"
+		Element el = chapDoc.select("meta[name=\"og:image\"]").first();
+		String token = el.attr("content").split("\\?")[1];
+
+		// 3. Comic ID
+		int comicId = getIntVar(javascript, "comicid");
+
+		// 4. Get images
+		List<String> chapKeys = getImageKeys(javascript);
+		// http://s.fanfox.net/store/manga/29037/000.0/compressed/m2018110o_143554_925.jpg?token=7d74569986335d49651ef1040f7dcb9dbd559b1b&ttl=1585929600
+		String base = "http://s.fanfox.net/store/manga/%s/%s/compressed/%s.jpg?%s";
+		for (String key : chapKeys) {
+			String img = String.format(base, comicId, chap, key, token);
+			builder.append("[");
+			builder.append(img);
+			builder.append("]<br/>");
 		}
 
 		return builder.toString();
 	}
 
-	/**
-	 * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
-	 * 
-	 * @param url
-	 *            the URL to refresh
-	 * 
-	 * @return TRUE if it was refreshed
-	 */
-	private boolean refresh(String url) {
-		try {
-			openEx(url).close();
-			return true;
-		} catch (Exception e) {
-			return false;
+	/** Parses the integer assigned to the script variable {@code var} ("var NAME = 123;"). */
+	private int getIntVar(StringBuilder builder, String var) {
+		var = "var " + var;
+		int pos = builder.indexOf(var) + var.length();
+		if (pos < var.length()) // indexOf gave -1: fail clearly instead of parsing unrelated text
+			throw new NumberFormatException("Javascript variable not found: " + var);
+		// clamp the 20-char window: the declaration may sit at the very end of the scripts
+		String value = builder.substring(pos, Math.min(pos + 20, builder.length()));
+		return Integer.parseInt(value.split("=")[1].trim().split(";")[0].trim());
+	}
+
+	/** Lists, sorted, the "m..." image keys found after "|compressed|" in the given scripts. */
+	private List<String> getImageKeys(StringBuilder builder) {
+		List<String> chapKeys = new ArrayList<String>();
+		String start = "|compressed|";
+		String stop = ">";
+		int pos = builder.indexOf(start);
+		int pos2 = pos < 0 ? -1 : builder.indexOf(stop, pos + start.length()) - stop.length();
+		if (pos2 < pos + start.length()) // marker or terminator missing: nothing reliable to parse
+			return chapKeys;
+		String data = builder.substring(pos + start.length(), pos2).replace("|", "'");
+		for (String key : data.split("'")) {
+			if (key.startsWith("m") && !key.equals("manga")) {
+				chapKeys.add(key);
+			}
 		}
+
+		Collections.sort(chapKeys);
+		return chapKeys;
 	}
 
 	/**
-	 * Open the URL through the cache, but: retry a second time after 100ms if
-	 * it fails, remove the query part of the {@link URL} before saving it to
-	 * the cache (so it can be recalled later).
+	 * Open the URL through the cache, but: retry a second time after 100ms if it
+	 * fails, remove the query part of the {@link URL} before saving it to the cache
+	 * (so it can be recalled later).
 	 * 
-	 * @param url
-	 *            the {@link URL}
+	 * @param url the {@link URL}
 	 * 
 	 * @return the resource
 	 * 
-	 * @throws IOException
-	 *             in case of I/O error
+	 * @throws IOException in case of I/O error
 	 */
 	private InputStream openEx(String url) throws IOException {
 		try {
-			return Instance.getCache().open(new URL(url), withoutQuery(url),
-					this, true);
+			return Instance.getCache().open(new URL(url), withoutQuery(url), this, true);
 		} catch (Exception e) {
 			// second chance
 			try {
@@ -345,16 +271,14 @@ class MangaFox extends BasicSupport {
 			} catch (InterruptedException ee) {
 			}
 
-			return Instance.getCache().open(new URL(url), withoutQuery(url),
-					this, true);
+			return Instance.getCache().open(new URL(url), withoutQuery(url), this, true);
 		}
 	}
 
 	/**
 	 * Return the same input {@link URL} but without the query part.
 	 * 
-	 * @param url
-	 *            the inpiut {@link URL} as a {@link String}
+	 * @param url the input {@link URL} as a {@link String}
 	 * 
 	 * @return the input {@link URL} without query
 	 */
@@ -372,34 +296,9 @@ class MangaFox extends BasicSupport {
 		}
 	}
 
-	/**
-	 * Explode an HTML comma-separated list of values into a non-duplicate text
-	 * {@link List} .
-	 * 
-	 * @param values
-	 *            the comma-separated values in HTML format
-	 * 
-	 * @return the full list with no duplicate in text format
-	 */
-	private List<String> explode(String values) {
-		List<String> list = new ArrayList<String>();
-		if (values != null && !values.isEmpty()) {
-			for (String auth : values.split(",")) {
-				String a = StringUtils.unhtml(auth).trim();
-				if (!a.isEmpty() && !list.contains(a.trim())) {
-					list.add(a);
-				}
-			}
-		}
-
-		return list;
-	}
-
 	@Override
 	protected boolean supports(URL url) {
-		return "mangafox.me".equals(url.getHost())
-				|| "www.mangafox.me".equals(url.getHost())
-				|| "fanfox.net".equals(url.getHost())
-				|| "www.fanfox.net".equals(url.getHost());
+		return "mangafox.me".equals(url.getHost()) || "www.mangafox.me".equals(url.getHost())
+				|| "fanfox.net".equals(url.getHost()) || "www.fanfox.net".equals(url.getHost());
 	}
 }