mangafox: fix mangafox, but site is too full of javascript and obvious anti-copy...

[fanfix.git] / src / be / nikiroo / fanfix / supported / MangaFox.java
diff --git a/src/be/nikiroo/fanfix/supported/MangaFox.java b/src/be/nikiroo/fanfix/supported/MangaFox.java

index 8fc1965dfe20839fe9c406a07f12ad52c3a5527a..a9db419e803eae8cb5125e63d1af346a00978205 100644 (file)
--- a/src/be/nikiroo/fanfix/supported/MangaFox.java
+++ b/src/be/nikiroo/fanfix/supported/MangaFox.java
@@ -4,11 +4,15 @@ import java.io.IOException;
  import java.io.InputStream;
  import java.net.MalformedURLException;
  import java.net.URL;
+import java.util.AbstractMap;
  import java.util.ArrayList;
  import java.util.Collections;
  import java.util.List;
  import java.util.Map.Entry;
-import java.util.Scanner;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
  
  import be.nikiroo.fanfix.Instance;
  import be.nikiroo.fanfix.data.MetaData;
@@ -16,180 +20,102 @@ import be.nikiroo.utils.Image;
  import be.nikiroo.utils.Progress;
  import be.nikiroo.utils.StringUtils;
  
-class MangaFox extends BasicSupport_Deprecated {
+class MangaFox extends BasicSupport {
         @Override
         protected boolean isHtml() {
                 return true;
         }
  
         @Override
-       public String getSourceName() {
-               return "MangaFox.me";
-       }
-
-       @Override
-       protected MetaData getMeta(URL source, InputStream in) throws IOException {
+       protected MetaData getMeta() throws IOException {
                 MetaData meta = new MetaData();
  
-               meta.setTitle(getTitle(reset(in)));
-               meta.setAuthor(getAuthor(reset(in)));
-               meta.setDate(getDate(reset(in)));
-               meta.setTags(getTags(reset(in)));
-               meta.setSource(getSourceName());
-               meta.setUrl(source.toString());
-               meta.setPublisher(getSourceName());
-               meta.setUuid(source.toString());
+               meta.setTitle(getTitle());
+               // No date anymore on mangafox
+               // meta.setDate();
+               meta.setAuthor(getAuthor());
+               meta.setTags(getTags());
+               meta.setSource(getType().getSourceName());
+               meta.setUrl(getSource().toString());
+               meta.setPublisher(getType().getSourceName());
+               meta.setUuid(getSource().toString());
                 meta.setLuid("");
-               meta.setLang("EN");
+               meta.setLang("en");
                 meta.setSubject("manga");
                 meta.setType(getType().toString());
                 meta.setImageDocument(true);
-               meta.setCover(getCover(reset(in)));
+               meta.setCover(getCover());
  
                 return meta;
         }
  
-       private List<String> getTags(InputStream in) {
-               List<String> tags = new ArrayList<String>();
+       private String getTitle() {
+               Element doc = getSourceNode();
  
-               String line = getLine(in, "/genres/", 0);
-               if (line != null) {
-                       line = StringUtils.unhtml(line);
-                       String[] tab = line.split(",");
-                       if (tab != null) {
-                               for (String tag : tab) {
-                                       tags.add(tag.trim());
-                               }
-                       }
-               }
-
-               return tags;
-       }
-
-       private String getTitle(InputStream in) {
-               String line = getLine(in, " property=\"og:title\"", 0);
-               if (line != null) {
-                       int pos = -1;
-                       for (int i = 0; i < 3; i++) {
-                               pos = line.indexOf('"', pos + 1);
-                       }
-
-                       if (pos >= 0) {
-                               line = line.substring(pos + 1);
-                               pos = line.indexOf('"');
-                               if (pos >= 0) {
-                                       return line.substring(0, pos);
-                               }
-                       }
+               Element el = doc.getElementsByClass("detail-info-right-title-font").first();
+               if (el != null) {
+                       return StringUtils.unhtml(el.text()).trim();
                 }
  
                 return null;
         }
  
-       private String getAuthor(InputStream in) {
-               List<String> authors = new ArrayList<String>();
-
-               String line = getLine(in, "/author/", 0, false);
-               if (line != null) {
-                       for (String ln : StringUtils.unhtml(line).split(",")) {
-                               if (ln != null && !ln.trim().isEmpty()
-                                               && !authors.contains(ln.trim())) {
-                                       authors.add(ln.trim());
-                               }
-                       }
-               }
-
-               try {
-                       in.reset();
-               } catch (IOException e) {
-                       Instance.getTraceHandler().error(e);
-               }
-
-               line = getLine(in, "/artist/", 0, false);
-               if (line != null) {
-                       for (String ln : StringUtils.unhtml(line).split(",")) {
-                               if (ln != null && !ln.trim().isEmpty()
-                                               && !authors.contains(ln.trim())) {
-                                       authors.add(ln.trim());
-                               }
-                       }
-               }
-
-               if (authors.isEmpty()) {
-                       return null;
-               }
-
+       private String getAuthor() {
                 StringBuilder builder = new StringBuilder();
-               for (String author : authors) {
-                       if (builder.length() > 0) {
+               for (String author : getListA("detail-info-right-say")) {
+                       if (builder.length() > 0)
                                 builder.append(", ");
-                       }
-
                         builder.append(author);
                 }
  
                 return builder.toString();
         }
  
-       private String getDate(InputStream in) {
-               String line = getLine(in, "/released/", 0);
-               if (line != null) {
-                       line = StringUtils.unhtml(line);
-                       return line.trim();
-               }
-
-               return null;
+       private List<String> getTags() {
+               return getListA("detail-info-right-tag-list");
         }
  
-       @Override
-       protected String getDesc(URL source, InputStream in) {
-               String line = getLine(in, " property=\"og:description\"", 0);
-               if (line != null) {
-                       int pos = -1;
-                       for (int i = 0; i < 3; i++) {
-                               pos = line.indexOf('"', pos + 1);
-                       }
+       private List<String> getListA(String uniqueClass) {
+               List<String> list = new ArrayList<String>();
  
-                       if (pos >= 0) {
-                               line = line.substring(pos + 1);
-                               pos = line.indexOf('"');
-                               if (pos >= 0) {
-                                       return line.substring(0, pos);
-                               }
+               Element doc = getSourceNode();
+               Element el = doc.getElementsByClass(uniqueClass).first();
+               if (el != null) {
+                       for (Element valueA : el.getElementsByTag("a")) {
+                               list.add(StringUtils.unhtml(valueA.text()).trim());
                         }
                 }
  
-               return null;
+               return list;
         }
  
-       private Image getCover(InputStream in) {
-               String line = getLine(in, " property=\"og:image\"", 0);
-               String cover = null;
-               if (line != null) {
-                       int pos = -1;
-                       for (int i = 0; i < 3; i++) {
-                               pos = line.indexOf('"', pos + 1);
-                       }
-
-                       if (pos >= 0) {
-                               line = line.substring(pos + 1);
-                               pos = line.indexOf('"');
-                               if (pos >= 0) {
-                                       cover = line.substring(0, pos);
-                               }
-                       }
+       @Override
+       protected String getDesc() {
+               Element doc = getSourceNode();
+               Element title = doc.getElementsByClass("fullcontent").first();
+               if (title != null) {
+                       return StringUtils.unhtml(title.text()).trim();
                 }
  
+               return null;
+       }
+
+       private Image getCover() {
+               Element doc = getSourceNode();
+               Element cover = doc.getElementsByClass("detail-info-cover-img").first();
                 if (cover != null) {
+                       String coverUrl = cover.absUrl("src");
+
                         InputStream coverIn;
                         try {
-                               coverIn = openEx(cover);
+                               coverIn = openEx(coverUrl);
                                 try {
                                         return new Image(coverIn);
                                 } finally {
                                         coverIn.close();
                                 }
                         } catch (IOException e) {
+                               Instance.getTraceHandler().error(e);
                         }
                 }
  
@@ -197,226 +123,147 @@ class MangaFox extends BasicSupport_Deprecated {
         }
  
         @Override
-       protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
-                       Progress pg) {
+       protected List<Entry<String, URL>> getChapters(Progress pg) {
                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
  
-               String volumeAt = "<h3 class=\"volume\">";
-               String linkAt = "href=\"http://mangafox.me/";
-               String endAt = "<script type=\"text/javascript\">";
-
-               boolean started = false;
-
-               @SuppressWarnings("resource")
-               Scanner scan = new Scanner(in, "UTF-8");
-               scan.useDelimiter("\\n");
-               while (scan.hasNext()) {
-                       String line = scan.next();
+               String prefix = getTitle(); // each chapter starts with this prefix, then a
+               // chapter number (including "x.5"), then name
  
-                       if (started && line.contains(endAt)) {
-                               break;
-                       } else if (!started && line.contains(volumeAt)) {
-                               started = true;
-                       }
-
-                       if (started && line.contains(linkAt)) {
-                               // Chapter content url
-                               String url = null;
-                               int pos = line.indexOf("href=\"");
-                               if (pos >= 0) {
-                                       line = line.substring(pos + "href=\"".length());
-                                       pos = line.indexOf('\"');
-                                       if (pos >= 0) {
-                                               url = line.substring(0, pos);
-                                       }
+               // normally, only one list...
+               Element doc = getSourceNode();
+               for (Element list : doc.getElementsByClass("detail-main-list")) {
+                       for (Element el : list.getElementsByTag("a")) {
+                               String title = el.attr("title");
+                               if (title.startsWith(prefix)) {
+                                       title = title.substring(prefix.length()).trim();
                                 }
  
-                               // Chapter name
-                               String name = null;
-                               if (scan.hasNext()) {
-                                       name = StringUtils.unhtml(scan.next()).trim();
-                                       // Remove the "new" tag if present
-                                       if (name.endsWith("new")) {
-                                               name = name.substring(0, name.length() - 3).trim();
-                                       }
-                               }
+                               String url = el.absUrl("href");
  
                                 try {
-                                       final String key = name;
-                                       final URL value = new URL(url);
-                                       urls.add(new Entry<String, URL>() {
-                                               @Override
-                                               public URL setValue(URL value) {
-                                                       return null;
-                                               }
-
-                                               @Override
-                                               public String getKey() {
-                                                       return key;
-                                               }
-
-                                               @Override
-                                               public URL getValue() {
-                                                       return value;
-                                               }
-                                       });
-                               } catch (MalformedURLException e) {
+                                       urls.add(new AbstractMap.SimpleEntry<String, URL>(title, new URL(url)));
+                               } catch (Exception e) {
                                         Instance.getTraceHandler().error(e);
                                 }
                         }
                 }
  
-               if (pg == null) {
-                       pg = new Progress(0, urls.size());
-               } else {
-                       pg.setMinMax(0, urls.size());
-               }
-
-               int i = 1;
-               for (Entry<String, URL> entry : urls) {
-                       // to help with the retry and the originalUrl
-                       refresh(entry.getValue().toString());
-                       pg.setProgress(i++);
-               }
-
-               // the chapters are in reversed order
+               // by default, the chapters are in reversed order
                 Collections.reverse(urls);
  
                 return urls;
         }
  
         @Override
-       protected String getChapterContent(URL source, InputStream in, int number,
-                       Progress pg) {
+       protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
                 if (pg == null) {
                         pg = new Progress();
-               } else {
-                       // Since we have no idea how many images we have, we cycle from 0
-                       // to max, then again, then again...
-                       pg.setMinMax(0, 20);
                 }
  
                 StringBuilder builder = new StringBuilder();
-               String base = getCurrentReferer().toString();
-               int pos = base.lastIndexOf('/');
-               base = base.substring(0, pos + 1); // including the '/' at the end
-
-               int i = 1;
-               boolean close = false;
-               while (in != null) {
-                       String linkNextLine = getLine(in, "return enlarge()", 0);
-                       try {
-                               in.reset();
-                       } catch (IOException e) {
-                               Instance.getTraceHandler().error(e);
-                       }
  
-                       String linkImageLine = getLine(in, "return enlarge()", 1);
-                       String linkNext = null;
-                       String linkImage = null;
-                       pos = linkNextLine.indexOf("href=\"");
-                       if (pos >= 0) {
-                               linkNextLine = linkNextLine.substring(pos + "href=\"".length());
-                               pos = linkNextLine.indexOf('\"');
-                               if (pos >= 0) {
-                                       linkNext = linkNextLine.substring(0, pos);
-                               }
-                       }
-                       pos = linkImageLine.indexOf("src=\"");
-                       if (pos >= 0) {
-                               linkImageLine = linkImageLine
-                                               .substring(pos + "src=\"".length());
-                               pos = linkImageLine.indexOf('\"');
-                               if (pos >= 0) {
-                                       linkImage = linkImageLine.substring(0, pos);
-                               }
-                       }
-
-                       if (linkImage != null) {
-                               builder.append("[");
-                               // to help with the retry and the originalUrl, part 1
-                               builder.append(withoutQuery(linkImage));
-                               builder.append("]<br/>");
-                       }
-
-                       // to help with the retry and the originalUrl, part 2
-                       refresh(linkImage);
-                       pg.setProgress((i++) % pg.getMax());
-
-                       if (close) {
-                               try {
-                                       in.close();
-                               } catch (IOException e) {
-                                       Instance.getTraceHandler().error(e);
-                               }
-                       }
+               Document chapDoc = DataUtil.load(Instance.getCache().open(chapUrl, this, false), "UTF-8", chapUrl.toString());
+
+               // Example of what we want:
+               // URL: http://fanfox.net/manga/solo_leveling/c110.5/1.html#ipg1
+               // IMAGE, not working:
+               // http://s.fanfox.net/store/manga/29037/110.5/compressed/s034.jpg?token=f630767b0c96f6cc793fc8f1fc177c0ae9342eb1&amp;ttl=1585929600
+               // IMAGE, working:
+               // http://s.fanfox.net/store/manga/29037/000.0/compressed/m2018110o_143554_925.jpg?token=7d74569986335d49651ef1040f7dcb9dbd559b1b&ttl=1585929600
+               // NOTE: (c110.5 -> 110.5, c000 -> 000.0)
+               // NOTE: image key: m2018110o_143554_925 can be found in the script, but not
+               // sorted
+
+               // 0. Get the javascript content
+               StringBuilder javascript = new StringBuilder();
+               for (Element script : chapDoc.getElementsByTag("script")) {
+                       javascript.append(script.html());
+                       javascript.append("\n");
+               }
  
-                       in = null;
-                       if (linkNext != null && !"javascript:void(0);".equals(linkNext)) {
-                               URL url;
-                               try {
-                                       url = new URL(base + linkNext);
-                                       in = openEx(base + linkNext);
-                                       setCurrentReferer(url);
-                                       pg.setProgress((i++) % pg.getMax());
-                               } catch (IOException e) {
-                                       Instance.getTraceHandler().error(
-                                                       new IOException(
-                                                                       "Cannot get the next manga page which is: "
-                                                                                       + linkNext, e));
-                               }
-                       }
+               // 1. Get the chapter url part
+               String chap = chapUrl.getPath();
+               chap = chap.split("#")[0];
+               if (chap.endsWith("/1.html")) {
+                       chap = chap.substring(0, chap.length() - "/1.html".length());
+               }
+               int pos = chap.lastIndexOf("/");
+               chap = chap.substring(pos + 1);
+               if (!chap.contains(".")) {
+                       chap = chap + ".0";
+               }
+               if (chap.startsWith("c")) {
+                       chap = chap.substring(1);
+               }
  
-                       close = true;
+               // 2. Token:
+               // <meta name="og:image"
+               // content="http://fmcdn.fanfox.net/store/manga/29037/cover.jpg?token=4b2056d83973716c715f2404940822dff942a7b4&ttl=1585998000&v=1584582495"
+               Element el = chapDoc.select("meta[name=\"og:image\"]").first();
+               String token = el.attr("content").split("\\?")[1];
+
+               // 3. Comic ID
+               int comicId = getIntVar(javascript, "comicid");
+
+               // 4. Get images
+               List<String> chapKeys = getImageKeys(javascript);
+               // http://s.fanfox.net/store/manga/29037/000.0/compressed/m2018110o_143554_925.jpg?token=7d74569986335d49651ef1040f7dcb9dbd559b1b&ttl=1585929600
+               String base = "http://s.fanfox.net/store/manga/%s/%s/compressed/%s.jpg?%s";
+               for (String key : chapKeys) {
+                       String img = String.format(base, comicId, chap, key, token);
+                       builder.append("[");
+                       builder.append(img);
+                       builder.append("]<br/>");
                 }
  
-               setCurrentReferer(source);
                 return builder.toString();
         }
  
-       @Override
-       protected boolean supports(URL url) {
-               // Broken code (see MangaFoxNew)
-               if (true)
-                       return false;
-               return "mangafox.me".equals(url.getHost())
-                               || "www.mangafox.me".equals(url.getHost());
+       private int getIntVar(StringBuilder builder, String var) {
+               var = "var " + var;
+
+               int pos = builder.indexOf(var) + var.length();
+               String value = builder.subSequence(pos, pos + 20).toString();
+               value = value.split("=")[1].trim();
+               value = value.split(";")[0].trim();
+
+               return Integer.parseInt(value);
         }
  
-       /**
-        * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
-        * 
-        * @param url
-        *            the URL to refresh
-        * 
-        * @return TRUE if it was refreshed
-        */
-       private boolean refresh(String url) {
-               try {
-                       openEx(url).close();
-                       return true;
-               } catch (Exception e) {
-                       return false;
+       private List<String> getImageKeys(StringBuilder builder) {
+               List<String> chapKeys = new ArrayList<String>();
+
+               String start = "|compressed|";
+               String stop = ">";
+               int pos = builder.indexOf(start) + start.length();
+               int pos2 = builder.indexOf(stop, pos) - stop.length();
+
+               String data = builder.substring(pos, pos2);
+               data = data.replace("|", "'");
+               for (String key : data.split("'")) {
+                       if (key.startsWith("m") && !key.equals("manga")) {
+                               chapKeys.add(key);
+                       }
                 }
+
+               Collections.sort(chapKeys);
+               return chapKeys;
         }
  
         /**
-        * Open the URL through the cache, but: retry a second time after 100ms if
-        * it fails, remove the query part of the {@link URL} before saving it to
-        * the cache (so it can be recalled later).
+        * Open the URL through the cache, but: retry a second time after 100ms if it
+        * fails, remove the query part of the {@link URL} before saving it to the cache
+        * (so it can be recalled later).
          * 
-        * @param url
-        *            the {@link URL}
+        * @param url the {@link URL}
          * 
          * @return the resource
          * 
-        * @throws IOException
-        *             in case of I/O error
+        * @throws IOException in case of I/O error
          */
         private InputStream openEx(String url) throws IOException {
                 try {
-                       return Instance.getCache().open(new URL(url), this, true,
-                                       withoutQuery(url));
+                       return Instance.getCache().open(new URL(url), withoutQuery(url), this, true);
                 } catch (Exception e) {
                         // second chance
                         try {
@@ -424,16 +271,14 @@ class MangaFox extends BasicSupport_Deprecated {
                         } catch (InterruptedException ee) {
                         }
  
-                       return Instance.getCache().open(new URL(url), this, true,
-                                       withoutQuery(url));
+                       return Instance.getCache().open(new URL(url), withoutQuery(url), this, true);
                 }
         }
  
         /**
          * Return the same input {@link URL} but without the query part.
          * 
-        * @param url
-        *            the inpiut {@link URL} as a {@link String}
+        * @param url the input {@link URL} as a {@link String}
          * 
          * @return the input {@link URL} without query
          */
@@ -450,4 +295,10 @@ class MangaFox extends BasicSupport_Deprecated {
                         return null;
                 }
         }
+
+       @Override
+       protected boolean supports(URL url) {
+               return "mangafox.me".equals(url.getHost()) || "www.mangafox.me".equals(url.getHost())
+                               || "fanfox.net".equals(url.getHost()) || "www.fanfox.net".equals(url.getHost());
+       }
  }