fix cache, MangaLEL +search
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / MangaFox.java
index fb72bf531d00d9e6b20d88f127b9a19e98dc64e3..dae2d314f900d79a70c5d6237667d007a9a616dd 100644 (file)
@@ -4,13 +4,22 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map.Entry;
-import java.util.Scanner;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
 
 import be.nikiroo.fanfix.Instance;
+import be.nikiroo.fanfix.data.MetaData;
+import be.nikiroo.utils.Image;
+import be.nikiroo.utils.Progress;
 import be.nikiroo.utils.StringUtils;
 
 class MangaFox extends BasicSupport {
@@ -20,162 +29,104 @@ class MangaFox extends BasicSupport {
        }
 
        @Override
-       public String getSourceName() {
-               return "MangaFox.met";
-       }
-
-       @Override
-       protected String getSubject(URL source, InputStream in) {
-               return "manga";
-       }
-
-       @Override
-       public boolean isImageDocument(URL source, InputStream in)
-                       throws IOException {
-               return true;
-       }
-
-       @Override
-       protected List<String> getTags(URL source, InputStream in) {
-               List<String> tags = new ArrayList<String>();
-
-               String line = getLine(in, "/genres/", 0);
-               if (line != null) {
-                       line = StringUtils.unhtml(line);
-                       String[] tab = line.split(",");
-                       if (tab != null) {
-                               for (String tag : tab) {
-                                       tags.add(tag.trim());
-                               }
-                       }
+       protected MetaData getMeta() throws IOException {
+               MetaData meta = new MetaData();
+               Element doc = getSourceNode();
+
+               Element title = doc.getElementById("title");
+               Elements table = null;
+               if (title != null) {
+                       table = title.getElementsByTag("table");
                }
-
-               return tags;
-       }
-
-       @Override
-       protected String getTitle(URL source, InputStream in) {
-               String line = getLine(in, " property=\"og:title\"", 0);
-               if (line != null) {
-                       int pos = -1;
-                       for (int i = 0; i < 3; i++) {
-                               pos = line.indexOf('"', pos + 1);
-                       }
-
-                       if (pos >= 0) {
-                               line = line.substring(pos + 1);
-                               pos = line.indexOf('"');
-                               if (pos >= 0) {
-                                       return line.substring(0, pos);
+               if (table != null) {
+                       // Rows: header, data
+                       Elements rows = table.first().getElementsByTag("tr");
+                       if (rows.size() > 1) {
+                               table = rows.get(1).getElementsByTag("td");
+                               // Columns: Released, Authors, Artists, Genres
+                               if (table.size() < 4) {
+                                       table = null;
                                }
                        }
                }
 
-               return null;
-       }
+               meta.setTitle(getTitle());
+               if (table != null) {
+                       meta.setAuthor(getAuthors(table.get(1).text() + ","
+                                       + table.get(2).text()));
 
-       @Override
-       protected String getAuthor(URL source, InputStream in) {
-               List<String> authors = new ArrayList<String>();
-
-               String line = getLine(in, "/author/", 0, false);
-               if (line != null) {
-                       for (String ln : StringUtils.unhtml(line).split(",")) {
-                               if (ln != null && !ln.trim().isEmpty()
-                                               && !authors.contains(ln.trim())) {
-                                       authors.add(ln.trim());
-                               }
-                       }
+                       meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
+                       meta.setTags(explode(table.get(3).text()));
                }
+               meta.setSource(getType().getSourceName());
+               meta.setUrl(getSource().toString());
+               meta.setPublisher(getType().getSourceName());
+               meta.setUuid(getSource().toString());
+               meta.setLuid("");
+               meta.setLang("en");
+               meta.setSubject("manga");
+               meta.setType(getType().toString());
+               meta.setImageDocument(true);
+               meta.setCover(getCover());
+
+               return meta;
+       }
 
-               try {
-                       in.reset();
-               } catch (IOException e) {
-                       Instance.syserr(e);
-               }
+       private String getTitle() {
+               Element doc = getSourceNode();
 
-               line = getLine(in, "/artist/", 0, false);
-               if (line != null) {
-                       for (String ln : StringUtils.unhtml(line).split(",")) {
-                               if (ln != null && !ln.trim().isEmpty()
-                                               && !authors.contains(ln.trim())) {
-                                       authors.add(ln.trim());
-                               }
-                       }
+               Element title = doc.getElementById("title");
+               Element h1 = title.getElementsByTag("h1").first();
+               if (h1 != null) {
+                       return StringUtils.unhtml(h1.text()).trim();
                }
 
-               if (authors.isEmpty()) {
-                       return null;
-               } else {
-                       StringBuilder builder = new StringBuilder();
-                       for (String author : authors) {
-                               if (builder.length() > 0) {
-                                       builder.append(", ");
-                               }
-
-                               builder.append(author);
-                       }
-
-                       return builder.toString();
-               }
+               return null;
        }
 
-       @Override
-       protected String getDate(URL source, InputStream in) {
-               String line = getLine(in, "/released/", 0);
-               if (line != null) {
-                       line = StringUtils.unhtml(line);
-                       return line.trim();
+       private String getAuthors(String authorList) {
+               String author = "";
+               for (String auth : explode(authorList)) {
+                       if (!author.isEmpty()) {
+                               author = author + ", ";
+                       }
+                       author += auth;
                }
 
-               return null;
+               return author;
        }
 
        @Override
-       protected String getDesc(URL source, InputStream in) {
-               String line = getLine(in, " property=\"og:description\"", 0);
-               if (line != null) {
-                       int pos = -1;
-                       for (int i = 0; i < 3; i++) {
-                               pos = line.indexOf('"', pos + 1);
-                       }
-
-                       if (pos >= 0) {
-                               line = line.substring(pos + 1);
-                               pos = line.indexOf('"');
-                               if (pos >= 0) {
-                                       return line.substring(0, pos);
-                               }
-                       }
+       protected String getDesc() {
+               Element doc = getSourceNode();
+               Element title = doc.getElementsByClass("summary").first();
+               if (title != null) {
+                       return StringUtils.unhtml(title.text()).trim();
                }
 
                return null;
        }
 
-       @Override
-       protected URL getCover(URL url, InputStream in) {
-               String line = getLine(in, " property=\"og:image\"", 0);
-               String cover = null;
-               if (line != null) {
-                       int pos = -1;
-                       for (int i = 0; i < 3; i++) {
-                               pos = line.indexOf('"', pos + 1);
-                       }
-
-                       if (pos >= 0) {
-                               line = line.substring(pos + 1);
-                               pos = line.indexOf('"');
-                               if (pos >= 0) {
-                                       cover = line.substring(0, pos);
-                               }
-                       }
+       private Image getCover() {
+               Element doc = getSourceNode();
+               Element cover = doc.getElementsByClass("cover").first();
+               if (cover != null) {
+                       cover = cover.getElementsByTag("img").first();
                }
 
                if (cover != null) {
+                       String coverUrl = cover.absUrl("src");
+
+                       InputStream coverIn;
                        try {
-                               return new URL(cover);
-                       } catch (MalformedURLException e) {
-                               Instance.syserr(e);
+                               coverIn = openEx(coverUrl);
+                               try {
+                                       return new Image(coverIn);
+                               } finally {
+                                       coverIn.close();
+                               }
+                       } catch (IOException e) {
+                               Instance.getTraceHandler().error(e);
                        }
                }
 
@@ -183,162 +134,176 @@ class MangaFox extends BasicSupport {
        }
 
        @Override
-       protected List<Entry<String, URL>> getChapters(URL source, InputStream in) {
+       protected List<Entry<String, URL>> getChapters(Progress pg) {
                List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 
-               String volumeAt = "<h3 class=\"volume\">";
-               String linkAt = "href=\"http://mangafox.me/";
-               String endAt = "<script type=\"text/javascript\">";
+               String prefix = null; // each chapter starts with this prefix, then a
+                                                               // chapter number (including "x.5"), then name
 
-               boolean started = false;
+               Element doc = getSourceNode();
+               for (Element li : doc.getElementsByTag("li")) {
+                       Element el = li.getElementsByTag("h4").first();
+                       if (el == null) {
+                               el = li.getElementsByTag("h3").first();
+                       }
+                       if (el != null) {
+                               Element a = el.getElementsByTag("a").first();
+                               if (a != null) {
+                                       String title = StringUtils.unhtml(el.text()).trim();
+                                       try {
+                                               String url = a.absUrl("href");
+                                               if (url.endsWith("1.html")) {
+                                                       url = url.substring(0,
+                                                                       url.length() - "1.html".length());
+                                               }
+                                               if (!url.endsWith("/")) {
+                                                       url += "/";
+                                               }
 
-               @SuppressWarnings("resource")
-               Scanner scan = new Scanner(in, "UTF-8");
-               scan.useDelimiter("\\n");
-               while (scan.hasNext()) {
-                       String line = scan.next();
+                                               if (prefix == null || !prefix.isEmpty()) {
+                                                       StringBuilder possiblePrefix = new StringBuilder(
+                                                                       StringUtils.unhtml(a.text()).trim());
+                                                       while (possiblePrefix.length() > 0) {
+                                                               char car = possiblePrefix.charAt(possiblePrefix
+                                                                               .length() - 1);
+                                                               boolean punctuation = (car == '.' || car == ' ');
+                                                               boolean digit = (car >= '0' && car <= '9');
+                                                               if (!punctuation && !digit) {
+                                                                       break;
+                                                               }
+
+                                                               possiblePrefix.setLength(possiblePrefix
+                                                                               .length() - 1);
+                                                       }
+
+                                                       if (prefix == null) {
+                                                               prefix = possiblePrefix.toString();
+                                                       }
+
+                                                       if (!prefix.equalsIgnoreCase(possiblePrefix
+                                                                       .toString())) {
+                                                               prefix = ""; // prefix not ok
+                                                       }
+                                               }
 
-                       if (started && line.contains(endAt)) {
-                               break;
-                       } else if (!started && line.contains(volumeAt)) {
-                               started = true;
+                                               urls.add(new AbstractMap.SimpleEntry<String, URL>(
+                                                               title, new URL(url)));
+                                       } catch (Exception e) {
+                                               Instance.getTraceHandler().error(e);
+                                       }
+                               }
                        }
+               }
 
-                       if (started && line.contains(linkAt)) {
-                               // Chapter content url
-                               String url = null;
-                               int pos = line.indexOf("href=\"");
-                               if (pos >= 0) {
-                                       line = line.substring(pos + "href=\"".length());
-                                       pos = line.indexOf('\"');
+               if (prefix != null && !prefix.isEmpty()) {
+                       try {
+                               // We found a prefix, so everything should be sortable
+                               SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
+                               for (Entry<String, URL> entry : urls) {
+                                       String num = entry.getKey().substring(prefix.length() + 1)
+                                                       .trim();
+                                       String name = "";
+                                       int pos = num.indexOf(' ');
                                        if (pos >= 0) {
-                                               url = line.substring(0, pos);
+                                               name = num.substring(pos).trim();
+                                               num = num.substring(0, pos).trim();
                                        }
-                               }
 
-                               // Chapter name
-                               String name = null;
-                               if (scan.hasNext()) {
-                                       name = StringUtils.unhtml(scan.next()).trim();
-                                       // Remove the "new" tag if present
-                                       if (name.endsWith("new")) {
-                                               name = name.substring(0, name.length() - 3).trim();
+                                       if (!name.isEmpty()) {
+                                               name = "Tome " + num + ": " + name;
+                                       } else {
+                                               name = "Tome " + num;
                                        }
-                               }
 
-                               // to help with the retry and the originalUrl
-                               refresh(url);
+                                       double key = Double.parseDouble(num);
 
-                               try {
-                                       final String key = name;
-                                       final URL value = new URL(url);
-                                       urls.add(new Entry<String, URL>() {
-                                               public URL setValue(URL value) {
-                                                       return null;
-                                               }
-
-                                               public String getKey() {
-                                                       return key;
-                                               }
-
-                                               public URL getValue() {
-                                                       return value;
-                                               }
-                                       });
-                               } catch (MalformedURLException e) {
-                                       Instance.syserr(e);
+                                       map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
+                                                       entry.getValue()));
                                }
+                               urls = new ArrayList<Entry<String, URL>>(map.values());
+                       } catch (NumberFormatException e) {
+                               Instance.getTraceHandler()
+                                               .error(new IOException(
+                                                               "Cannot find a tome number, revert to default sorting",
+                                                               e));
+                               // by default, the chapters are in reversed order
+                               Collections.reverse(urls);
                        }
+               } else {
+                       // by default, the chapters are in reversed order
+                       Collections.reverse(urls);
                }
 
-               // the chapters are in reversed order
-               Collections.reverse(urls);
-
                return urls;
        }
 
        @Override
-       protected String getChapterContent(URL source, InputStream in, int number) {
-               StringBuilder builder = new StringBuilder();
-               String base = getCurrentReferer().toString();
-               int pos = base.lastIndexOf('/');
-               base = base.substring(0, pos + 1); // including the '/' at the end
+       protected String getChapterContent(URL chapUrl, int number, Progress pg)
+                       throws IOException {
+               if (pg == null) {
+                       pg = new Progress();
+               }
 
-               boolean close = false;
-               while (in != null) {
-                       String linkNextLine = getLine(in, "return enlarge()", 0);
-                       try {
-                               in.reset();
-                       } catch (IOException e) {
-                               Instance.syserr(e);
-                       }
+               StringBuilder builder = new StringBuilder();
 
-                       String linkImageLine = getLine(in, "return enlarge()", 1);
-                       String linkNext = null;
-                       String linkImage = null;
-                       pos = linkNextLine.indexOf("href=\"");
-                       if (pos >= 0) {
-                               linkNextLine = linkNextLine.substring(pos + "href=\"".length());
-                               pos = linkNextLine.indexOf('\"');
-                               if (pos >= 0) {
-                                       linkNext = linkNextLine.substring(0, pos);
-                               }
-                       }
-                       pos = linkImageLine.indexOf("src=\"");
-                       if (pos >= 0) {
-                               linkImageLine = linkImageLine
-                                               .substring(pos + "src=\"".length());
-                               pos = linkImageLine.indexOf('\"');
-                               if (pos >= 0) {
-                                       linkImage = linkImageLine.substring(0, pos);
-                               }
-                       }
+               String url = chapUrl.toString();
+               InputStream imageIn = null;
+               Element imageDoc = null;
 
-                       if (linkImage != null) {
-                               builder.append("[");
-                               // to help with the retry and the originalUrl, part 1
-                               builder.append(withoutQuery(linkImage));
-                               builder.append("]\n");
+               // 1. find out how many images there are
+               int size;
+               try {
+                       // note: when used, the base URL can be an ad-page
+                       imageIn = openEx(url + "1.html");
+                       imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
+               } catch (IOException e) {
+                       Instance.getTraceHandler().error(
+                                       new IOException("Cannot get image " + 1 + " of manga", e));
+               } finally {
+                       if (imageIn != null) {
+                               imageIn.close();
                        }
+               }
+               Element select = imageDoc.getElementsByClass("m").first();
+               Elements options = select.getElementsByTag("option");
+               size = options.size() - 1; // last is "Comments"
 
-                       // to help with the retry and the originalUrl, part 2
-                       refresh(linkImage);
+               pg.setMinMax(0, size);
 
-                       if (close) {
+               // 2. list them
+               for (int i = 1; i <= size; i++) {
+                       if (i > 1) { // because first one was opened for size
                                try {
-                                       in.close();
-                               } catch (IOException e) {
-                                       Instance.syserr(e);
-                               }
-                       }
+                                       imageIn = openEx(url + i + ".html");
+                                       imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
+                                                       + ".html");
+
+                                       String linkImage = imageDoc.getElementById("image").absUrl(
+                                                       "src");
+                                       if (linkImage != null) {
+                                               builder.append("[");
+                                               // to help with the retry and the originalUrl, part 1
+                                               builder.append(withoutQuery(linkImage));
+                                               builder.append("]<br/>");
+                                       }
 
-                       in = null;
-                       if (linkNext != null && !"javascript:void(0);".equals(linkNext)) {
-                               URL url;
-                               try {
-                                       url = new URL(base + linkNext);
-                                       in = openEx(base + linkNext);
-                                       setCurrentReferer(url);
+                                       // to help with the retry and the originalUrl, part 2
+                                       refresh(linkImage);
                                } catch (IOException e) {
-                                       Instance.syserr(new IOException(
-                                                       "Cannot get the next manga page which is: "
-                                                                       + linkNext, e));
+                                       Instance.getTraceHandler().error(
+                                                       new IOException("Cannot get image " + i
+                                                                       + " of manga", e));
+                               } finally {
+                                       if (imageIn != null) {
+                                               imageIn.close();
+                                       }
                                }
                        }
-
-                       close = true;
                }
 
-               setCurrentReferer(source);
                return builder.toString();
        }
 
-       @Override
-       protected boolean supports(URL url) {
-               return "mangafox.me".equals(url.getHost())
-                               || "www.mangafox.me".equals(url.getHost());
-       }
-
        /**
         * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
         * 
@@ -371,8 +336,8 @@ class MangaFox extends BasicSupport {
         */
        private InputStream openEx(String url) throws IOException {
                try {
-                       return Instance.getCache().open(new URL(url), this, true,
-                                       withoutQuery(url));
+                       return Instance.getCache().open(new URL(url), withoutQuery(url),
+                                       this, true);
                } catch (Exception e) {
                        // second chance
                        try {
@@ -380,8 +345,8 @@ class MangaFox extends BasicSupport {
                        } catch (InterruptedException ee) {
                        }
 
-                       return Instance.getCache().open(new URL(url), this, true,
-                                       withoutQuery(url));
+                       return Instance.getCache().open(new URL(url), withoutQuery(url),
+                                       this, true);
                }
        }
 
@@ -406,4 +371,35 @@ class MangaFox extends BasicSupport {
                        return null;
                }
        }
+
+       /**
+        * Explode an HTML comma-separated list of values into a {@link List} of
+        * plain-text values without duplicates.
+        * 
+        * @param values
+        *            the comma-separated values in HTML format
+        * 
+        * @return the full list, without duplicates, in text format
+        */
+       private List<String> explode(String values) {
+               List<String> list = new ArrayList<String>();
+               if (values != null && !values.isEmpty()) {
+                       for (String auth : values.split(",")) {
+                               String a = StringUtils.unhtml(auth).trim();
+                               if (!a.isEmpty() && !list.contains(a.trim())) {
+                                       list.add(a);
+                               }
+                       }
+               }
+
+               return list;
+       }
+
+       @Override
+       protected boolean supports(URL url) {
+               return "mangafox.me".equals(url.getHost())
+                               || "www.mangafox.me".equals(url.getHost())
+                               || "fanfox.net".equals(url.getHost())
+                               || "www.fanfox.net".equals(url.getHost());
+       }
 }