update e621 (site changed + move to non-deprecated BasicSupport)
author Niki Roo <niki@nikiroo.be>
Thu, 2 Apr 2020 13:11:32 +0000 (15:11 +0200)
committer Niki Roo <niki@nikiroo.be>
Thu, 2 Apr 2020 13:11:32 +0000 (15:11 +0200)
src/be/nikiroo/fanfix/supported/BasicSupportImages.java
src/be/nikiroo/fanfix/supported/E621.java

index 69a7c86720002b18c36bd2a4a79ee72353984429..4b7eeba36574f084ee6c044b1d1dd4ced798416e 100644 (file)
@@ -20,16 +20,32 @@ public class BasicSupportImages {
         * Check if the given resource can be a local image or a remote image, then
         * refresh the cache with it if it is.
         * 
+        * @param support
+        *            the support to use to download the resource (can be NULL)
         * @param dir
         *            the local directory to search, if any
         * @param line
         *            the resource to check
         * 
         * @return the image if found, or NULL
-        * 
         */
        public Image getImage(BasicSupport support, File dir, String line) {
                URL url = getImageUrl(support, dir, line);
+               return getImage(support,url);
+       }
+       
+       /**
+        * Check if the given resource can be a local image or a remote image, then
+        * refresh the cache with it if it is.
+        * 
+        * @param support
+        *            the support to use to download the resource (can be NULL)
+        * @param url
+        *            the actual URL to check (file or remote, can be NULL)
+        * 
+        * @return the image if found, or NULL
+        */
+       public Image getImage(BasicSupport support, URL url) {
                if (url != null) {
                        if ("file".equals(url.getProtocol())) {
                                if (new File(url.getPath()).isDirectory()) {
@@ -58,6 +74,8 @@ public class BasicSupportImages {
         * Check if the given resource can be a local image or a remote image, then
         * refresh the cache with it if it is.
         * 
+        * @param support
+        *            the support to use to download the resource (can be NULL)
         * @param dir
         *            the local directory to search, if any
         * @param line
index dfa9e5ed6a60e4694fc8494dcf196b4328259ee6..316ac2c2bcfbad54411467af2f34713e37772d8d 100644 (file)
@@ -3,28 +3,30 @@ package be.nikiroo.fanfix.supported;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
-import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLDecoder;
 import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Date;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map.Entry;
-import java.util.Scanner;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
 
 import be.nikiroo.fanfix.Instance;
-import be.nikiroo.fanfix.data.Chapter;
 import be.nikiroo.fanfix.data.MetaData;
-import be.nikiroo.fanfix.data.Story;
+import be.nikiroo.utils.IOUtils;
 import be.nikiroo.utils.Image;
 import be.nikiroo.utils.Progress;
 import be.nikiroo.utils.StringUtils;
 
 /**
- * Support class for <a href="http://e621.net/">e621.net</a> and <a
- * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
+ * Support class for <a href="http://e621.net/">e621.net</a> and
+ * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
  * including some of MLP.
  * <p>
  * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
@@ -32,82 +34,7 @@ import be.nikiroo.utils.StringUtils;
  * 
  * @author niki
  */
-class E621 extends BasicSupport_Deprecated {
-       @Override
-       protected MetaData getMeta(URL source, InputStream in) throws IOException {
-               MetaData meta = new MetaData();
-
-               meta.setTitle(getTitle(reset(in)));
-               meta.setAuthor(getAuthor(source, reset(in)));
-               meta.setDate("");
-               meta.setTags(getTags(source, reset(in), false));
-               meta.setSource(getType().getSourceName());
-               meta.setUrl(source.toString());
-               meta.setPublisher(getType().getSourceName());
-               meta.setUuid(source.toString());
-               meta.setLuid("");
-               meta.setLang("en");
-               meta.setSubject("Furry");
-               meta.setType(getType().toString());
-               meta.setImageDocument(true);
-               meta.setCover(getCover(source, reset(in)));
-               meta.setFakeCover(true);
-
-               return meta;
-       }
-
-       private List<String> getTags(URL source, InputStream in, boolean authors) {
-               List<String> tags = new ArrayList<String>();
-
-               if (isSearch(source)) {
-                       String tagLine = getLine(in, "id=\"tag-sidebar\"", 1);
-                       if (tagLine != null) {
-                               String key = "href=\"";
-                               for (int pos = tagLine.indexOf(key); pos >= 0; pos = tagLine
-                                               .indexOf(key, pos + 1)) {
-                                       int end = tagLine.indexOf("\"", pos + key.length());
-                                       if (end >= 0) {
-                                               String href = tagLine.substring(pos, end);
-                                               String subkey;
-                                               if (authors)
-                                                       subkey = "?name=";
-                                               else
-                                                       subkey = "?title=";
-                                               if (href.contains(subkey)) {
-                                                       String tag = href.substring(href.indexOf(subkey)
-                                                                       + subkey.length());
-                                                       try {
-                                                               tags.add(URLDecoder.decode(tag, "UTF-8"));
-                                                       } catch (UnsupportedEncodingException e) {
-                                                               // supported JVMs must have UTF-8 support
-                                                               e.printStackTrace();
-                                                       }
-                                               }
-                                       }
-                               }
-
-                       }
-               }
-
-               return tags;
-       }
-
-       @Override
-       public Story process(URL url, Progress pg) throws IOException {
-               // There is no chapters on e621, just pagination...
-               Story story = super.process(url, pg);
-
-               Chapter only = new Chapter(1, null);
-               for (Chapter chap : story) {
-                       only.getParagraphs().addAll(chap.getParagraphs());
-               }
-
-               story.getChapters().clear();
-               story.getChapters().add(only);
-
-               return story;
-       }
-
+class E621 extends BasicSupport {
        @Override
        protected boolean supports(URL url) {
                String host = url.getHost();
@@ -115,8 +42,7 @@ class E621 extends BasicSupport_Deprecated {
                        host = host.substring("www.".length());
                }
 
-               return ("e621.net".equals(host) || "e926.net".equals(host))
-                               && (isPool(url) || isSearch(url));
+               return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
        }
 
        @Override
@@ -124,163 +50,84 @@ class E621 extends BasicSupport_Deprecated {
                return true;
        }
 
-       private Image getCover(URL source, InputStream in) throws IOException {
-               URL urlForCover = source;
-               if (isPool(source)) {
-                       urlForCover = new URL(source.toString() + "?page=1");
-               }
+       @Override
+       protected MetaData getMeta() throws IOException {
+               MetaData meta = new MetaData();
 
-               String images = getChapterContent(urlForCover, in, 1, null);
-               if (!images.isEmpty()) {
-                       int pos = images.indexOf("<br/>");
-                       if (pos >= 0) {
-                               images = images.substring(1, pos - 1);
-                               return getImage(this, null, images);
-                       }
-               }
+               meta.setTitle(getTitle());
+               meta.setAuthor(getAuthor());
+               meta.setDate("");
+               meta.setTags(getTags());
+               meta.setSource(getType().getSourceName());
+               meta.setUrl(getSource().toString());
+               meta.setPublisher(getType().getSourceName());
+               meta.setUuid(getSource().toString());
+               meta.setLuid("");
+               meta.setLang("en");
+               meta.setSubject("Furry");
+               meta.setType(getType().toString());
+               meta.setImageDocument(true);
+               meta.setCover(getCover());
+               meta.setFakeCover(true);
 
-               return null;
+               return meta;
        }
 
-       private String getAuthor(URL source, InputStream in) {
-               if (isSearch(source)) {
+       @Override
+       protected String getDesc() throws IOException {
+               if (isSearchOrSet(getSource())) {
                        StringBuilder builder = new StringBuilder();
-                       for (String author : getTags(source, in, true)) {
-                               if (builder.length() > 0)
-                                       builder.append(", ");
-                               builder.append(author);
+                       builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
+                                       .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
+                                       .append("\tTags: ");//
+                       for (String tag : getTags()) {
+                               builder.append("\t\t").append(tag);
                        }
 
                        return builder.toString();
                }
 
-               String author = getLine(in, "href=\"/post/show/", 0);
-               if (author != null) {
-                       String key = "href=\"";
-                       int pos = author.indexOf(key);
-                       if (pos >= 0) {
-                               author = author.substring(pos + key.length());
-                               pos = author.indexOf("\"");
-                               if (pos >= 0) {
-                                       author = author.substring(0, pos - 1);
-                                       String page = source.getProtocol() + "://"
-                                                       + source.getHost() + author;
-                                       try {
-                                               InputStream pageIn = Instance.getCache().open(
-                                                               new URL(page), this, false);
-                                               try {
-                                                       key = "class=\"tag-type-artist\"";
-                                                       author = getLine(pageIn, key, 0);
-                                                       if (author != null) {
-                                                               pos = author.indexOf("<a href=\"");
-                                                               if (pos >= 0) {
-                                                                       author = author.substring(pos);
-                                                                       pos = author.indexOf("</a>");
-                                                                       if (pos >= 0) {
-                                                                               author = author.substring(0, pos);
-                                                                               return StringUtils.unhtml(author);
-                                                                       }
-                                                               }
-                                                       }
-                                               } finally {
-                                                       pageIn.close();
-                                               }
-                                       } catch (Exception e) {
-                                               // No author found
-                                       }
-                               }
+               if (isPool(getSource())) {
+                       Element el = getSourceNode().getElementById("description");
+                       if (el != null) {
+                               return el.text();
                        }
                }
 
                return null;
        }
 
-       private String getTitle(InputStream in) {
-               String title = getLine(in, "<title>", 0);
-               if (title != null) {
-                       int pos = title.indexOf('>');
-                       if (pos >= 0) {
-                               title = title.substring(pos + 1);
-                               pos = title.indexOf('<');
-                               if (pos >= 0) {
-                                       title = title.substring(0, pos);
-                               }
-                       }
-
-                       if (title.startsWith("Pool:")) {
-                               title = title.substring("Pool:".length());
-                       }
-
-                       title = StringUtils.unhtml(title).trim();
-               }
-
-               return title;
-       }
-
        @Override
-       protected String getDesc(URL source, InputStream in) throws IOException {
-               String desc = getLine(in, "margin-bottom: 2em;", 0);
-
-               if (desc != null) {
-                       StringBuilder builder = new StringBuilder();
-
-                       boolean inTags = false;
-                       for (char car : desc.toCharArray()) {
-                               if ((inTags && car == '>') || (!inTags && car == '<')) {
-                                       inTags = !inTags;
-                               }
-
-                               if (inTags) {
-                                       builder.append(car);
-                               }
-                       }
-
-                       return builder.toString().trim();
-               }
-
-               return null;
-       }
-
-       @Override
-       protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
-                       Progress pg) throws IOException {
-               if (isPool(source)) {
-                       return getChaptersPool(source, in, pg);
-               } else if (isSearch(source)) {
-                       return getChaptersSearch(source, in, pg);
+       protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
+               if (isPool(getSource())) {
+                       String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
+                       return getChapters(getSource(), pg, baseUrl, "");
+               } else if (isSearchOrSet(getSource())) {
+                       String baseUrl = "https://e621.net/posts/?page=";
+                       String search = "&tags=" + getTagsFromUrl(getSource());
+                       return getChapters(getSource(), pg, baseUrl, search);
                }
 
                return new LinkedList<Entry<String, URL>>();
        }
 
-       private List<Entry<String, URL>> getChaptersSearch(URL source,
-                       InputStream in, Progress pg) throws IOException {
+       private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
+                       throws IOException {
                List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 
-               String search = source.getPath();
-               if (search.endsWith("/")) {
-                       search = search.substring(0, search.length() - 1);
-               }
-
-               int pos = search.lastIndexOf('/');
-               if (pos >= 0) {
-                       search = search.substring(pos + 1);
-               }
-
-               String baseUrl = "https://e621.net/post/index/";
                if (source.getHost().contains("e926")) {
                        baseUrl = baseUrl.replace("e621", "e926");
                }
 
                for (int i = 1; true; i++) {
-                       URL url = new URL(baseUrl + i + "/" + search + "/");
+                       URL url = new URL(baseUrl + i + parameters);
                        try {
                                InputStream pageI = Instance.getCache().open(url, this, false);
                                try {
-                                       if (getLine(pageI, "No posts matched your search.", 0) != null)
+                                       if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
                                                break;
-                                       urls.add(new AbstractMap.SimpleEntry<String, URL>("Page "
-                                                       + Integer.toString(i), url));
+                                       }
+                                       urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
                                } finally {
                                        pageI.close();
                                }
@@ -294,116 +141,161 @@ class E621 extends BasicSupport_Deprecated {
                return urls;
        }
 
-       private List<Entry<String, URL>> getChaptersPool(URL source,
-                       InputStream in, Progress pg) throws IOException {
-               List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
-               int last = 1; // no pool/show when only one page
-
-               @SuppressWarnings("resource")
-               Scanner scan = new Scanner(in, "UTF-8");
-               scan.useDelimiter("\\n");
-               while (scan.hasNext()) {
-                       String line = scan.next();
-                       for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
-                                       .indexOf(source.getPath(), pos + source.getPath().length())) {
-                               int equalPos = line.indexOf("=", pos);
-                               int quotePos = line.indexOf("\"", pos);
-                               if (equalPos >= 0 && quotePos > equalPos) {
-                                       String snum = line.substring(equalPos + 1, quotePos);
-                                       try {
-                                               int num = Integer.parseInt(snum);
-                                               if (num > last) {
-                                                       last = num;
-                                               }
-                                       } catch (NumberFormatException e) {
+       @Override
+       protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
+               StringBuilder builder = new StringBuilder();
+               Document chapterNode = loadDocument(chapUrl);
+               for (Element el : chapterNode.getElementsByTag("article")) {
+                       builder.append("[");
+                       builder.append(el.attr("data-file-url"));
+                       builder.append("]<br/>");
+               }
+
+               return builder.toString();
+       }
+
+       @Override
+       protected URL getCanonicalUrl(URL source) {
+               if (isSetOriginalUrl(source)) {
+                       try {
+                               Document doc = DataUtil.load(Instance.getCache().open(source, this, false), "UTF-8", source.toString());
+                               for (Element shortname : doc.getElementsByClass("set-shortname")) {
+                                       for (Element el : shortname.getElementsByTag("a")) {
+                                               if (!el.attr("href").isEmpty())
+                                                       return new URL(el.absUrl("href"));
                                        }
                                }
+                       } catch (IOException e) {
+                               Instance.getTraceHandler().error(e);
                        }
                }
 
-               for (int i = 1; i <= last; i++) {
-                       urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
-                                       .toString(i), new URL(source.toString() + "?page=" + i)));
+               return super.getCanonicalUrl(source);
+       }
+
+       // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
+       private String getTagsFromUrl(URL url) {
+               String tags = url == null ? "" : url.getQuery();
+               int pos = tags.indexOf("tags=");
+
+               if (pos >= 0) {
+                       tags = tags.substring(pos).substring("tags=".length());
+               } else {
+                       return "";
                }
 
-               return urls;
+               pos = tags.indexOf('&');
+               if (pos > 0) {
+                       tags = tags.substring(0, pos);
+               }
+               pos = tags.indexOf('/');
+               if (pos > 0) {
+                       tags = tags.substring(0, pos);
+               }
+
+               return tags;
        }
 
-       @Override
-       protected String getChapterContent(URL source, InputStream in, int number,
-                       Progress pg) throws IOException {
-               StringBuilder builder = new StringBuilder();
-               String staticSite = "https://static1.e621.net";
-               if (source.getHost().contains("e926")) {
-                       staticSite = staticSite.replace("e621", "e926");
+       private String getTitle() {
+               String title = "";
+
+               Element el = getSourceNode().getElementsByTag("title").first();
+               if (el != null) {
+                       title = el.text().trim();
                }
 
-               String key = staticSite + "/data/preview/";
-
-               @SuppressWarnings("resource")
-               Scanner scan = new Scanner(in, "UTF-8");
-               scan.useDelimiter("\\n");
-               while (scan.hasNext()) {
-                       String line = scan.next();
-                       if (line.contains("class=\"preview")) {
-                               for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
-                                               key, pos + key.length())) {
-                                       int endPos = line.indexOf("\"", pos);
-                                       if (endPos >= 0) {
-                                               String id = line.substring(pos + key.length(), endPos);
-                                               id = staticSite + "/data/" + id;
-
-                                               int dotPos = id.lastIndexOf(".");
-                                               if (dotPos >= 0) {
-                                                       id = id.substring(0, dotPos);
-                                                       builder.append("[");
-                                                       builder.append(id);
-                                                       builder.append("]<br/>");
-                                               }
-                                       }
-                               }
+               for (String s : new String[] { "e621", "-", "e621" }) {
+                       if (title.startsWith(s)) {
+                               title = title.substring(s.length()).trim();
                        }
+                       if (title.endsWith(s)) {
+                               title = title.substring(0, title.length() - s.length()).trim();
+                       }
+
                }
 
-               return builder.toString();
+               if (isSearchOrSet(getSource())) {
+                       title = title.isEmpty() ? "e621" : "[e621] " + title;
+               }
+               return title;
        }
 
-       @Override
-       protected URL getCanonicalUrl(URL source) {
-               if (isSearch(source)) {
-                       // /post?tags=tag1+tag2 -> ../post/index/1/tag1%32tag2
-                       String key = "?tags=";
-                       if (source.toString().contains(key)) {
-                               int pos = source.toString().indexOf(key);
-                               String tags = source.toString().substring(pos + key.length());
-                               tags = tags.replace("+", "%20");
-
-                               String base = source.toString().substring(0, pos);
-                               if (!base.endsWith("/")) {
-                                       base += "/";
+       private String getAuthor() throws IOException {
+               StringBuilder builder = new StringBuilder();
+
+               if (isSearchOrSet(getSource())) {
+                       for (Element el : getSourceNode().getElementsByClass("search-tag")) {
+                               if (el.attr("itemprop").equals("author")) {
+                                       if (builder.length() > 0) {
+                                               builder.append(", ");
+                                       }
+                                       builder.append(el.text().trim());
                                }
-                               if (base.endsWith("/search/")) {
-                                       base = base.substring(0, base.indexOf("/search/") + 1);
+                       }
+               }
+
+               if (isPool(getSource())) {
+                       String desc = getDesc();
+                       String descL = desc.toLowerCase();
+
+                       if (descL.startsWith("by:") || descL.startsWith("by ")) {
+                               desc = desc.substring(3).trim();
+                               desc = desc.split("\n")[0];
+
+                               String tab[] = desc.split(" ");
+                               for (int i = 0; i < Math.min(tab.length, 5); i++) {
+                                       if (tab[i].startsWith("http"))
+                                               break;
+                                       builder.append(" ").append(tab[i]);
                                }
+                       }
+               }
 
+               return builder.toString();
+       }
+
+       // no tags for pools
+       private List<String> getTags() {
+               List<String> tags = new ArrayList<String>();
+               if (isSearchOrSet(getSource())) {
+                       String str = getTagsFromUrl(getSource());
+                       for (String tag : str.split("\\+")) {
                                try {
-                                       return new URL(base + "index/1/" + tags);
-                               } catch (MalformedURLException e) {
-                                       Instance.getTraceHandler().error(e);
+                                       tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
+                               } catch (UnsupportedEncodingException e) {
                                }
                        }
                }
 
-               return super.getCanonicalUrl(source);
+               return tags;
+       }
+
+       private Image getCover() throws IOException {
+               Image image = null;
+               List<Entry<String, URL>> chapters = getChapters(null);
+               if (!chapters.isEmpty()) {
+                       URL url = chapters.get(0).getValue();
+                       image = bsImages.getImage(this, url);
+               }
+
+               return image;
+       }
+
+       // note: will be removed at getCanonicalUrl()
+       private boolean isSetOriginalUrl(URL originalUrl) {
+               return originalUrl.getPath().startsWith("/post_sets/");
        }
 
        private boolean isPool(URL url) {
-               return url.getPath().startsWith("/pool/");
+               return url.getPath().startsWith("/pools/");
        }
 
-       private boolean isSearch(URL url) {
-               return url.getPath().startsWith("/post/index/")
-                               || (url.getPath().equals("/post/search") && url.getQuery()
-                                               .startsWith("tags="));
+       // set will be renamed into search by canonical url
+       private boolean isSearchOrSet(URL url) {
+               return
+               // search:
+               (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
+                               // or set:
+                               || isSetOriginalUrl(url);
        }
 }