A space is encoded as %20, not %32 (the escape code is hexadecimal, not decimal)
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
index 9315716715b5dbfd6fad8a54c4f1a57bc92fced2..aa3084f210cf99140d53d3878f326de46172d6f2 100644 (file)
@@ -1,11 +1,19 @@
 package be.nikiroo.fanfix.supported;
 
-import java.awt.image.BufferedImage;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
 import java.net.URL;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
+import java.util.AbstractMap;
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedList;
 import java.util.List;
+import java.util.AbstractMap.SimpleEntry;
 import java.util.Map.Entry;
 import java.util.Scanner;
 
@@ -13,6 +21,7 @@ import be.nikiroo.fanfix.Instance;
 import be.nikiroo.fanfix.data.Chapter;
 import be.nikiroo.fanfix.data.MetaData;
 import be.nikiroo.fanfix.data.Story;
+import be.nikiroo.utils.Image;
 import be.nikiroo.utils.Progress;
 import be.nikiroo.utils.StringUtils;
 
@@ -26,7 +35,7 @@ import be.nikiroo.utils.StringUtils;
  * 
  * @author niki
  */
-class E621 extends BasicSupport {
+class E621 extends BasicSupport_Deprecated {
        @Override
        public String getSourceName() {
                return "e621.net";
@@ -39,22 +48,58 @@ class E621 extends BasicSupport {
                meta.setTitle(getTitle(reset(in)));
                meta.setAuthor(getAuthor(source, reset(in)));
                meta.setDate("");
-               meta.setTags(new ArrayList<String>()); // TODDO ???
+               meta.setTags(getTags(source, reset(in), false));
                meta.setSource(getSourceName());
                meta.setUrl(source.toString());
                meta.setPublisher(getSourceName());
                meta.setUuid(source.toString());
                meta.setLuid("");
-               meta.setLang("EN");
+               meta.setLang("en");
                meta.setSubject("Furry");
                meta.setType(getType().toString());
                meta.setImageDocument(true);
-               meta.setCover(getCover(source));
+               meta.setCover(getCover(source, reset(in)));
                meta.setFakeCover(true);
 
                return meta;
        }
 
+       // Collects, from the tag-sidebar line of a search page, the decoded values of
+       // the "?title=" links or, when authors is TRUE, of the "?name=" links. URLs
+       // that are not searches give an empty list; the result is never NULL.
+       private List<String> getTags(URL source, InputStream in, boolean authors) {
+               List<String> tags = new ArrayList<String>();
+               if (!isSearch(source)) {
+                       return tags;
+               }
+
+               String tagLine = getLine(in, "id=\"tag-sidebar\"", 1);
+               if (tagLine == null) {
+                       return tags;
+               }
+               String key = "href=\"";
+               String subkey = authors ? "?name=" : "?title="; // same for every link
+               for (int pos = tagLine.indexOf(key); pos >= 0; pos = tagLine
+                               .indexOf(key, pos + 1)) {
+                       int end = tagLine.indexOf("\"", pos + key.length());
+                       if (end < 0) {
+                               continue; // unterminated attribute, nothing to read
+                       }
+                       String href = tagLine.substring(pos + key.length(), end);
+                       int sub = href.indexOf(subkey);
+                       if (sub >= 0) {
+                               String tag = href.substring(sub + subkey.length());
+                               try {
+                                       tags.add(URLDecoder.decode(tag, "UTF-8"));
+                               } catch (UnsupportedEncodingException e) {
+                                       // supported JVMs must have UTF-8 support
+                                       Instance.getTraceHandler().error(e);
+                               }
+                       }
+               }
+               return tags;
+       }
+
        @Override
        public Story process(URL url, Progress pg) throws IOException {
                // There is no chapters on e621, just pagination...
@@ -79,7 +124,7 @@ class E621 extends BasicSupport {
                }
 
                return ("e621.net".equals(host) || "e926.net".equals(host))
-                               && url.getPath().startsWith("/pool/");
+                               && (isPool(url) || isSearch(url));
        }
 
        @Override
@@ -87,12 +132,15 @@ class E621 extends BasicSupport {
                return true;
        }
 
-       private BufferedImage getCover(URL source) throws IOException {
-               InputStream in = Instance.getCache().open(source, this, true);
-               String images = getChapterContent(new URL(source.toString() + "?page="
-                               + 1), in, 1);
+       private Image getCover(URL source, InputStream in) throws IOException {
+               URL urlForCover = source;
+               if (isPool(source)) {
+                       urlForCover = new URL(source.toString() + "?page=1");
+               }
+
+               String images = getChapterContent(urlForCover, in, 1, null);
                if (!images.isEmpty()) {
-                       int pos = images.indexOf('\n');
+                       int pos = images.indexOf("<br/>");
                        if (pos >= 0) {
                                images = images.substring(1, pos - 1);
                                return getImage(this, null, images);
@@ -102,7 +150,18 @@ class E621 extends BasicSupport {
                return null;
        }
 
-       private String getAuthor(URL source, InputStream in) throws IOException {
+       private String getAuthor(URL source, InputStream in) {
+               if (isSearch(source)) {
+                       StringBuilder builder = new StringBuilder();
+                       for (String author : getTags(source, in, true)) {
+                               if (builder.length() > 0)
+                                       builder.append(", ");
+                               builder.append(author);
+                       }
+
+                       return builder.toString();
+               }
+
                String author = getLine(in, "href=\"/post/show/", 0);
                if (author != null) {
                        String key = "href=\"";
@@ -144,7 +203,7 @@ class E621 extends BasicSupport {
                return null;
        }
 
-       private String getTitle(InputStream in) throws IOException {
+       private String getTitle(InputStream in) {
                String title = getLine(in, "<title>", 0);
                if (title != null) {
                        int pos = title.indexOf('>');
@@ -191,8 +250,60 @@ class E621 extends BasicSupport {
        }
 
        @Override
-       protected List<Entry<String, URL>> getChapters(URL source, InputStream in)
-                       throws IOException {
+       protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
+                       Progress pg) throws IOException { // dispatches on the kind of URL
+               if (isPool(source)) {
+                       return getChaptersPool(source, in, pg);
+               } else if (isSearch(source)) {
+                       return getChaptersSearch(source, in, pg);
+               }
+
+               return new LinkedList<Entry<String, URL>>(); // neither a pool nor a search: no chapter
+       }
+
+       private List<Entry<String, URL>> getChaptersSearch(URL source,
+                       InputStream in, Progress pg) throws IOException { // one "Page N" entry per result page; in and pg are not read here
+               List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
+
+               String search = source.getPath(); // reduced below to the last path segment
+               if (search.endsWith("/")) {
+                       search = search.substring(0, search.length() - 1);
+               }
+
+               int pos = search.lastIndexOf('/');
+               if (pos >= 0) {
+                       search = search.substring(pos + 1);
+               }
+
+               String baseUrl = "https://e621.net/post/index/";
+               if (source.getHost().contains("e926")) { // stay on e926 when the source is an e926 URL
+                       baseUrl = baseUrl.replace("e621", "e926");
+               }
+
+               for (int i = 1; true; i++) { // no page count is read: pages are probed one by one until a break
+                       URL url = new URL(baseUrl + i + "/" + search + "/");
+                       try {
+                               InputStream pageI = Instance.getCache().open(url, this, false);
+                               try {
+                                       if (getLine(pageI, "No posts matched your search.", 0) != null) // first page without result ends the list
+                                               break;
+                                       urls.add(new AbstractMap.SimpleEntry<String, URL>("Page "
+                                                       + Integer.toString(i), url));
+                               } finally {
+                                       pageI.close();
+                               }
+                       } catch (Exception e) { // NOTE(review): any failure silently ends the probing; confirm this is intended
+                               break;
+                       }
+               }
+
+               // They are sorted in reverse order on the website
+               Collections.reverse(urls);
+               return urls;
+       }
+
+       private List<Entry<String, URL>> getChaptersPool(URL source,
+                       InputStream in, Progress pg) throws IOException {
                List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
                int last = 1; // no pool/show when only one page
 
@@ -219,29 +330,16 @@ class E621 extends BasicSupport {
                }
 
                for (int i = 1; i <= last; i++) {
-                       final String key = Integer.toString(i);
-                       final URL value = new URL(source.toString() + "?page=" + i);
-                       urls.add(new Entry<String, URL>() {
-                               public URL setValue(URL value) {
-                                       return null;
-                               }
-
-                               public URL getValue() {
-                                       return value;
-                               }
-
-                               public String getKey() {
-                                       return key;
-                               }
-                       });
+                       urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
+                                       .toString(i), new URL(source.toString() + "?page=" + i)));
                }
 
                return urls;
        }
 
        @Override
-       protected String getChapterContent(URL source, InputStream in, int number)
-                       throws IOException {
+       protected String getChapterContent(URL source, InputStream in, int number,
+                       Progress pg) throws IOException {
                StringBuilder builder = new StringBuilder();
                String staticSite = "https://static1.e621.net";
                if (source.getHost().contains("e926")) {
@@ -268,7 +366,7 @@ class E621 extends BasicSupport {
                                                        id = id.substring(0, dotPos);
                                                        builder.append("[");
                                                        builder.append(id);
-                                                       builder.append("]\n");
+                                                       builder.append("]<br/>");
                                                }
                                        }
                                }
@@ -277,4 +375,34 @@ class E621 extends BasicSupport {
 
                return builder.toString();
        }
+
+       @Override
+       protected URL getCanonicalUrl(URL source) { // rewrites "post?tags=" search URLs to their "post/index/1/" form
+               if (isSearch(source)) {
+                       // /post?tags=tag1+tag2 -> ../post/index/1/tag1%20tag2
+                       String key = "post?tags=";
+                       if (source.toString().contains(key)) {
+                               int pos = source.toString().indexOf(key);
+                               String tags = source.toString().substring(pos + key.length());
+                               tags = tags.replace("+", "%20"); // a space is %20: the escape is hexadecimal (0x20 = 32)
+                               try {
+                                       return new URL(source.toString().substring(0, pos)
+                                                       + "post/index/1/" + tags);
+                               } catch (MalformedURLException e) { // reported, then the default canonical URL is used below
+                                       Instance.getTraceHandler().error(e);
+                               }
+                       }
+               }
+               return super.getCanonicalUrl(source);
+       }
+
+       private boolean isPool(URL url) { // a pool URL is one whose path starts with "/pool/"
+               return url.getPath().startsWith("/pool/");
+       }
+
+       private boolean isSearch(URL url) { // "/post/index/..." or "/post?tags=..."
+               String query = url.getQuery(); // NULL when the URL has no "?..." part
+               return url.getPath().startsWith("/post/index/") || (query != null
+                               && query.startsWith("tags=") && url.getPath().equals("/post"));
+       }
 }