code cleanup / jdoc
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
index 5fe99a8fe0db698f02586af65b7e90643a8fa345..f3a7238e534e7bf74ca0c1471da3cba6aca28b5e 100644 (file)
@@ -1,24 +1,35 @@
 package be.nikiroo.fanfix.supported;
 
 import java.io.IOException;
-import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
 import java.net.URL;
+import java.net.URLDecoder;
+import java.util.AbstractMap;
 import java.util.ArrayList;
+import java.util.Date;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map.Entry;
-import java.util.Scanner;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
 
 import be.nikiroo.fanfix.Instance;
-import be.nikiroo.fanfix.data.Chapter;
+import be.nikiroo.fanfix.bundles.Config;
 import be.nikiroo.fanfix.data.MetaData;
-import be.nikiroo.fanfix.data.Story;
 import be.nikiroo.utils.Image;
 import be.nikiroo.utils.Progress;
 import be.nikiroo.utils.StringUtils;
+import be.nikiroo.utils.Version;
 
 /**
- * Support class for <a href="http://e621.net/">e621.net</a> and <a
- * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
+ * Support class for <a href="http://e621.net/">e621.net</a> and
+ * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
  * including some of MLP.
  * <p>
  * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
@@ -26,258 +37,398 @@ import be.nikiroo.utils.StringUtils;
  * 
  * @author niki
  */
-class E621 extends BasicSupport_Deprecated {
+class E621 extends BasicSupport {
        @Override
-       public String getSourceName() {
-               return "e621.net";
+       protected boolean supports(URL url) {
+               // Compare on the bare host name: an optional "www." prefix is ignored
+               final String prefix = "www.";
+               String site = url.getHost();
+               if (site.startsWith(prefix)) {
+                       site = site.substring(prefix.length());
+               }
+
+               // Unknown hosts are refused before the path/query is looked at
+               if (!"e621.net".equals(site) && !"e926.net".equals(site)) {
+                       return false;
+               }
+
+               // On a known host, only pools, searches and sets are handled
+               return isPool(url) || isSearchOrSet(url);
+       }
+
+       @Override
+       protected boolean isHtml() {
+               // The description and the chapters built by this class contain HTML
+               // markup ("<div>", "<br/>"); presumably this flag makes the base
+               // class treat them as such -- TODO confirm against BasicSupport
+               return true;
        }
 
        @Override
-       protected MetaData getMeta(URL source, InputStream in) throws IOException {
+       protected MetaData getMeta() throws IOException {
+               // The title is read from the HTML page, the author, date and cover
+               // from the posts JSON feed, and the tags from the URL query; the
+               // source URL is used both as URL and as unique identifier.
                MetaData meta = new MetaData();
 
-               meta.setTitle(getTitle(reset(in)));
-               meta.setAuthor(getAuthor(source, reset(in)));
-               meta.setDate("");
-               meta.setTags(new ArrayList<String>()); // TODDO ???
-               meta.setSource(getSourceName());
-               meta.setUrl(source.toString());
-               meta.setPublisher(getSourceName());
-               meta.setUuid(source.toString());
+               meta.setTitle(getTitle());
+               meta.setAuthor(getAuthor());
+               meta.setDate(bsHelper.formatDate(getDate()));
+               meta.setTags(getTags());
+               meta.setUrl(getSource().toString());
+               meta.setUuid(getSource().toString());
                meta.setLuid("");
-               meta.setLang("EN");
+               meta.setLang("en");
                meta.setSubject("Furry");
-               meta.setType(getType().toString());
                meta.setImageDocument(true);
-               meta.setCover(getCover(source));
+               meta.setCover(getCover());
                meta.setFakeCover(true);
 
                return meta;
        }
 
        @Override
-       public Story process(URL url, Progress pg) throws IOException {
-               // There is no chapters on e621, just pagination...
-               Story story = super.process(url, pg);
+       protected String getDesc() throws IOException {
+               // Searches and sets have no description of their own: an HTML summary
+               // (host, creation time, one line per tag) is generated instead
+               if (isSearchOrSet(getSource())) {
+                       StringBuilder builder = new StringBuilder();
+                       builder.append("<div>");
+                       builder.append("A collection of images from ")
+                                       .append(getSource().getHost()) //
+                                       .append("<br/>\n") //
+                                       .append("&nbsp;&nbsp;&nbsp;&nbsp;Time of creation: "
+                                                       + StringUtils.fromTime(new Date().getTime()))
+                                       .append("<br/>\n") //
+                                       .append("&nbsp;&nbsp;&nbsp;&nbsp;Tags: ");//
+                       for (String tag : getTags()) {
+                               builder.append(
+                                               "\n<br/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;")
+                                               .append(tag);
+                       }
+                       builder.append("\n</div>");
 
-               Chapter only = new Chapter(1, null);
-               for (Chapter chap : story) {
-                       only.getParagraphs().addAll(chap.getParagraphs());
+                       return builder.toString();
                }
 
-               story.getChapters().clear();
-               story.getChapters().add(only);
+               // Pools: use the HTML content of the page element with the id
+               // "description", when the page has one
+               if (isPool(getSource())) {
+                       Element el = getSourceNode().getElementById("description");
+                       if (el != null) {
+                               return el.html();
+                       }
+               }
 
-               return story;
+               // No description available
+               return null;
        }
 
        @Override
-       protected boolean supports(URL url) {
-               String host = url.getHost();
-               if (host.startsWith("www.")) {
-                       host = host.substring("www.".length());
+       protected List<Entry<String, URL>> getChapters(Progress pg)
+                       throws IOException {
+               // One chapter per page of the posts feed: the feed is probed page
+               // after page until an empty (or unreadable) page is found; "i" ends
+               // up as the number of the last usable page
+               int i = 1;
+               String jsonUrl = getJsonUrl();
+               if (jsonUrl != null) {
+                       for (i = 1; true; i++) {
+                               if (i > 1) {
+                                       try {
+                                               // The API does not accept more than 2 requests per
+                                               // second, and asks us to limit ourselves to one per
+                                               // second when possible
+                                               Thread.sleep(1000);
+                                       } catch (InterruptedException e) {
+                                               // Keep the interruption visible to the caller and
+                                               // stop probing; the pages found so far are kept
+                                               Thread.currentThread().interrupt();
+                                               break;
+                                       }
+                               }
+
+                               try {
+                                       JSONObject json = getJson(jsonUrl + "&page=" + i, false);
+                                       if (!json.has("posts"))
+                                               break;
+                                       JSONArray posts = json.getJSONArray("posts");
+                                       if (posts.isEmpty())
+                                               break;
+                               } catch (Exception e) {
+                                       // A page that cannot be read ends the listing: moving
+                                       // on to the next page would never terminate when every
+                                       // request fails (network down, API refusal...)
+                                       Instance.getInstance().getTraceHandler().error(e);
+                                       break;
+                               }
+                       }
+
+                       // The last page was empty (or could not be read):
+                       i--;
                }
 
-               return ("e621.net".equals(host) || "e926.net".equals(host))
-                               && url.getPath().startsWith("/pool/");
+               // The pages and images are in reverse order on /posts/, so the
+               // last page of the feed becomes "Page 1"
+               List<Entry<String, URL>> chapters = new LinkedList<Entry<String, URL>>();
+               for (int page = i; page > 0; page--) {
+                       chapters.add(new AbstractMap.SimpleEntry<String, URL>(
+                                       "Page " + Integer.toString(i - page + 1),
+                                       new URL(jsonUrl + "&page=" + page)));
+               }
+
+               return chapters;
        }
 
        @Override
-       protected boolean isHtml() {
-               return true;
-       }
+       protected String getChapterContent(URL chapUrl, int number, Progress pg)
+                       throws IOException {
+               // Renders one page of the posts feed as a sequence of
+               // "[image-url]<br/>" entries, one per post that has a file URL
+               StringBuilder builder = new StringBuilder();
+
+               JSONObject json = getJson(chapUrl, false);
+               JSONArray postsArr = json.getJSONArray("posts");
 
-       private Image getCover(URL source) throws IOException {
-               InputStream in = Instance.getCache().open(source, this, true);
-               String images = getChapterContent(new URL(source.toString() + "?page="
-                               + 1), in, 1, null);
-               if (!images.isEmpty()) {
-                       int pos = images.indexOf("<br/>");
-                       if (pos >= 0) {
-                               images = images.substring(1, pos - 1);
-                               return getImage(this, null, images);
+               // The pages and images are in reverse order on /posts/, so the
+               // array is walked backwards (non-object entries are dropped)
+               List<JSONObject> posts = new ArrayList<JSONObject>(postsArr.length());
+               for (int i = postsArr.length() - 1; i >= 0; i--) {
+                       Object o = postsArr.get(i);
+                       if (o instanceof JSONObject)
+                               posts.add((JSONObject) o);
+               }
+
+               for (JSONObject post : posts) {
+                       // Posts without a "file" object or without a "url" key are skipped
+                       if (!post.has("file"))
+                               continue;
+                       JSONObject file = post.getJSONObject("file");
+                       if (!file.has("url"))
+                               continue;
+
+                       try {
+                               String url = file.getString("url");
+                               builder.append("[");
+                               builder.append(url);
+                               builder.append("]<br/>");
+                       } catch (JSONException e) {
+                               // The URL can be NULL when the post is filtered: in that
+                               // case "has" still returns true but getString throws, so
+                               // the post is reported and left out of the chapter
+                               Instance.getInstance().getTraceHandler()
+                                               .error("Cannot get image for chapter " + number + " of "
+                                                               + getSource());
                        }
                }
 
-               return null;
+               return builder.toString();
        }
 
-       private String getAuthor(URL source, InputStream in) {
-               String author = getLine(in, "href=\"/post/show/", 0);
-               if (author != null) {
-                       String key = "href=\"";
-                       int pos = author.indexOf(key);
-                       if (pos >= 0) {
-                               author = author.substring(pos + key.length());
-                               pos = author.indexOf("\"");
-                               if (pos >= 0) {
-                                       author = author.substring(0, pos - 1);
-                                       String page = source.getProtocol() + "://"
-                                                       + source.getHost() + author;
-                                       try {
-                                               InputStream pageIn = Instance.getCache().open(
-                                                               new URL(page), this, false);
-                                               try {
-                                                       key = "class=\"tag-type-artist\"";
-                                                       author = getLine(pageIn, key, 0);
-                                                       if (author != null) {
-                                                               pos = author.indexOf("<a href=\"");
-                                                               if (pos >= 0) {
-                                                                       author = author.substring(pos);
-                                                                       pos = author.indexOf("</a>");
-                                                                       if (pos >= 0) {
-                                                                               author = author.substring(0, pos);
-                                                                               return StringUtils.unhtml(author);
-                                                                       }
-                                                               }
-                                                       }
-                                               } finally {
-                                                       pageIn.close();
-                                               }
-                                       } catch (Exception e) {
-                                               // No author found
+       @Override
+       protected URL getCanonicalUrl(URL source) {
+               // Three rewrites are tried in turn: a search on a single numeric
+               // pool becomes /pools/N, a set page is replaced by the link found
+               // in its "set-shortname" element, and the old /pool/show/ path is
+               // replaced by /pools/
+
+               // Convert search-pools into proper pools
+               if (source.getPath().equals("/posts") && source.getQuery() != null
+                               && source.getQuery().startsWith("tags=pool%3A")) {
+                       String poolNumber = source.getQuery()
+                                       .substring("tags=pool%3A".length());
+                       try {
+                               // Only used as a check: anything that is not a plain
+                               // number (more tags, more parameters) is left as a search
+                               Integer.parseInt(poolNumber);
+                               String base = source.getProtocol() + "://" + source.getHost();
+                               if (source.getPort() != -1) {
+                                       base = base + ":" + source.getPort();
+                               }
+                               source = new URL(base + "/pools/" + poolNumber);
+                       } catch (NumberFormatException e) {
+                               // Not a simple pool, skip
+                       } catch (MalformedURLException e) {
+                               // Cannot happen
+                       }
+               }
+
+               if (isSetOriginalUrl(source)) {
+                       try {
+                               // The set page is downloaded and parsed; the first
+                               // non-empty link inside a "set-shortname" element wins
+                               Document doc = DataUtil.load(Instance.getInstance().getCache()
+                                               .open(source, this, false), "UTF-8", source.toString());
+                               for (Element shortname : doc
+                                               .getElementsByClass("set-shortname")) {
+                                       for (Element el : shortname.getElementsByTag("a")) {
+                                               if (!el.attr("href").isEmpty())
+                                                       return new URL(el.absUrl("href"));
                                        }
                                }
+                       } catch (IOException e) {
+                               Instance.getInstance().getTraceHandler().error(e);
                        }
                }
 
-               return null;
+               if (isPool(source)) {
+                       try {
+                               return new URL(
+                                               source.toString().replace("/pool/show/", "/pools/"));
+                       } catch (MalformedURLException e) {
+                               // NOTE(review): silently ignored, the default canonical
+                               // URL below is used instead
+                       }
+               }
+
+               return super.getCanonicalUrl(source);
        }
 
-       private String getTitle(InputStream in) {
-               String title = getLine(in, "<title>", 0);
-               if (title != null) {
-                       int pos = title.indexOf('>');
-                       if (pos >= 0) {
-                               title = title.substring(pos + 1);
-                               pos = title.indexOf('<');
-                               if (pos >= 0) {
-                                       title = title.substring(0, pos);
-                               }
-                       }
+       private String getTitle() {
+               // Starts from the text of the page <title> ("" when there is none)
+               String title = "";
 
-                       if (title.startsWith("Pool:")) {
-                               title = title.substring("Pool:".length());
+               Element el = getSourceNode().getElementsByTag("title").first();
+               if (el != null) {
+                       title = el.text().trim();
+               }
+
+               // Each marker is removed once from the start and once from the end
+               // of the title, in this order; "e621" and "-" are listed twice so
+               // that they can be removed again after another marker was stripped
+               for (String s : new String[] { "e621", "-", "e621", "Pool", "-" }) {
+                       if (title.startsWith(s)) {
+                               title = title.substring(s.length()).trim();
+                       }
+                       if (title.endsWith(s)) {
+                               title = title.substring(0, title.length() - s.length()).trim();
                        }
+               }
 
-                       title = StringUtils.unhtml(title).trim();
+               // Searches and sets get an "[e621]" prefix ("e621" alone when
+               // nothing is left of the title)
+               if (isSearchOrSet(getSource())) {
+                       title = title.isEmpty() ? "e621" : "[e621] " + title;
                }
 
                return title;
        }
 
-       @Override
-       protected String getDesc(URL source, InputStream in) throws IOException {
-               String desc = getLine(in, "margin-bottom: 2em;", 0);
+       private String getAuthor() {
+               // Collects the distinct "artist" tags of the posts returned by the
+               // JSON feed (requested without a page parameter), in order of first
+               // appearance, and joins them with ", "; "" when none is found
+               List<String> list = new ArrayList<String>();
+               String jsonUrl = getJsonUrl();
+               if (jsonUrl != null) {
+                       try {
+                               JSONObject json = getJson(jsonUrl, false);
+                               JSONArray posts = json.getJSONArray("posts");
+                               for (Object obj : posts) {
+                                       if (!(obj instanceof JSONObject))
+                                               continue;
+
+                                       JSONObject post = (JSONObject) obj;
+                                       if (!post.has("tags"))
+                                               continue;
+
+                                       JSONObject tags = post.getJSONObject("tags");
+                                       if (!tags.has("artist"))
+                                               continue;
+
+                                       JSONArray artists = tags.getJSONArray("artist");
+                                       for (Object artist : artists) {
+                                               // Keep each artist only once
+                                               if (list.contains(artist.toString()))
+                                                       continue;
+
+                                               list.add(artist.toString());
+                                       }
+                               }
+                       } catch (Exception e) {
+                               // Best effort: the artists gathered so far are kept.
+                               // NOTE(review): other methods of this class report
+                               // through the trace handler rather than printStackTrace
+                               e.printStackTrace();
+                       }
+               }
 
-               if (desc != null) {
-                       StringBuilder builder = new StringBuilder();
+               StringBuilder builder = new StringBuilder();
+               for (String artist : list) {
+                       if (builder.length() > 0) {
+                               builder.append(", ");
+                       }
+                       builder.append(artist);
+               }
 
-                       boolean inTags = false;
-                       for (char car : desc.toCharArray()) {
-                               if ((inTags && car == '>') || (!inTags && car == '<')) {
-                                       inTags = !inTags;
-                               }
+               return builder.toString();
+       }
 
-                               if (inTags) {
-                                       builder.append(car);
+       private String getDate() {
+               // Returns the "created_at" value of the first post of the JSON feed
+               // that has one, as found in the feed; "" when there is no such post
+               // or when the feed cannot be read
+               String jsonUrl = getJsonUrl();
+               if (jsonUrl != null) {
+                       try {
+                               JSONObject json = getJson(jsonUrl, false);
+                               JSONArray posts = json.getJSONArray("posts");
+                               for (Object obj : posts) {
+                                       if (!(obj instanceof JSONObject))
+                                               continue;
+
+                                       JSONObject post = (JSONObject) obj;
+                                       if (!post.has("created_at"))
+                                               continue;
+
+                                       return post.getString("created_at");
                                }
+                       } catch (Exception e) {
+                               // Best effort: falls through to the empty date below
+                               e.printStackTrace();
                        }
-
-                       return builder.toString().trim();
                }
 
-               return null;
+               return "";
        }
 
-       @Override
-       protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
-                       Progress pg) throws IOException {
-               List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
-               int last = 1; // no pool/show when only one page
-
-               @SuppressWarnings("resource")
-               Scanner scan = new Scanner(in, "UTF-8");
-               scan.useDelimiter("\\n");
-               while (scan.hasNext()) {
-                       String line = scan.next();
-                       for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
-                                       .indexOf(source.getPath(), pos + source.getPath().length())) {
-                               int equalPos = line.indexOf("=", pos);
-                               int quotePos = line.indexOf("\"", pos);
-                               if (equalPos >= 0 && quotePos > equalPos) {
-                                       String snum = line.substring(equalPos + 1, quotePos);
-                                       try {
-                                               int num = Integer.parseInt(snum);
-                                               if (num > last) {
-                                                       last = num;
-                                               }
-                                       } catch (NumberFormatException e) {
-                                       }
+       // no tags for pools
+       private List<String> getTags() {
+               // Tags are the "+"-separated, URL-decoded values of the "tags="
+               // query parameter; only searches and sets have them
+               List<String> tags = new ArrayList<String>();
+               if (isSearchOrSet(getSource())) {
+                       String str = getTagsFromUrl(getSource());
+                       for (String tag : str.split("\\+")) {
+                               try {
+                                       // split() yields one empty string for an empty
+                                       // input: empty names are not tags and are skipped
+                                       String decoded = URLDecoder.decode(tag.trim(), "UTF-8")
+                                                       .trim();
+                                       if (!decoded.isEmpty()) {
+                                               tags.add(decoded);
+                                       }
+                               } catch (UnsupportedEncodingException e) {
+                                       // Cannot happen: UTF-8 is always supported
                                }
                        }
                }
 
-               for (int i = 1; i <= last; i++) {
-                       final String key = Integer.toString(i);
-                       final URL value = new URL(source.toString() + "?page=" + i);
-                       urls.add(new Entry<String, URL>() {
-                               @Override
-                               public URL setValue(URL value) {
-                                       return null;
-                               }
+               return tags;
+       }
 
-                               @Override
-                               public URL getValue() {
-                                       return value;
-                               }
+       // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
+       private String getTagsFromUrl(URL url) {
+               // URL.getQuery() returns null when the URL has no query part: both
+               // a null URL and a null query mean "no tags"
+               String tags = url == null ? null : url.getQuery();
+               int pos = tags == null ? -1 : tags.indexOf("tags=");
 
-                               @Override
-                               public URL getValue() {
-                                       return value;
-                               }
+               if (pos < 0) {
+                       return "";
+               }
 
-                               @Override
-                               public String getKey() {
-                                       return key;
-                               }
-                       });
+               tags = tags.substring(pos + "tags=".length());
+
+               // Cut at the first separator; ">= 0" so that an empty value
+               // ("tags=&page=2") gives "" instead of the rest of the query
+               pos = tags.indexOf('&');
+               if (pos >= 0) {
+                       tags = tags.substring(0, pos);
+               }
+               pos = tags.indexOf('/');
+               if (pos >= 0) {
+                       tags = tags.substring(0, pos);
                }
 
-               return urls;
+               return tags;
        }
 
-       @Override
-       protected String getChapterContent(URL source, InputStream in, int number,
-                       Progress pg) throws IOException {
-               StringBuilder builder = new StringBuilder();
-               String staticSite = "https://static1.e621.net";
-               if (source.getHost().contains("e926")) {
-                       staticSite = staticSite.replace("e621", "e926");
+       private Image getCover() throws IOException {
+               // The cover is the first image of the first chapter; null when
+               // there is no chapter or when the first chapter has no content.
+               // Note: this lists all the chapters again through getChapters()
+               Image image = null;
+               List<Entry<String, URL>> chapters = getChapters(null);
+               if (!chapters.isEmpty()) {
+                       URL chap1Url = chapters.get(0).getValue();
+                       String imgsChap1 = getChapterContent(chap1Url, 1, null);
+                       if (!imgsChap1.isEmpty()) {
+                               // The content is a list of "[url]<br/>" entries: keep
+                               // what stands between the first "[" and the first "]"
+                               imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
+                               image = bsImages.getImage(this, new URL(imgsChap1));
+                       }
                }
 
-               String key = staticSite + "/data/preview/";
-
-               @SuppressWarnings("resource")
-               Scanner scan = new Scanner(in, "UTF-8");
-               scan.useDelimiter("\\n");
-               while (scan.hasNext()) {
-                       String line = scan.next();
-                       if (line.contains("class=\"preview")) {
-                               for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
-                                               key, pos + key.length())) {
-                                       int endPos = line.indexOf("\"", pos);
-                                       if (endPos >= 0) {
-                                               String id = line.substring(pos + key.length(), endPos);
-                                               id = staticSite + "/data/" + id;
-
-                                               int dotPos = id.lastIndexOf(".");
-                                               if (dotPos >= 0) {
-                                                       id = id.substring(0, dotPos);
-                                                       builder.append("[");
-                                                       builder.append(id);
-                                                       builder.append("]<br/>");
-                                               }
-                                       }
-                               }
+               return image;
+       }
+
+       // always /posts.json/ url
+       private String getJsonUrl() {
+               // Builds the /posts.json URL matching the source, or null when the
+               // source is neither a search/set nor a pool
+               String url = null;
+               if (isSearchOrSet(getSource())) {
+                       url = getSource().toString().replace("/posts", "/posts.json");
+               }
+
+               if (isPool(getSource())) {
+                       // NOTE(review): assumes the path starts with "/pools/" (the
+                       // "/pool/show/" form is presumably already rewritten by
+                       // getCanonicalUrl -- confirm); the host is hard-coded to
+                       // e621.net even when the source is on e926.net -- confirm
+                       // that this is intended
+                       String poolNumber = getSource().getPath()
+                                       .substring("/pools/".length());
+                       url = "https://e621.net/posts.json" + "?tags=pool%3A" + poolNumber;
+               }
+
+               if (url != null) {
+                       // Note: one way to override the blacklist
+                       String login = Instance.getInstance().getConfig()
+                                       .getString(Config.LOGIN_E621_LOGIN);
+                       String apk = Instance.getInstance().getConfig()
+                                       .getString(Config.LOGIN_E621_APIKEY);
+
+                       // The credentials are only added when both are configured;
+                       // they are appended as-is (no URL-encoding is applied)
+                       if (login != null && !login.isEmpty() && apk != null
+                                       && !apk.isEmpty()) {
+                               url = String.format("%s&login=%s&api_key=%s&_client=%s", url,
+                                               login, apk, "fanfix-" + Version.getCurrentVersion());
                        }
                }
 
-               return builder.toString();
+               return url;
+       }
+
+       // note: will be removed at getCanonicalUrl()
+       private boolean isSetOriginalUrl(URL originalUrl) {
+               // A set is recognised by the start of its path only
+               String path = originalUrl.getPath();
+               return path.startsWith("/post_sets/");
+       }
+
+       private boolean isPool(URL url) {
+               // Both the current ("/pools/") and the old ("/pool/show/") pool
+               // paths are accepted
+               String path = url.getPath();
+               for (String prefix : new String[] { "/pools/", "/pool/show/" }) {
+                       if (path.startsWith(prefix)) {
+                               return true;
+                       }
+               }
+
+               return false;
+       }
+
+       // set will be renamed into search by canonical url
+       private boolean isSearchOrSet(URL url) {
+               // URL.getQuery() returns null for a URL without query part (a bare
+               // "/posts"): such a URL is not a tag search, and must not fail
+               String query = url.getQuery();
+               boolean search = url.getPath().equals("/posts") && query != null
+                               && query.contains("tags=");
+
+               // search, or set:
+               return search || isSetOriginalUrl(url);
+       }
 }