X-Git-Url: http://git.nikiroo.be/?p=nikiroo-utils.git;a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FE621.java;h=f3a7238e534e7bf74ca0c1471da3cba6aca28b5e;hp=dfa9e5ed6a60e4694fc8494dcf196b4328259ee6;hb=cfdaf6052ddc5ca44cf19f1f6d9f154cc8443024;hpb=12b90437b5f22c2ae6e9b9b14c3b62b60f6143e5 diff --git a/src/be/nikiroo/fanfix/supported/E621.java b/src/be/nikiroo/fanfix/supported/E621.java index dfa9e5e..f3a7238 100644 --- a/src/be/nikiroo/fanfix/supported/E621.java +++ b/src/be/nikiroo/fanfix/supported/E621.java @@ -1,30 +1,35 @@ package be.nikiroo.fanfix.supported; import java.io.IOException; -import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.net.URLDecoder; import java.util.AbstractMap; import java.util.ArrayList; -import java.util.Collections; +import java.util.Date; import java.util.LinkedList; import java.util.List; import java.util.Map.Entry; -import java.util.Scanner; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.jsoup.helper.DataUtil; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; import be.nikiroo.fanfix.Instance; -import be.nikiroo.fanfix.data.Chapter; +import be.nikiroo.fanfix.bundles.Config; import be.nikiroo.fanfix.data.MetaData; -import be.nikiroo.fanfix.data.Story; import be.nikiroo.utils.Image; import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; +import be.nikiroo.utils.Version; /** - * Support class for e621.net and e926.net, a Furry website supporting comics, + * Support class for e621.net and + * e926.net, a Furry website supporting comics, * including some of MLP. *

* e926.net only shows the "clean" images and @@ -32,378 +37,398 @@ import be.nikiroo.utils.StringUtils; * * @author niki */ -class E621 extends BasicSupport_Deprecated { +class E621 extends BasicSupport { @Override - protected MetaData getMeta(URL source, InputStream in) throws IOException { + protected boolean supports(URL url) { + String host = url.getHost(); + if (host.startsWith("www.")) { + host = host.substring("www.".length()); + } + + return ("e621.net".equals(host) || "e926.net".equals(host)) + && (isPool(url) || isSearchOrSet(url)); + } + + @Override + protected boolean isHtml() { + return true; + } + + @Override + protected MetaData getMeta() throws IOException { MetaData meta = new MetaData(); - meta.setTitle(getTitle(reset(in))); - meta.setAuthor(getAuthor(source, reset(in))); - meta.setDate(""); - meta.setTags(getTags(source, reset(in), false)); - meta.setSource(getType().getSourceName()); - meta.setUrl(source.toString()); - meta.setPublisher(getType().getSourceName()); - meta.setUuid(source.toString()); + meta.setTitle(getTitle()); + meta.setAuthor(getAuthor()); + meta.setDate(bsHelper.formatDate(getDate())); + meta.setTags(getTags()); + meta.setUrl(getSource().toString()); + meta.setUuid(getSource().toString()); meta.setLuid(""); meta.setLang("en"); meta.setSubject("Furry"); - meta.setType(getType().toString()); meta.setImageDocument(true); - meta.setCover(getCover(source, reset(in))); + meta.setCover(getCover()); meta.setFakeCover(true); return meta; } - private List getTags(URL source, InputStream in, boolean authors) { - List tags = new ArrayList(); + @Override + protected String getDesc() throws IOException { + if (isSearchOrSet(getSource())) { + StringBuilder builder = new StringBuilder(); + builder.append("

"); + builder.append("A collection of images from ") + .append(getSource().getHost()) // + .append("
\n") // + .append("    Time of creation: " + + StringUtils.fromTime(new Date().getTime())) + .append("
\n") // + .append("    tTags: ");// + for (String tag : getTags()) { + builder.append( + "\n
        ") + .append(tag); + } + builder.append("\n
"); - if (isSearch(source)) { - String tagLine = getLine(in, "id=\"tag-sidebar\"", 1); - if (tagLine != null) { - String key = "href=\""; - for (int pos = tagLine.indexOf(key); pos >= 0; pos = tagLine - .indexOf(key, pos + 1)) { - int end = tagLine.indexOf("\"", pos + key.length()); - if (end >= 0) { - String href = tagLine.substring(pos, end); - String subkey; - if (authors) - subkey = "?name="; - else - subkey = "?title="; - if (href.contains(subkey)) { - String tag = href.substring(href.indexOf(subkey) - + subkey.length()); - try { - tags.add(URLDecoder.decode(tag, "UTF-8")); - } catch (UnsupportedEncodingException e) { - // supported JVMs must have UTF-8 support - e.printStackTrace(); - } - } - } - } + return builder.toString(); + } + if (isPool(getSource())) { + Element el = getSourceNode().getElementById("description"); + if (el != null) { + return el.html(); } } - return tags; + return null; } @Override - public Story process(URL url, Progress pg) throws IOException { - // There is no chapters on e621, just pagination... - Story story = super.process(url, pg); - - Chapter only = new Chapter(1, null); - for (Chapter chap : story) { - only.getParagraphs().addAll(chap.getParagraphs()); - } + protected List> getChapters(Progress pg) + throws IOException { + int i = 1; + String jsonUrl = getJsonUrl(); + if (jsonUrl != null) { + for (i = 1; true; i++) { + if (i > 1) { + try { + // The API does not accept more than 2 request per sec, + // and asks us to limit at one per sec when possible + Thread.sleep(1000); + } catch (InterruptedException e) { + } + } - story.getChapters().clear(); - story.getChapters().add(only); + try { + JSONObject json = getJson(jsonUrl + "&page=" + i, false); + if (!json.has("posts")) + break; + JSONArray posts = json.getJSONArray("posts"); + if (posts.isEmpty()) + break; + } catch (Exception e) { + e.printStackTrace(); + } + } - return story; - } + // The last page was empty: + i--; + } - @Override - protected boolean supports(URL url) { - String host = url.getHost(); - if (host.startsWith("www.")) { - host = host.substring("www.".length()); + // The pages and images are in reverse order on /posts/ + List> chapters = new LinkedList>(); + for (int page = i; page > 0; page--) { + chapters.add(new AbstractMap.SimpleEntry( + "Page " + Integer.toString(i - page + 1), + new URL(jsonUrl + "&page=" + page))); } - return ("e621.net".equals(host) || "e926.net".equals(host)) - && (isPool(url) || isSearch(url)); + return chapters; } @Override - protected boolean isHtml() { - return true; - } + protected String getChapterContent(URL chapUrl, int number, Progress pg) + throws IOException { + StringBuilder builder = new StringBuilder(); - private Image getCover(URL source, InputStream in) throws IOException { - URL urlForCover = source; - if (isPool(source)) { - urlForCover = new URL(source.toString() + "?page=1"); + JSONObject json = getJson(chapUrl, false); + JSONArray postsArr = json.getJSONArray("posts"); + + // The pages and images are in reverse order on /posts/ + List posts = new ArrayList(postsArr.length()); + for (int i = postsArr.length() - 1; i >= 0; i--) { + Object o = postsArr.get(i); + if (o instanceof JSONObject) + posts.add((JSONObject) o); } - String images = getChapterContent(urlForCover, in, 1, null); - if (!images.isEmpty()) { - int pos = images.indexOf("
"); - if (pos >= 0) { - images = images.substring(1, pos - 1); - return getImage(this, null, images); + for (JSONObject post : posts) { + if (!post.has("file")) + continue; + JSONObject file = post.getJSONObject("file"); + if (!file.has("url")) + continue; + + try { + String url = file.getString("url"); + builder.append("["); + builder.append(url); + builder.append("]
"); + } catch (JSONException e) { + // Can be NULL if filtered + // When the value is NULL, we get an exception + // but the "has" method still returns true + Instance.getInstance().getTraceHandler() + .error("Cannot get image for chapter " + number + " of " + + getSource()); } } - return null; + return builder.toString(); } - private String getAuthor(URL source, InputStream in) { - if (isSearch(source)) { - StringBuilder builder = new StringBuilder(); - for (String author : getTags(source, in, true)) { - if (builder.length() > 0) - builder.append(", "); - builder.append(author); + @Override + protected URL getCanonicalUrl(URL source) { + // Convert search-pools into proper pools + if (source.getPath().equals("/posts") && source.getQuery() != null + && source.getQuery().startsWith("tags=pool%3A")) { + String poolNumber = source.getQuery() + .substring("tags=pool%3A".length()); + try { + Integer.parseInt(poolNumber); + String base = source.getProtocol() + "://" + source.getHost(); + if (source.getPort() != -1) { + base = base + ":" + source.getPort(); + } + source = new URL(base + "/pools/" + poolNumber); + } catch (NumberFormatException e) { + // Not a simple pool, skip + } catch (MalformedURLException e) { + // Cannot happen } - - return builder.toString(); } - String author = getLine(in, "href=\"/post/show/", 0); - if (author != null) { - String key = "href=\""; - int pos = author.indexOf(key); - if (pos >= 0) { - author = author.substring(pos + key.length()); - pos = author.indexOf("\""); - if (pos >= 0) { - author = author.substring(0, pos - 1); - String page = source.getProtocol() + "://" - + source.getHost() + author; - try { - InputStream pageIn = Instance.getCache().open( - new URL(page), this, false); - try { - key = "class=\"tag-type-artist\""; - author = getLine(pageIn, key, 0); - if (author != null) { - pos = author.indexOf("= 0) { - author = author.substring(pos); - pos = author.indexOf(""); - if (pos >= 0) { - author = author.substring(0, pos); - return StringUtils.unhtml(author); - } - } - } - } finally { - pageIn.close(); - } - } catch (Exception e) { - // No author found + if (isSetOriginalUrl(source)) { + try { + Document doc = DataUtil.load(Instance.getInstance().getCache() + .open(source, this, false), "UTF-8", source.toString()); + for (Element shortname : doc + .getElementsByClass("set-shortname")) { + for (Element el : shortname.getElementsByTag("a")) { + if (!el.attr("href").isEmpty()) + return new URL(el.absUrl("href")); } } + } catch (IOException e) { + Instance.getInstance().getTraceHandler().error(e); } } - return null; + if (isPool(source)) { + try { + return new URL( + source.toString().replace("/pool/show/", "/pools/")); + } catch (MalformedURLException e) { + } + } + + return super.getCanonicalUrl(source); } - private String getTitle(InputStream in) { - String title = getLine(in, "", 0); - if (title != null) { - int pos = title.indexOf('>'); - if (pos >= 0) { - title = title.substring(pos + 1); - pos = title.indexOf('<'); - if (pos >= 0) { - title = title.substring(0, pos); - } - } + private String getTitle() { + String title = ""; - if (title.startsWith("Pool:")) { - title = title.substring("Pool:".length()); + Element el = getSourceNode().getElementsByTag("title").first(); + if (el != null) { + title = el.text().trim(); + } + + for (String s : new String[] { "e621", "-", "e621", "Pool", "-" }) { + if (title.startsWith(s)) { + title = title.substring(s.length()).trim(); } + if (title.endsWith(s)) { + title = title.substring(0, title.length() - s.length()).trim(); + } + } - title = StringUtils.unhtml(title).trim(); + if (isSearchOrSet(getSource())) { + title = title.isEmpty() ? "e621" : "[e621] " + title; } return title; } - @Override - protected String getDesc(URL source, InputStream in) throws IOException { - String desc = getLine(in, "margin-bottom: 2em;", 0); - - if (desc != null) { - StringBuilder builder = new StringBuilder(); - - boolean inTags = false; - for (char car : desc.toCharArray()) { - if ((inTags && car == '>') || (!inTags && car == '<')) { - inTags = !inTags; - } - - if (inTags) { - builder.append(car); + private String getAuthor() { + List<String> list = new ArrayList<String>(); + String jsonUrl = getJsonUrl(); + if (jsonUrl != null) { + try { + JSONObject json = getJson(jsonUrl, false); + JSONArray posts = json.getJSONArray("posts"); + for (Object obj : posts) { + if (!(obj instanceof JSONObject)) + continue; + + JSONObject post = (JSONObject) obj; + if (!post.has("tags")) + continue; + + JSONObject tags = post.getJSONObject("tags"); + if (!tags.has("artist")) + continue; + + JSONArray artists = tags.getJSONArray("artist"); + for (Object artist : artists) { + if (list.contains(artist.toString())) + continue; + + list.add(artist.toString()); + } } + } catch (Exception e) { + e.printStackTrace(); } - - return builder.toString().trim(); } - return null; - } - - @Override - protected List<Entry<String, URL>> getChapters(URL source, InputStream in, - Progress pg) throws IOException { - if (isPool(source)) { - return getChaptersPool(source, in, pg); - } else if (isSearch(source)) { - return getChaptersSearch(source, in, pg); + StringBuilder builder = new StringBuilder(); + for (String artist : list) { + if (builder.length() > 0) { + builder.append(", "); + } + builder.append(artist); } - return new LinkedList<Entry<String, URL>>(); + return builder.toString(); } - private List<Entry<String, URL>> getChaptersSearch(URL source, - InputStream in, Progress pg) throws IOException { - List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>(); + private String getDate() { + String jsonUrl = getJsonUrl(); + if (jsonUrl != null) { + try { + JSONObject json = getJson(jsonUrl, false); + JSONArray posts = json.getJSONArray("posts"); + for (Object obj : posts) { + if (!(obj instanceof JSONObject)) + continue; - String search = source.getPath(); - if (search.endsWith("/")) { - search = search.substring(0, search.length() - 1); - } + JSONObject post = (JSONObject) obj; + if (!post.has("created_at")) + continue; - int pos = search.lastIndexOf('/'); - if (pos >= 0) { - search = search.substring(pos + 1); + return post.getString("created_at"); + } + } catch (Exception e) { + e.printStackTrace(); + } } - String baseUrl = "https://e621.net/post/index/"; - if (source.getHost().contains("e926")) { - baseUrl = baseUrl.replace("e621", "e926"); - } + return ""; + } - for (int i = 1; true; i++) { - URL url = new URL(baseUrl + i + "/" + search + "/"); - try { - InputStream pageI = Instance.getCache().open(url, this, false); + // no tags for pools + private List<String> getTags() { + List<String> tags = new ArrayList<String>(); + if (isSearchOrSet(getSource())) { + String str = getTagsFromUrl(getSource()); + for (String tag : str.split("\\+")) { try { - if (getLine(pageI, "No posts matched your search.", 0) != null) - break; - urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " - + Integer.toString(i), url)); - } finally { - pageI.close(); + tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim()); + } catch (UnsupportedEncodingException e) { } - } catch (Exception e) { - break; } } - // They are sorted in reverse order on the website - Collections.reverse(urls); - return urls; + return tags; } - private List<Entry<String, URL>> getChaptersPool(URL source, - InputStream in, Progress pg) throws IOException { - List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>(); - int last = 1; // no pool/show when only one page - - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - scan.useDelimiter("\\n"); - while (scan.hasNext()) { - String line = scan.next(); - for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line - .indexOf(source.getPath(), pos + source.getPath().length())) { - int equalPos = line.indexOf("=", pos); - int quotePos = line.indexOf("\"", pos); - if (equalPos >= 0 && quotePos > equalPos) { - String snum = line.substring(equalPos + 1, quotePos); - try { - int num = Integer.parseInt(snum); - if (num > last) { - last = num; - } - } catch (NumberFormatException e) { - } - } - } + // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query + private String getTagsFromUrl(URL url) { + String tags = url == null ? "" : url.getQuery(); + int pos = tags.indexOf("tags="); + + if (pos >= 0) { + tags = tags.substring(pos).substring("tags=".length()); + } else { + return ""; } - for (int i = 1; i <= last; i++) { - urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer - .toString(i), new URL(source.toString() + "?page=" + i))); + pos = tags.indexOf('&'); + if (pos > 0) { + tags = tags.substring(0, pos); + } + pos = tags.indexOf('/'); + if (pos > 0) { + tags = tags.substring(0, pos); } - return urls; + return tags; } - @Override - protected String getChapterContent(URL source, InputStream in, int number, - Progress pg) throws IOException { - StringBuilder builder = new StringBuilder(); - String staticSite = "https://static1.e621.net"; - if (source.getHost().contains("e926")) { - staticSite = staticSite.replace("e621", "e926"); - } - - String key = staticSite + "/data/preview/"; - - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - scan.useDelimiter("\\n"); - while (scan.hasNext()) { - String line = scan.next(); - if (line.contains("class=\"preview")) { - for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf( - key, pos + key.length())) { - int endPos = line.indexOf("\"", pos); - if (endPos >= 0) { - String id = line.substring(pos + key.length(), endPos); - id = staticSite + "/data/" + id; - - int dotPos = id.lastIndexOf("."); - if (dotPos >= 0) { - id = id.substring(0, dotPos); - builder.append("["); - builder.append(id); - builder.append("]<br/>"); - } - } - } + private Image getCover() throws IOException { + Image image = null; + List<Entry<String, URL>> chapters = getChapters(null); + if (!chapters.isEmpty()) { + URL chap1Url = chapters.get(0).getValue(); + String imgsChap1 = getChapterContent(chap1Url, 1, null); + if (!imgsChap1.isEmpty()) { + imgsChap1 = imgsChap1.split("]")[0].substring(1).trim(); + image = bsImages.getImage(this, new URL(imgsChap1)); } } - return builder.toString(); + return image; } - @Override - protected URL getCanonicalUrl(URL source) { - if (isSearch(source)) { - // /post?tags=tag1+tag2 -> ../post/index/1/tag1%32tag2 - String key = "?tags="; - if (source.toString().contains(key)) { - int pos = source.toString().indexOf(key); - String tags = source.toString().substring(pos + key.length()); - tags = tags.replace("+", "%20"); - - String base = source.toString().substring(0, pos); - if (!base.endsWith("/")) { - base += "/"; - } - if (base.endsWith("/search/")) { - base = base.substring(0, base.indexOf("/search/") + 1); - } + // always /posts.json/ url + private String getJsonUrl() { + String url = null; + if (isSearchOrSet(getSource())) { + url = getSource().toString().replace("/posts", "/posts.json"); + } - try { - return new URL(base + "index/1/" + tags); - } catch (MalformedURLException e) { - Instance.getTraceHandler().error(e); - } + if (isPool(getSource())) { + String poolNumber = getSource().getPath() + .substring("/pools/".length()); + url = "https://e621.net/posts.json" + "?tags=pool%3A" + poolNumber; + } + + if (url != null) { + // Note: one way to override the blacklist + String login = Instance.getInstance().getConfig() + .getString(Config.LOGIN_E621_LOGIN); + String apk = Instance.getInstance().getConfig() + .getString(Config.LOGIN_E621_APIKEY); + + if (login != null && !login.isEmpty() && apk != null + && !apk.isEmpty()) { + url = String.format("%s&login=%s&api_key=%s&_client=%s", url, + login, apk, "fanfix-" + Version.getCurrentVersion()); } } - return super.getCanonicalUrl(source); + return url; + } + + // note: will be removed at getCanonicalUrl() + private boolean isSetOriginalUrl(URL originalUrl) { + return originalUrl.getPath().startsWith("/post_sets/"); } private boolean isPool(URL url) { - return url.getPath().startsWith("/pool/"); + return url.getPath().startsWith("/pools/") + || url.getPath().startsWith("/pool/show/"); } - private boolean isSearch(URL url) { - return url.getPath().startsWith("/post/index/") - || (url.getPath().equals("/post/search") && url.getQuery() - .startsWith("tags=")); + // set will be renamed into search by canonical url + private boolean isSearchOrSet(URL url) { + return + // search: + (url.getPath().equals("/posts") && url.getQuery().contains("tags=")) + // or set: + || isSetOriginalUrl(url); } }