X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FE621.java;h=f3a7238e534e7bf74ca0c1471da3cba6aca28b5e;hb=5a6481dc6e87db089f93ef04bd03686916d42a88;hp=476e88b77765e741235909c53f3d82166302399c;hpb=595dfa7a6a1dc8041b3a5a4fe7ee2fae89029a69;p=fanfix.git
diff --git a/src/be/nikiroo/fanfix/supported/E621.java b/src/be/nikiroo/fanfix/supported/E621.java
index 476e88b..f3a7238 100644
--- a/src/be/nikiroo/fanfix/supported/E621.java
+++ b/src/be/nikiroo/fanfix/supported/E621.java
@@ -1,23 +1,35 @@
package be.nikiroo.fanfix.supported;
-import java.awt.image.BufferedImage;
import java.io.IOException;
-import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
import java.net.URL;
+import java.net.URLDecoder;
+import java.util.AbstractMap;
import java.util.ArrayList;
+import java.util.Date;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map.Entry;
-import java.util.Scanner;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
import be.nikiroo.fanfix.Instance;
-import be.nikiroo.fanfix.data.Chapter;
+import be.nikiroo.fanfix.bundles.Config;
import be.nikiroo.fanfix.data.MetaData;
-import be.nikiroo.fanfix.data.Story;
+import be.nikiroo.utils.Image;
+import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;
+import be.nikiroo.utils.Version;
/**
- * Support class for e621.net and e926.net, a Furry website supporting comics,
+ * Support class for e621.net and
+ * e926.net, a Furry website supporting comics,
* including some of MLP.
*
* e926.net only shows the "clean" images and
@@ -27,252 +39,396 @@ import be.nikiroo.utils.StringUtils;
*/
class E621 extends BasicSupport {
+ /**
+ * Check if the given URL is one this class can handle: a pool, a search
+ * or a set on e621.net or e926.net (with or without a leading "www.").
+ *
+ * @param url
+ * the URL to check
+ *
+ * @return TRUE if it is supported
+ */
@Override
- public String getSourceName() {
- return "e621.net";
+ protected boolean supports(URL url) {
+ // Compare the host without its optional "www." prefix
+ final String www = "www.";
+ String domain = url.getHost();
+ if (domain.startsWith(www)) {
+ domain = domain.substring(www.length());
+ }
+
+ boolean knownHost = "e621.net".equals(domain)
+ || "e926.net".equals(domain);
+ if (!knownHost) {
+ return false;
+ }
+
+ // Known host: only pools, searches and sets are handled
+ return isPool(url) || isSearchOrSet(url);
+ }
+
+ /**
+ * The content handled by this class is always reported as HTML.
+ *
+ * @return always TRUE
+ */
+ @Override
+ protected boolean isHtml() {
+ return true;
}
+ /**
+ * Build the metadata of the story from the other helpers of this class
+ * (title, author, date, tags and cover).
+ *
+ * @return the metadata, with the source URL used as both URL and UUID
+ *
+ * @throws IOException
+ * declared by this method; in the visible code, getCover() is a
+ * call that declares it
+ */
@Override
- protected MetaData getMeta(URL source, InputStream in) throws IOException {
+ protected MetaData getMeta() throws IOException {
MetaData meta = new MetaData();
- meta.setTitle(getTitle(reset(in)));
- meta.setAuthor(getAuthor(source, reset(in)));
- meta.setDate("");
- meta.setTags(new ArrayList()); // TODDO ???
- meta.setSource(getSourceName());
- meta.setUrl(source.toString());
- meta.setPublisher(getSourceName());
- meta.setUuid(source.toString());
+ meta.setTitle(getTitle());
+ meta.setAuthor(getAuthor());
+ meta.setDate(bsHelper.formatDate(getDate()));
+ meta.setTags(getTags());
+ // The source URL is used both as URL and as unique identifier
+ meta.setUrl(getSource().toString());
+ meta.setUuid(getSource().toString());
meta.setLuid("");
- meta.setLang("EN");
- meta.setSubject("");
- meta.setType(getType().toString());
+ meta.setLang("en");
+ meta.setSubject("Furry");
meta.setImageDocument(true);
- meta.setCover(getCover(source));
+ meta.setCover(getCover());
+ // NOTE(review): presumably flags the cover as not being a dedicated
+ // cover image (getCover() uses the first image of the first chapter)
+ // -- confirm against MetaData
+ meta.setFakeCover(true);
return meta;
}
+ /**
+ * Describe the story.
+ * For a search or a set, the description is a generated text with the
+ * host of the source, the time of creation and the tags; for a pool, it is
+ * the HTML content of the element with the id "description", if any.
+ *
+ * @return the description, or NULL for a pool without a "description"
+ * element (or a source that is neither a search, a set nor a pool)
+ *
+ * @throws IOException
+ * declared by this method; no visible call in it declares it
+ */
@Override
- public Story process(URL url) throws IOException {
- // There is no chapters on e621, just pagination...
- Story story = super.process(url);
+ protected String getDesc() throws IOException {
+ if (isSearchOrSet(getSource())) {
+ // NOTE(review): several literals below are empty or span two lines;
+ // their HTML markup seems to have been lost from this copy of the
+ // diff -- confirm against the repository before applying
+ StringBuilder builder = new StringBuilder();
+ builder.append("");
+ builder.append("A collection of images from ")
+ .append(getSource().getHost()) //
+ .append("
\n") //
+ .append(" Time of creation: "
+ + StringUtils.fromTime(new Date().getTime()))
+ .append("
\n") //
+ .append(" Tags: ");//
+ for (String tag : getTags()) {
+ builder.append(
+ "\n
")
+ .append(tag);
+ }
+ builder.append("\n
");
- Chapter only = new Chapter(1, null);
- for (Chapter chap : story) {
- only.getParagraphs().addAll(chap.getParagraphs());
+ return builder.toString();
}
- story.getChapters().clear();
- story.getChapters().add(only);
+ if (isPool(getSource())) {
+ Element el = getSourceNode().getElementById("description");
+ if (el != null) {
+ return el.html();
+ }
+ }
- return story;
+ return null;
}
+ /**
+ * List the chapters of this story: one chapter per non-empty page of the
+ * JSON posts listing, in reverse page order (the last page is "Page 1").
+ *
+ * The pages are probed one by one until a page without posts is found. A
+ * page that cannot be read is logged and ends the probing (it used to be
+ * skipped, which never ended when the failure was persistent).
+ *
+ * @param pg
+ * not used by this method
+ *
+ * @return the chapters as (name, JSON page URL) pairs; empty when there is
+ * no JSON URL or no readable page
+ *
+ * @throws IOException
+ * if the thread is interrupted while waiting between two
+ * requests, or if a page URL is malformed
+ */
@Override
- protected boolean supports(URL url) {
- String host = url.getHost();
- if (host.startsWith("www.")) {
- host = host.substring("www.".length());
+ protected List<Entry<String, URL>> getChapters(Progress pg)
+ throws IOException {
+ // Number of the last non-empty page; stays 0 (no chapter at all) when
+ // there is no JSON URL, instead of building a "null&page=1" URL
+ int i = 0;
+ String jsonUrl = getJsonUrl();
+ if (jsonUrl != null) {
+ for (i = 1; true; i++) {
+ if (i > 1) {
+ try {
+ // The API does not accept more than 2 request per sec,
+ // and asks us to limit at one per sec when possible
+ Thread.sleep(1000);
+ } catch (InterruptedException e) {
+ // Somebody asked us to stop: keep the interrupt flag
+ // for the callers and abort instead of hammering the API
+ Thread.currentThread().interrupt();
+ throw new IOException(
+ "Interrupted while listing the pages of "
+ + getSource(), e);
+ }
+ }
+
+ try {
+ JSONObject json = getJson(jsonUrl + "&page=" + i, false);
+ if (!json.has("posts"))
+ break;
+ JSONArray posts = json.getJSONArray("posts");
+ if (posts.isEmpty())
+ break;
+ } catch (Exception e) {
+ // Treat an unreadable page as the end of the listing:
+ // going on would loop forever on a persistent failure
+ Instance.getInstance().getTraceHandler().error(e);
+ break;
+ }
+ }
+
+ // The last page was empty (or unreadable):
+ i--;
}
- return ("e621.net".equals(host) || "e926.net".equals(host))
- && url.getPath().startsWith("/pool/");
+ // The pages and images are in reverse order on /posts/
+ List<Entry<String, URL>> chapters = new LinkedList<Entry<String, URL>>();
+ for (int page = i; page > 0; page--) {
+ chapters.add(new AbstractMap.SimpleEntry<String, URL>(
+ "Page " + Integer.toString(i - page + 1),
+ new URL(jsonUrl + "&page=" + page)));
+ }
+
+ return chapters;
}
+ /**
+ * Get the content of a chapter: the image URLs of one page of the JSON
+ * posts listing, each written between "[" and "]", in the reverse order
+ * of the "posts" array.
+ *
+ * @param chapUrl
+ * the JSON URL of the page
+ * @param number
+ * the chapter number, only used in the error message
+ * @param pg
+ * not used by this method
+ *
+ * @return the content (empty if no post has a usable file URL)
+ *
+ * @throws IOException
+ * declared by this method -- presumably raised by getJson(),
+ * confirm against its declaration
+ */
@Override
- protected boolean isHtml() {
- return true;
- }
+ protected String getChapterContent(URL chapUrl, int number, Progress pg)
+ throws IOException {
+ StringBuilder builder = new StringBuilder();
+
+ JSONObject json = getJson(chapUrl, false);
+ JSONArray postsArr = json.getJSONArray("posts");
- private BufferedImage getCover(URL source) throws IOException {
- InputStream in = Instance.getCache().open(source, this, true);
- String images = getChapterContent(new URL(source.toString() + "?page="
- + 1), in, 1);
- if (!images.isEmpty()) {
- int pos = images.indexOf('\n');
- if (pos >= 0) {
- images = images.substring(1, pos - 1);
- return getImage(this, null, images);
+ // The pages and images are in reverse order on /posts/
+ // (entries that are not JSON objects are dropped on the way)
+ List posts = new ArrayList(postsArr.length());
+ for (int i = postsArr.length() - 1; i >= 0; i--) {
+ Object o = postsArr.get(i);
+ if (o instanceof JSONObject)
+ posts.add((JSONObject) o);
+ }
+
+ for (JSONObject post : posts) {
+ // Posts without a "file" object or without a "url" key are skipped
+ if (!post.has("file"))
+ continue;
+ JSONObject file = post.getJSONObject("file");
+ if (!file.has("url"))
+ continue;
+
+ try {
+ String url = file.getString("url");
+ builder.append("[");
+ builder.append(url);
+ // NOTE(review): the literal below spans two lines in this copy
+ // of the diff; its markup seems to have been lost -- confirm
+ // against the repository
+ builder.append("]
");
+ } catch (JSONException e) {
+ // Can be NULL if filtered
+ // When the value is NULL, we get an exception
+ // but the "has" method still returns true
+ Instance.getInstance().getTraceHandler()
+ .error("Cannot get image for chapter " + number + " of "
+ + getSource());
}
}
- return null;
+ return builder.toString();
}
- private String getAuthor(URL source, InputStream in) throws IOException {
- String author = getLine(in, "href=\"/post/show/", 0);
- if (author != null) {
- String key = "href=\"";
- int pos = author.indexOf(key);
- if (pos >= 0) {
- author = author.substring(pos + key.length());
- pos = author.indexOf("\"");
- if (pos >= 0) {
- author = author.substring(0, pos - 1);
- String page = source.getProtocol() + "://"
- + source.getHost() + author;
- try {
- InputStream pageIn = Instance.getCache().open(
- new URL(page), this, false);
- try {
- key = "class=\"tag-type-artist\"";
- author = getLine(pageIn, key, 0);
- if (author != null) {
- pos = author.indexOf("= 0) {
- author = author.substring(pos);
- pos = author.indexOf("");
- if (pos >= 0) {
- author = author.substring(0, pos);
- return StringUtils.unhtml(author);
- }
- }
- }
- } finally {
- pageIn.close();
- }
- } catch (Exception e) {
- // No author found
+ /**
+ * Convert the given URL into the form used by the rest of this class:
+ * - a search for a numeric pool ("/posts" with a query starting with
+ * "tags=pool%3A" followed by a number only) becomes "/pools/NUMBER";
+ * - a set URL ("/post_sets/...") is replaced by the first link with a
+ * non-empty "href" found in an element of class "set-shortname" of the
+ * downloaded set page;
+ * - a "/pool/show/" URL becomes a "/pools/" URL.
+ * Anything else (or any failure above) is left to the parent
+ * implementation.
+ *
+ * @param source
+ * the URL to convert
+ *
+ * @return the canonical URL
+ */
+ @Override
+ protected URL getCanonicalUrl(URL source) {
+ // Convert search-pools into proper pools
+ if (source.getPath().equals("/posts") && source.getQuery() != null
+ && source.getQuery().startsWith("tags=pool%3A")) {
+ String poolNumber = source.getQuery()
+ .substring("tags=pool%3A".length());
+ try {
+ // Only used as a check: anything but a plain number (more
+ // tags, more parameters...) throws and is left as a search
+ Integer.parseInt(poolNumber);
+ String base = source.getProtocol() + "://" + source.getHost();
+ if (source.getPort() != -1) {
+ base = base + ":" + source.getPort();
+ }
+ source = new URL(base + "/pools/" + poolNumber);
+ } catch (NumberFormatException e) {
+ // Not a simple pool, skip
+ } catch (MalformedURLException e) {
+ // Cannot happen
+ }
+ }
+
+ if (isSetOriginalUrl(source)) {
+ try {
+ // NOTE(review): the stream returned by open(...) is not closed
+ // in this method -- confirm whether DataUtil.load() or the
+ // cache takes care of it
+ Document doc = DataUtil.load(Instance.getInstance().getCache()
+ .open(source, this, false), "UTF-8", source.toString());
+ for (Element shortname : doc
+ .getElementsByClass("set-shortname")) {
+ for (Element el : shortname.getElementsByTag("a")) {
+ if (!el.attr("href").isEmpty())
+ return new URL(el.absUrl("href"));
}
}
+ } catch (IOException e) {
+ Instance.getInstance().getTraceHandler().error(e);
}
}
- return null;
+ if (isPool(source)) {
+ try {
+ return new URL(
+ source.toString().replace("/pool/show/", "/pools/"));
+ } catch (MalformedURLException e) {
+ // Ignored: the URL then goes through the parent implementation
+ // below
+ }
+ }
+
+ return super.getCanonicalUrl(source);
}
- private String getTitle(InputStream in) throws IOException {
- String title = getLine(in, "", 0);
- if (title != null) {
- int pos = title.indexOf('>');
- if (pos >= 0) {
- title = title.substring(pos + 1);
- pos = title.indexOf('<');
- if (pos >= 0) {
- title = title.substring(0, pos);
- }
- }
+ // The text of the "title" element of the source page, without the
+ // "e621", "-" and "Pool" parts found at its start or at its end;
+ // searches and sets are prefixed with "[e621] " (or are just "e621" when
+ // nothing is left), and a page without "title" element gives ""
+ private String getTitle() {
+ String title = "";
- if (title.startsWith("Pool:")) {
- title = title.substring("Pool:".length());
+ Element el = getSourceNode().getElementsByTag("title").first();
+ if (el != null) {
+ title = el.text().trim();
+ }
+
+ // Each entry is tried once at the start and once at the end, in this
+ // order, which is why "e621" and "-" are listed twice
+ for (String s : new String[] { "e621", "-", "e621", "Pool", "-" }) {
+ if (title.startsWith(s)) {
+ title = title.substring(s.length()).trim();
+ }
+ if (title.endsWith(s)) {
+ title = title.substring(0, title.length() - s.length()).trim();
}
+ }
- title = StringUtils.unhtml(title).trim();
+ if (isSearchOrSet(getSource())) {
+ title = title.isEmpty() ? "e621" : "[e621] " + title;
}
return title;
}
- @Override
- protected String getDesc(URL source, InputStream in) throws IOException {
- String desc = getLine(in, "margin-bottom: 2em;", 0);
+ // The distinct "artist" tags of the posts returned by the JSON URL (a
+ // single request, without "page" parameter), in order of first
+ // appearance and separated by ", "; an empty String if there is no JSON
+ // URL or no artist (an error is only printed, the artists found before
+ // it are kept)
+ private String getAuthor() {
+ List list = new ArrayList();
+ String jsonUrl = getJsonUrl();
+ if (jsonUrl != null) {
+ try {
+ JSONObject json = getJson(jsonUrl, false);
+ JSONArray posts = json.getJSONArray("posts");
+ for (Object obj : posts) {
+ if (!(obj instanceof JSONObject))
+ continue;
+
+ JSONObject post = (JSONObject) obj;
+ if (!post.has("tags"))
+ continue;
+
+ JSONObject tags = post.getJSONObject("tags");
+ if (!tags.has("artist"))
+ continue;
+
+ JSONArray artists = tags.getJSONArray("artist");
+ for (Object artist : artists) {
+ // Keep each artist only once
+ if (list.contains(artist.toString()))
+ continue;
+
+ list.add(artist.toString());
+ }
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
- if (desc != null) {
- StringBuilder builder = new StringBuilder();
+ StringBuilder builder = new StringBuilder();
+ for (String artist : list) {
+ if (builder.length() > 0) {
+ builder.append(", ");
+ }
+ builder.append(artist);
+ }
- boolean inTags = false;
- for (char car : desc.toCharArray()) {
- if ((inTags && car == '>') || (!inTags && car == '<')) {
- inTags = !inTags;
- }
+ return builder.toString();
+ }
- if (inTags) {
- builder.append(car);
+ // The "created_at" value of the first post (in the order of the JSON
+ // "posts" array) that has one, as returned by the JSON URL; an empty
+ // String if there is no JSON URL, no such post, or on error (the error is
+ // only printed)
+ private String getDate() {
+ String jsonUrl = getJsonUrl();
+ if (jsonUrl != null) {
+ try {
+ JSONObject json = getJson(jsonUrl, false);
+ JSONArray posts = json.getJSONArray("posts");
+ for (Object obj : posts) {
+ if (!(obj instanceof JSONObject))
+ continue;
+
+ JSONObject post = (JSONObject) obj;
+ if (!post.has("created_at"))
+ continue;
+
+ return post.getString("created_at");
}
+ } catch (Exception e) {
+ e.printStackTrace();
}
-
- return builder.toString().trim();
}
- return null;
+ return "";
}
- @Override
- protected List> getChapters(URL source, InputStream in)
- throws IOException {
- List> urls = new ArrayList>();
- int last = 1; // no pool/show when only one page
-
- @SuppressWarnings("resource")
- Scanner scan = new Scanner(in, "UTF-8");
- scan.useDelimiter("\\n");
- while (scan.hasNext()) {
- String line = scan.next();
- for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
- .indexOf(source.getPath(), pos + source.getPath().length())) {
- int equalPos = line.indexOf("=", pos);
- int quotePos = line.indexOf("\"", pos);
- if (equalPos >= 0 && quotePos > equalPos) {
- String snum = line.substring(equalPos + 1, quotePos);
- try {
- int num = Integer.parseInt(snum);
- if (num > last) {
- last = num;
- }
- } catch (NumberFormatException e) {
- }
+ // The tags of a search or set URL: the "tags=" value of its query, split
+ // on "+", each part being trimmed and URL-decoded (UTF-8)
+ // no tags for pools
+ private List getTags() {
+ List tags = new ArrayList();
+ if (isSearchOrSet(getSource())) {
+ String str = getTagsFromUrl(getSource());
+ for (String tag : str.split("\\+")) {
+ try {
+ tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
+ } catch (UnsupportedEncodingException e) {
+ // Not expected with "UTF-8"; the tag would simply be skipped
}
}
}
- for (int i = 1; i <= last; i++) {
- final String key = Integer.toString(i);
- final URL value = new URL(source.toString() + "?page=" + i);
- urls.add(new Entry() {
- public URL setValue(URL value) {
- return null;
- }
+ return tags;
+ }
- public URL getValue() {
- return value;
- }
+ // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query,
+ // or an empty String if there is no URL, no query or no "tags=" in it
+ private String getTagsFromUrl(URL url) {
+ // URL.getQuery() returns NULL when the URL has no query part
+ String query = url == null ? null : url.getQuery();
+ if (query == null) {
+ return "";
+ }
+
+ int pos = query.indexOf("tags=");
- public String getKey() {
- return key;
- }
- });
+ if (pos < 0) {
+ return "";
}
- return urls;
+ String tags = query.substring(pos + "tags=".length());
+
+ // Cut at the next parameter or path separator, even when it comes
+ // first (an empty "tags=" value must not return the next parameter)
+ pos = tags.indexOf('&');
+ if (pos >= 0) {
+ tags = tags.substring(0, pos);
+ }
+ pos = tags.indexOf('/');
+ if (pos >= 0) {
+ tags = tags.substring(0, pos);
+ }
+
+ return tags;
}
- @Override
- protected String getChapterContent(URL source, InputStream in, int number)
- throws IOException {
- StringBuilder builder = new StringBuilder();
- String staticSite = "https://static1.e621.net";
- if (source.getHost().contains("e926")) {
- staticSite = staticSite.replace("e621", "e926");
+ // The cover is the first image of the first chapter; NULL if there is no
+ // chapter or if the first chapter has no content
+ // (the chapters list and the content of the first chapter are both
+ // computed again here, with no Progress)
+ private Image getCover() throws IOException {
+ Image image = null;
+ List> chapters = getChapters(null);
+ if (!chapters.isEmpty()) {
+ URL chap1Url = chapters.get(0).getValue();
+ String imgsChap1 = getChapterContent(chap1Url, 1, null);
+ if (!imgsChap1.isEmpty()) {
+ // The content is a list of "[url]" entries: keep what is
+ // between the first character and the first "]"
+ imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
+ image = bsImages.getImage(this, new URL(imgsChap1));
+ }
}
- String key = staticSite + "/data/preview/";
-
- @SuppressWarnings("resource")
- Scanner scan = new Scanner(in, "UTF-8");
- scan.useDelimiter("\\n");
- while (scan.hasNext()) {
- String line = scan.next();
- if (line.contains("class=\"preview\"")) {
- for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
- key, pos + key.length())) {
- int endPos = line.indexOf("\"", pos);
- if (endPos >= 0) {
- String id = line.substring(pos + key.length(), endPos);
- id = staticSite + "/data/" + id;
-
- int dotPos = id.lastIndexOf(".");
- if (dotPos >= 0) {
- id = id.substring(0, dotPos);
- builder.append("[");
- builder.append(id);
- builder.append("]\n");
- }
- }
- }
+ return image;
+ }
+
+ // always /posts.json/ url
+ // The URL of the JSON listing of the posts of the source (search, set or
+ // pool), or NULL if the source is none of them; the login, the API key
+ // and a client name are appended when both login and API key are
+ // configured
+ private String getJsonUrl() {
+ String url = null;
+ if (isSearchOrSet(getSource())) {
+ url = getSource().toString().replace("/posts", "/posts.json");
+ }
+
+ if (isPool(getSource())) {
+ // Assumes the "/pools/NUMBER" form of the path (isPool() also
+ // accepts "/pool/show/", which getCanonicalUrl() converts)
+ String poolNumber = getSource().getPath()
+ .substring("/pools/".length());
+ // NOTE(review): the host is always e621.net here, even when the
+ // source is on e926.net (which supports() accepts) -- confirm this
+ // is intended
+ url = "https://e621.net/posts.json" + "?tags=pool%3A" + poolNumber;
+ }
+
+ if (url != null) {
+ // Note: one way to override the blacklist
+ String login = Instance.getInstance().getConfig()
+ .getString(Config.LOGIN_E621_LOGIN);
+ String apk = Instance.getInstance().getConfig()
+ .getString(Config.LOGIN_E621_APIKEY);
+
+ // The values are appended as they are (no URL encoding)
+ if (login != null && !login.isEmpty() && apk != null
+ && !apk.isEmpty()) {
+ url = String.format("%s&login=%s&api_key=%s&_client=%s", url,
+ login, apk, "fanfix-" + Version.getCurrentVersion());
}
}
- return builder.toString();
+ return url;
+ }
+
+ // TRUE for a set URL (its path starts with "/post_sets/")
+ // note: such URLs are meant to be replaced at getCanonicalUrl()
+ private boolean isSetOriginalUrl(URL originalUrl) {
+ String path = originalUrl.getPath();
+ return path.startsWith("/post_sets/");
+ }
+
+ // TRUE for a pool URL, in either the "/pools/" or the "/pool/show/" form
+ private boolean isPool(URL url) {
+ String path = url.getPath();
+ if (path.startsWith("/pools/")) {
+ return true;
+ }
+
+ return path.startsWith("/pool/show/");
+ }
+
+ // TRUE for a search ("/posts" with a "tags=" in its query) or for a set
+ // set will be renamed into search by canonical url
+ private boolean isSearchOrSet(URL url) {
+ // URL.getQuery() returns NULL when the URL has no query part (a plain
+ // "/posts" URL), which is then simply not a search
+ String query = url.getQuery();
+ boolean search = url.getPath().equals("/posts") && query != null
+ && query.contains("tags=");
+
+ return search || isSetOriginalUrl(url);
+ }
}