X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FE621.java;h=39bbe867d9eb2cdfc630c80df9393bc9bc47aba4;hb=95c926ea1d5b7c75d5bbc81c50d80f5509d28a4d;hp=4b2fb9028ba04c5ea50286eea031ebc445c39a34;hpb=c4b18c94e7f8413abdc5b760c9bbbd9ae5226f13;p=fanfix.git diff --git a/src/be/nikiroo/fanfix/supported/E621.java b/src/be/nikiroo/fanfix/supported/E621.java deleted file mode 100644 index 4b2fb90..0000000 --- a/src/be/nikiroo/fanfix/supported/E621.java +++ /dev/null @@ -1,309 +0,0 @@ -package be.nikiroo.fanfix.supported; - -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; -import java.net.MalformedURLException; -import java.net.URL; -import java.net.URLDecoder; -import java.util.AbstractMap; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.LinkedList; -import java.util.List; -import java.util.Map.Entry; - -import org.jsoup.helper.DataUtil; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; - -import be.nikiroo.fanfix.Instance; -import be.nikiroo.fanfix.data.MetaData; -import be.nikiroo.utils.IOUtils; -import be.nikiroo.utils.Image; -import be.nikiroo.utils.Progress; -import be.nikiroo.utils.StringUtils; - -/** - * Support class for e621.net and - * e926.net, a Furry website supporting comics, - * including some of MLP. - *

- * e926.net only shows the "clean" images and - * comics, but it can be difficult to browse. - * - * @author niki - */ -class E621 extends BasicSupport { - @Override - protected boolean supports(URL url) { - String host = url.getHost(); - if (host.startsWith("www.")) { - host = host.substring("www.".length()); - } - - return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url)); - } - - @Override - protected boolean isHtml() { - return true; - } - - @Override - protected MetaData getMeta() throws IOException { - MetaData meta = new MetaData(); - - meta.setTitle(getTitle()); - meta.setAuthor(getAuthor()); - meta.setDate(""); - meta.setTags(getTags()); - meta.setSource(getType().getSourceName()); - meta.setUrl(getSource().toString()); - meta.setPublisher(getType().getSourceName()); - meta.setUuid(getSource().toString()); - meta.setLuid(""); - meta.setLang("en"); - meta.setSubject("Furry"); - meta.setType(getType().toString()); - meta.setImageDocument(true); - meta.setCover(getCover()); - meta.setFakeCover(true); - - return meta; - } - - @Override - protected String getDesc() throws IOException { - if (isSearchOrSet(getSource())) { - StringBuilder builder = new StringBuilder(); - builder.append("A collection of images from ").append(getSource().getHost()).append("\n") // - .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") // - .append("\tTags: ");// - for (String tag : getTags()) { - builder.append("\t\t").append(tag); - } - - return builder.toString(); - } - - if (isPool(getSource())) { - Element el = getSourceNode().getElementById("description"); - if (el != null) { - return el.text(); - } - } - - return null; - } - - @Override - protected List> getChapters(Progress pg) throws IOException { - if (isPool(getSource())) { - String baseUrl = "https://e621.net/" + getSource().getPath() + "?page="; - return getChapters(getSource(), pg, baseUrl, ""); - } else if (isSearchOrSet(getSource())) { - String baseUrl = "https://e621.net/posts/?page="; - String search = "&tags=" + getTagsFromUrl(getSource()); - return getChapters(getSource(), pg, baseUrl, search); - } - - return new LinkedList>(); - } - - private List> getChapters(URL source, Progress pg, String baseUrl, String parameters) - throws IOException { - List> urls = new ArrayList>(); - - if (source.getHost().contains("e926")) { - baseUrl = baseUrl.replace("e621", "e926"); - } - - for (int i = 1; true; i++) { - URL url = new URL(baseUrl + i + parameters); - try { - InputStream pageI = Instance.getCache().open(url, this, false); - try { - if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) { - break; - } - urls.add(new AbstractMap.SimpleEntry("Page " + Integer.toString(i), url)); - } finally { - pageI.close(); - } - } catch (Exception e) { - break; - } - } - - // They are sorted in reverse order on the website - Collections.reverse(urls); - return urls; - } - - @Override - protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException { - StringBuilder builder = new StringBuilder(); - Document chapterNode = loadDocument(chapUrl); - for (Element el : chapterNode.getElementsByTag("article")) { - builder.append("["); - builder.append(el.attr("data-file-url")); - builder.append("]
"); - } - - return builder.toString(); - } - - @Override - protected URL getCanonicalUrl(URL source) { - if (isSetOriginalUrl(source)) { - try { - Document doc = DataUtil.load(Instance.getCache().open(source, this, false), "UTF-8", source.toString()); - for (Element shortname : doc.getElementsByClass("set-shortname")) { - for (Element el : shortname.getElementsByTag("a")) { - if (!el.attr("href").isEmpty()) - return new URL(el.absUrl("href")); - } - } - } catch (IOException e) { - Instance.getTraceHandler().error(e); - } - } - - if (isPool(source)) { - try { - return new URL(source.toString().replace("/pool/show/", "/pools/")); - } catch (MalformedURLException e) { - } - } - - return super.getCanonicalUrl(source); - } - - // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query - private String getTagsFromUrl(URL url) { - String tags = url == null ? "" : url.getQuery(); - int pos = tags.indexOf("tags="); - - if (pos >= 0) { - tags = tags.substring(pos).substring("tags=".length()); - } else { - return ""; - } - - pos = tags.indexOf('&'); - if (pos > 0) { - tags = tags.substring(0, pos); - } - pos = tags.indexOf('/'); - if (pos > 0) { - tags = tags.substring(0, pos); - } - - return tags; - } - - private String getTitle() { - String title = ""; - - Element el = getSourceNode().getElementsByTag("title").first(); - if (el != null) { - title = el.text().trim(); - } - - for (String s : new String[] { "e621", "-", "e621" }) { - if (title.startsWith(s)) { - title = title.substring(s.length()).trim(); - } - if (title.endsWith(s)) { - title = title.substring(0, title.length() - s.length()).trim(); - } - - } - - if (isSearchOrSet(getSource())) { - title = title.isEmpty() ? "e621" : "[e621] " + title; - } - return title; - } - - private String getAuthor() throws IOException { - StringBuilder builder = new StringBuilder(); - - if (isSearchOrSet(getSource())) { - for (Element el : getSourceNode().getElementsByClass("search-tag")) { - if (el.attr("itemprop").equals("author")) { - if (builder.length() > 0) { - builder.append(", "); - } - builder.append(el.text().trim()); - } - } - } - - if (isPool(getSource())) { - String desc = getDesc(); - String descL = desc.toLowerCase(); - - if (descL.startsWith("by:") || descL.startsWith("by ")) { - desc = desc.substring(3).trim(); - desc = desc.split("\n")[0]; - - String tab[] = desc.split(" "); - for (int i = 0; i < Math.min(tab.length, 5); i++) { - if (tab[i].startsWith("http")) - break; - builder.append(" ").append(tab[i]); - } - } - } - - return builder.toString(); - } - - // no tags for pools - private List getTags() { - List tags = new ArrayList(); - if (isSearchOrSet(getSource())) { - String str = getTagsFromUrl(getSource()); - for (String tag : str.split("\\+")) { - try { - tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim()); - } catch (UnsupportedEncodingException e) { - } - } - } - - return tags; - } - - private Image getCover() throws IOException { - Image image = null; - List> chapters = getChapters(null); - if (!chapters.isEmpty()) { - URL url = chapters.get(0).getValue(); - image = bsImages.getImage(this, url); - } - - return image; - } - - // note: will be removed at getCanonicalUrl() - private boolean isSetOriginalUrl(URL originalUrl) { - return originalUrl.getPath().startsWith("/post_sets/"); - } - - private boolean isPool(URL url) { - return url.getPath().startsWith("/pools/") || url.getPath().startsWith("/pool/show/"); - } - - // set will be renamed into search by canonical url - private boolean isSearchOrSet(URL url) { - return - // search: - (url.getPath().equals("/posts") && url.getQuery().contains("tags=")) - // or set: - || isSetOriginalUrl(url); - } -}