import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
-import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.Map.Entry;
-import java.util.Scanner;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
import be.nikiroo.fanfix.Instance;
-import be.nikiroo.fanfix.data.Chapter;
import be.nikiroo.fanfix.data.MetaData;
-import be.nikiroo.fanfix.data.Story;
+import be.nikiroo.utils.IOUtils;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;
/**
- * Support class for <a href="http://e621.net/">e621.net</a> and <a
- * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
+ * Support class for <a href="http://e621.net/">e621.net</a> and
+ * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
* including some of MLP.
* <p>
* <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
*
* @author niki
*/
-class E621 extends BasicSupport_Deprecated {
- @Override
- protected MetaData getMeta(URL source, InputStream in) throws IOException {
- MetaData meta = new MetaData();
-
- meta.setTitle(getTitle(reset(in)));
- meta.setAuthor(getAuthor(source, reset(in)));
- meta.setDate("");
- meta.setTags(getTags(source, reset(in), false));
- meta.setSource(getType().getSourceName());
- meta.setUrl(source.toString());
- meta.setPublisher(getType().getSourceName());
- meta.setUuid(source.toString());
- meta.setLuid("");
- meta.setLang("en");
- meta.setSubject("Furry");
- meta.setType(getType().toString());
- meta.setImageDocument(true);
- meta.setCover(getCover(source, reset(in)));
- meta.setFakeCover(true);
-
- return meta;
- }
-
- private List<String> getTags(URL source, InputStream in, boolean authors) {
- List<String> tags = new ArrayList<String>();
-
- if (isSearch(source)) {
- String tagLine = getLine(in, "id=\"tag-sidebar\"", 1);
- if (tagLine != null) {
- String key = "href=\"";
- for (int pos = tagLine.indexOf(key); pos >= 0; pos = tagLine
- .indexOf(key, pos + 1)) {
- int end = tagLine.indexOf("\"", pos + key.length());
- if (end >= 0) {
- String href = tagLine.substring(pos, end);
- String subkey;
- if (authors)
- subkey = "?name=";
- else
- subkey = "?title=";
- if (href.contains(subkey)) {
- String tag = href.substring(href.indexOf(subkey)
- + subkey.length());
- try {
- tags.add(URLDecoder.decode(tag, "UTF-8"));
- } catch (UnsupportedEncodingException e) {
- // supported JVMs must have UTF-8 support
- e.printStackTrace();
- }
- }
- }
- }
-
- }
- }
-
- return tags;
- }
-
- @Override
- public Story process(URL url, Progress pg) throws IOException {
- // There is no chapters on e621, just pagination...
- Story story = super.process(url, pg);
-
- Chapter only = new Chapter(1, null);
- for (Chapter chap : story) {
- only.getParagraphs().addAll(chap.getParagraphs());
- }
-
- story.getChapters().clear();
- story.getChapters().add(only);
-
- return story;
- }
-
+class E621 extends BasicSupport {
@Override
protected boolean supports(URL url) {
String host = url.getHost();
host = host.substring("www.".length());
}
- return ("e621.net".equals(host) || "e926.net".equals(host))
- && (isPool(url) || isSearch(url));
+ return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
}
@Override
return true;
}
- private Image getCover(URL source, InputStream in) throws IOException {
- URL urlForCover = source;
- if (isPool(source)) {
- urlForCover = new URL(source.toString() + "?page=1");
- }
+ @Override
+ protected MetaData getMeta() throws IOException {
+ MetaData meta = new MetaData();
- String images = getChapterContent(urlForCover, in, 1, null);
- if (!images.isEmpty()) {
- int pos = images.indexOf("<br/>");
- if (pos >= 0) {
- images = images.substring(1, pos - 1);
- return getImage(this, null, images);
- }
- }
+ meta.setTitle(getTitle());
+ meta.setAuthor(getAuthor());
+ meta.setDate("");
+ meta.setTags(getTags());
+ meta.setSource(getType().getSourceName());
+ meta.setUrl(getSource().toString());
+ meta.setPublisher(getType().getSourceName());
+ meta.setUuid(getSource().toString());
+ meta.setLuid("");
+ meta.setLang("en");
+ meta.setSubject("Furry");
+ meta.setType(getType().toString());
+ meta.setImageDocument(true);
+ meta.setCover(getCover());
+ meta.setFakeCover(true);
- return null;
+ return meta;
}
- private String getAuthor(URL source, InputStream in) {
- if (isSearch(source)) {
+ @Override
+ protected String getDesc() throws IOException {
+ if (isSearchOrSet(getSource())) {
StringBuilder builder = new StringBuilder();
- for (String author : getTags(source, in, true)) {
- if (builder.length() > 0)
- builder.append(", ");
- builder.append(author);
+ builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
+ .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
+ .append("\tTags: ");//
+ for (String tag : getTags()) {
+ builder.append("\t\t").append(tag);
}
return builder.toString();
}
- String author = getLine(in, "href=\"/post/show/", 0);
- if (author != null) {
- String key = "href=\"";
- int pos = author.indexOf(key);
- if (pos >= 0) {
- author = author.substring(pos + key.length());
- pos = author.indexOf("\"");
- if (pos >= 0) {
- author = author.substring(0, pos - 1);
- String page = source.getProtocol() + "://"
- + source.getHost() + author;
- try {
- InputStream pageIn = Instance.getCache().open(
- new URL(page), this, false);
- try {
- key = "class=\"tag-type-artist\"";
- author = getLine(pageIn, key, 0);
- if (author != null) {
- pos = author.indexOf("<a href=\"");
- if (pos >= 0) {
- author = author.substring(pos);
- pos = author.indexOf("</a>");
- if (pos >= 0) {
- author = author.substring(0, pos);
- return StringUtils.unhtml(author);
- }
- }
- }
- } finally {
- pageIn.close();
- }
- } catch (Exception e) {
- // No author found
- }
- }
+ if (isPool(getSource())) {
+ Element el = getSourceNode().getElementById("description");
+ if (el != null) {
+ return el.text();
}
}
return null;
}
- private String getTitle(InputStream in) {
- String title = getLine(in, "<title>", 0);
- if (title != null) {
- int pos = title.indexOf('>');
- if (pos >= 0) {
- title = title.substring(pos + 1);
- pos = title.indexOf('<');
- if (pos >= 0) {
- title = title.substring(0, pos);
- }
- }
-
- if (title.startsWith("Pool:")) {
- title = title.substring("Pool:".length());
- }
-
- title = StringUtils.unhtml(title).trim();
- }
-
- return title;
- }
-
@Override
- protected String getDesc(URL source, InputStream in) throws IOException {
- String desc = getLine(in, "margin-bottom: 2em;", 0);
-
- if (desc != null) {
- StringBuilder builder = new StringBuilder();
-
- boolean inTags = false;
- for (char car : desc.toCharArray()) {
- if ((inTags && car == '>') || (!inTags && car == '<')) {
- inTags = !inTags;
- }
-
- if (inTags) {
- builder.append(car);
- }
- }
-
- return builder.toString().trim();
- }
-
- return null;
- }
-
- @Override
- protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
- Progress pg) throws IOException {
- if (isPool(source)) {
- return getChaptersPool(source, in, pg);
- } else if (isSearch(source)) {
- return getChaptersSearch(source, in, pg);
+ protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
+ if (isPool(getSource())) {
+ String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
+ return getChapters(getSource(), pg, baseUrl, "");
+ } else if (isSearchOrSet(getSource())) {
+ String baseUrl = "https://e621.net/posts/?page=";
+ String search = "&tags=" + getTagsFromUrl(getSource());
+ return getChapters(getSource(), pg, baseUrl, search);
}
return new LinkedList<Entry<String, URL>>();
}
- private List<Entry<String, URL>> getChaptersSearch(URL source,
- InputStream in, Progress pg) throws IOException {
+ private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
+ throws IOException {
List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
- String search = source.getPath();
- if (search.endsWith("/")) {
- search = search.substring(0, search.length() - 1);
- }
-
- int pos = search.lastIndexOf('/');
- if (pos >= 0) {
- search = search.substring(pos + 1);
- }
-
- String baseUrl = "https://e621.net/post/index/";
if (source.getHost().contains("e926")) {
baseUrl = baseUrl.replace("e621", "e926");
}
for (int i = 1; true; i++) {
- URL url = new URL(baseUrl + i + "/" + search + "/");
+ URL url = new URL(baseUrl + i + parameters);
try {
InputStream pageI = Instance.getCache().open(url, this, false);
try {
- if (getLine(pageI, "No posts matched your search.", 0) != null)
+ if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
break;
- urls.add(new AbstractMap.SimpleEntry<String, URL>("Page "
- + Integer.toString(i), url));
+ }
+ urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
} finally {
pageI.close();
}
return urls;
}
- private List<Entry<String, URL>> getChaptersPool(URL source,
- InputStream in, Progress pg) throws IOException {
- List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
- int last = 1; // no pool/show when only one page
-
- @SuppressWarnings("resource")
- Scanner scan = new Scanner(in, "UTF-8");
- scan.useDelimiter("\\n");
- while (scan.hasNext()) {
- String line = scan.next();
- for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
- .indexOf(source.getPath(), pos + source.getPath().length())) {
- int equalPos = line.indexOf("=", pos);
- int quotePos = line.indexOf("\"", pos);
- if (equalPos >= 0 && quotePos > equalPos) {
- String snum = line.substring(equalPos + 1, quotePos);
- try {
- int num = Integer.parseInt(snum);
- if (num > last) {
- last = num;
- }
- } catch (NumberFormatException e) {
+ @Override
+ protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
+ StringBuilder builder = new StringBuilder();
+ Document chapterNode = loadDocument(chapUrl);
+ for (Element el : chapterNode.getElementsByTag("article")) {
+ builder.append("[");
+ builder.append(el.attr("data-file-url"));
+ builder.append("]<br/>");
+ }
+
+ return builder.toString();
+ }
+
+ @Override
+ protected URL getCanonicalUrl(URL source) {
+ if (isSetOriginalUrl(source)) {
+ try {
+ Document doc = DataUtil.load(Instance.getCache().open(source, this, false), "UTF-8", source.toString());
+ for (Element shortname : doc.getElementsByClass("set-shortname")) {
+ for (Element el : shortname.getElementsByTag("a")) {
+ if (!el.attr("href").isEmpty())
+ return new URL(el.absUrl("href"));
}
}
+ } catch (IOException e) {
+ Instance.getTraceHandler().error(e);
}
}
- for (int i = 1; i <= last; i++) {
- urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
- .toString(i), new URL(source.toString() + "?page=" + i)));
+ return super.getCanonicalUrl(source);
+ }
+
+ // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
+ private String getTagsFromUrl(URL url) {
+ String tags = url == null ? "" : url.getQuery();
+ int pos = tags.indexOf("tags=");
+
+ if (pos >= 0) {
+ tags = tags.substring(pos).substring("tags=".length());
+ } else {
+ return "";
}
- return urls;
+ pos = tags.indexOf('&');
+ if (pos > 0) {
+ tags = tags.substring(0, pos);
+ }
+ pos = tags.indexOf('/');
+ if (pos > 0) {
+ tags = tags.substring(0, pos);
+ }
+
+ return tags;
}
- @Override
- protected String getChapterContent(URL source, InputStream in, int number,
- Progress pg) throws IOException {
- StringBuilder builder = new StringBuilder();
- String staticSite = "https://static1.e621.net";
- if (source.getHost().contains("e926")) {
- staticSite = staticSite.replace("e621", "e926");
+ private String getTitle() {
+ String title = "";
+
+ Element el = getSourceNode().getElementsByTag("title").first();
+ if (el != null) {
+ title = el.text().trim();
}
- String key = staticSite + "/data/preview/";
-
- @SuppressWarnings("resource")
- Scanner scan = new Scanner(in, "UTF-8");
- scan.useDelimiter("\\n");
- while (scan.hasNext()) {
- String line = scan.next();
- if (line.contains("class=\"preview")) {
- for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
- key, pos + key.length())) {
- int endPos = line.indexOf("\"", pos);
- if (endPos >= 0) {
- String id = line.substring(pos + key.length(), endPos);
- id = staticSite + "/data/" + id;
-
- int dotPos = id.lastIndexOf(".");
- if (dotPos >= 0) {
- id = id.substring(0, dotPos);
- builder.append("[");
- builder.append(id);
- builder.append("]<br/>");
- }
- }
- }
+ for (String s : new String[] { "e621", "-", "e621" }) {
+ if (title.startsWith(s)) {
+ title = title.substring(s.length()).trim();
}
+ if (title.endsWith(s)) {
+ title = title.substring(0, title.length() - s.length()).trim();
+ }
+
}
- return builder.toString();
+ if (isSearchOrSet(getSource())) {
+ title = title.isEmpty() ? "e621" : "[e621] " + title;
+ }
+ return title;
}
- @Override
- protected URL getCanonicalUrl(URL source) {
- if (isSearch(source)) {
- // /post?tags=tag1+tag2 -> ../post/index/1/tag1%32tag2
- String key = "?tags=";
- if (source.toString().contains(key)) {
- int pos = source.toString().indexOf(key);
- String tags = source.toString().substring(pos + key.length());
- tags = tags.replace("+", "%20");
-
- String base = source.toString().substring(0, pos);
- if (!base.endsWith("/")) {
- base += "/";
+ private String getAuthor() throws IOException {
+ StringBuilder builder = new StringBuilder();
+
+ if (isSearchOrSet(getSource())) {
+ for (Element el : getSourceNode().getElementsByClass("search-tag")) {
+ if (el.attr("itemprop").equals("author")) {
+ if (builder.length() > 0) {
+ builder.append(", ");
+ }
+ builder.append(el.text().trim());
}
- if (base.endsWith("/search/")) {
- base = base.substring(0, base.indexOf("/search/") + 1);
+ }
+ }
+
+ if (isPool(getSource())) {
+ String desc = getDesc();
+ String descL = desc.toLowerCase();
+
+ if (descL.startsWith("by:") || descL.startsWith("by ")) {
+ desc = desc.substring(3).trim();
+ desc = desc.split("\n")[0];
+
+ String tab[] = desc.split(" ");
+ for (int i = 0; i < Math.min(tab.length, 5); i++) {
+ if (tab[i].startsWith("http"))
+ break;
+ builder.append(" ").append(tab[i]);
}
+ }
+ }
+ return builder.toString();
+ }
+
+ // no tags for pools
+ private List<String> getTags() {
+ List<String> tags = new ArrayList<String>();
+ if (isSearchOrSet(getSource())) {
+ String str = getTagsFromUrl(getSource());
+ for (String tag : str.split("\\+")) {
try {
- return new URL(base + "index/1/" + tags);
- } catch (MalformedURLException e) {
- Instance.getTraceHandler().error(e);
+ tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
+ } catch (UnsupportedEncodingException e) {
}
}
}
- return super.getCanonicalUrl(source);
+ return tags;
+ }
+
+ private Image getCover() throws IOException {
+ Image image = null;
+ List<Entry<String, URL>> chapters = getChapters(null);
+ if (!chapters.isEmpty()) {
+ URL url = chapters.get(0).getValue();
+ image = bsImages.getImage(this, url);
+ }
+
+ return image;
+ }
+
+ // note: will be removed at getCanonicalUrl()
+ private boolean isSetOriginalUrl(URL originalUrl) {
+ return originalUrl.getPath().startsWith("/post_sets/");
}
private boolean isPool(URL url) {
- return url.getPath().startsWith("/pool/");
+ return url.getPath().startsWith("/pools/");
}
- private boolean isSearch(URL url) {
- return url.getPath().startsWith("/post/index/")
- || (url.getPath().equals("/post/search") && url.getQuery()
- .startsWith("tags="));
+ // set will be renamed into search by canonical url
+ private boolean isSearchOrSet(URL url) {
+ return
+ // search:
+ (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
+ // or set:
+ || isSetOriginalUrl(url);
}
}