src/be/nikiroo/fanfix/supported/E621.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.io.UnsupportedEncodingException;
   6 import java.net.MalformedURLException;
   7 import java.net.URL;
   8 import java.net.URLDecoder;
   9 import java.util.AbstractMap;
  10 import java.util.ArrayList;
  11 import java.util.Collections;
  12 import java.util.Date;
  13 import java.util.LinkedList;
  14 import java.util.List;
  15 import java.util.Map.Entry;
  16
  17 import org.jsoup.helper.DataUtil;
  18 import org.jsoup.nodes.Document;
  19 import org.jsoup.nodes.Element;
  20
  21 import be.nikiroo.fanfix.Instance;
  22 import be.nikiroo.fanfix.data.MetaData;
  23 import be.nikiroo.utils.IOUtils;
  24 import be.nikiroo.utils.Image;
  25 import be.nikiroo.utils.Progress;
  26 import be.nikiroo.utils.StringUtils;
  27
  28 /**
  29  * Support class for <a href="http://e621.net/">e621.net</a> and
  30  * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
  31  * including some of MLP.
  32  * <p>
  33  * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
  34  * comics, but it can be difficult to browse.
  35  *
  36  * @author niki
  37  */
  38 class E621 extends BasicSupport {
  39         @Override
  40         protected boolean supports(URL url) {
  41                 String host = url.getHost();
  42                 if (host.startsWith("www.")) {
  43                         host = host.substring("www.".length());
  44                 }
  45
  46                 return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
  47         }
  48
  49         @Override
  50         protected boolean isHtml() {
  51                 return true;
  52         }
  53
  54         @Override
  55         protected MetaData getMeta() throws IOException {
  56                 MetaData meta = new MetaData();
  57
  58                 meta.setTitle(getTitle());
  59                 meta.setAuthor(getAuthor());
  60                 meta.setDate("");
  61                 meta.setTags(getTags());
  62                 meta.setSource(getType().getSourceName());
  63                 meta.setUrl(getSource().toString());
  64                 meta.setPublisher(getType().getSourceName());
  65                 meta.setUuid(getSource().toString());
  66                 meta.setLuid("");
  67                 meta.setLang("en");
  68                 meta.setSubject("Furry");
  69                 meta.setType(getType().toString());
  70                 meta.setImageDocument(true);
  71                 meta.setCover(getCover());
  72                 meta.setFakeCover(true);
  73
  74                 return meta;
  75         }
  76
  77         @Override
  78         protected String getDesc() throws IOException {
  79                 if (isSearchOrSet(getSource())) {
  80                         StringBuilder builder = new StringBuilder();
  81                         builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
  82                                         .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
  83                                         .append("\tTags: ");//
  84                         for (String tag : getTags()) {
  85                                 builder.append("\t\t").append(tag);
  86                         }
  87
  88                         return builder.toString();
  89                 }
  90
  91                 if (isPool(getSource())) {
  92                         Element el = getSourceNode().getElementById("description");
  93                         if (el != null) {
  94                                 return el.text();
  95                         }
  96                 }
  97
  98                 return null;
  99         }
 100
 101         @Override
 102         protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
 103                 if (isPool(getSource())) {
 104                         String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
 105                         return getChapters(getSource(), pg, baseUrl, "");
 106                 } else if (isSearchOrSet(getSource())) {
 107                         String baseUrl = "https://e621.net/posts/?page=";
 108                         String search = "&tags=" + getTagsFromUrl(getSource());
 109                         // sets are sorted in reverse order on the website
 110                         List<Entry<String, URL>> urls = getChapters(getSource(), pg,
 111                                         baseUrl, search);
 112                         Collections.reverse(urls);
 113                         return urls;
 114                 }
 115
 116                 return new LinkedList<Entry<String, URL>>();
 117         }
 118
 119         private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
 120                         throws IOException {
 121                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 122
 123                 if (source.getHost().contains("e926")) {
 124                         baseUrl = baseUrl.replace("e621", "e926");
 125                 }
 126
 127                 for (int i = 1; true; i++) {
 128                         URL url = new URL(baseUrl + i + parameters);
 129                         try {
 130                                 InputStream pageI = Instance.getInstance().getCache().open(url, this, false);
 131                                 try {
 132                                         if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
 133                                                 break;
 134                                         }
 135                                         urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
 136                                 } finally {
 137                                         pageI.close();
 138                                 }
 139                         } catch (Exception e) {
 140                                 break;
 141                         }
 142                 }
 143
 144                 return urls;
 145         }
 146
 147         @Override
 148         protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
 149                 StringBuilder builder = new StringBuilder();
 150                 Document chapterNode = loadDocument(chapUrl);
 151                 for (Element el : chapterNode.getElementsByTag("article")) {
 152                         builder.append("[");
 153                         builder.append(el.attr("data-file-url"));
 154                         builder.append("]<br/>");
 155                 }
 156
 157                 return builder.toString();
 158         }
 159
 160         @Override
 161         protected URL getCanonicalUrl(URL source) {
 162                 if (isSetOriginalUrl(source)) {
 163                         try {
 164                                 Document doc = DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", source.toString());
 165                                 for (Element shortname : doc.getElementsByClass("set-shortname")) {
 166                                         for (Element el : shortname.getElementsByTag("a")) {
 167                                                 if (!el.attr("href").isEmpty())
 168                                                         return new URL(el.absUrl("href"));
 169                                         }
 170                                 }
 171                         } catch (IOException e) {
 172                                 Instance.getInstance().getTraceHandler().error(e);
 173                         }
 174                 }
 175
 176                 if (isPool(source)) {
 177                         try {
 178                                 return new URL(source.toString().replace("/pool/show/", "/pools/"));
 179                         } catch (MalformedURLException e) {
 180                         }
 181                 }
 182
 183                 return super.getCanonicalUrl(source);
 184         }
 185
 186         // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
 187         private String getTagsFromUrl(URL url) {
 188                 String tags = url == null ? "" : url.getQuery();
 189                 int pos = tags.indexOf("tags=");
 190
 191                 if (pos >= 0) {
 192                         tags = tags.substring(pos).substring("tags=".length());
 193                 } else {
 194                         return "";
 195                 }
 196
 197                 pos = tags.indexOf('&');
 198                 if (pos > 0) {
 199                         tags = tags.substring(0, pos);
 200                 }
 201                 pos = tags.indexOf('/');
 202                 if (pos > 0) {
 203                         tags = tags.substring(0, pos);
 204                 }
 205
 206                 return tags;
 207         }
 208
 209         private String getTitle() {
 210                 String title = "";
 211
 212                 Element el = getSourceNode().getElementsByTag("title").first();
 213                 if (el != null) {
 214                         title = el.text().trim();
 215                 }
 216
 217                 for (String s : new String[] { "e621", "-", "e621" }) {
 218                         if (title.startsWith(s)) {
 219                                 title = title.substring(s.length()).trim();
 220                         }
 221                         if (title.endsWith(s)) {
 222                                 title = title.substring(0, title.length() - s.length()).trim();
 223                         }
 224
 225                 }
 226
 227                 if (isSearchOrSet(getSource())) {
 228                         title = title.isEmpty() ? "e621" : "[e621] " + title;
 229                 }
 230                 return title;
 231         }
 232
 233         private String getAuthor() throws IOException {
 234                 StringBuilder builder = new StringBuilder();
 235
 236                 if (isSearchOrSet(getSource())) {
 237                         for (Element el : getSourceNode().getElementsByClass("search-tag")) {
 238                                 if (el.attr("itemprop").equals("author")) {
 239                                         if (builder.length() > 0) {
 240                                                 builder.append(", ");
 241                                         }
 242                                         builder.append(el.text().trim());
 243                                 }
 244                         }
 245                 }
 246
 247                 if (isPool(getSource())) {
 248                         String desc = getDesc();
 249                         String descL = desc.toLowerCase();
 250
 251                         if (descL.startsWith("by:") || descL.startsWith("by ")) {
 252                                 desc = desc.substring(3).trim();
 253                                 desc = desc.split("\n")[0];
 254
 255                                 String tab[] = desc.split(" ");
 256                                 for (int i = 0; i < Math.min(tab.length, 5); i++) {
 257                                         if (tab[i].startsWith("http"))
 258                                                 break;
 259                                         builder.append(" ").append(tab[i]);
 260                                 }
 261                         }
 262                 }
 263
 264                 return builder.toString();
 265         }
 266
 267         // no tags for pools
 268         private List<String> getTags() {
 269                 List<String> tags = new ArrayList<String>();
 270                 if (isSearchOrSet(getSource())) {
 271                         String str = getTagsFromUrl(getSource());
 272                         for (String tag : str.split("\\+")) {
 273                                 try {
 274                                         tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
 275                                 } catch (UnsupportedEncodingException e) {
 276                                 }
 277                         }
 278                 }
 279
 280                 return tags;
 281         }
 282
 283         private Image getCover() throws IOException {
 284                 Image image = null;
 285                 List<Entry<String, URL>> chapters = getChapters(null);
 286                 if (!chapters.isEmpty()) {
 287                         URL chap1Url = chapters.get(0).getValue();
 288                         String imgsChap1 = getChapterContent(chap1Url, 1, null);
 289                         if (!imgsChap1.isEmpty()) {
 290                                 imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
 291                                 image = bsImages.getImage(this, new URL(imgsChap1));
 292                         }
 293                 }
 294
 295                 return image;
 296         }
 297
 298         // note: will be removed at getCanonicalUrl()
 299         private boolean isSetOriginalUrl(URL originalUrl) {
 300                 return originalUrl.getPath().startsWith("/post_sets/");
 301         }
 302
 303         private boolean isPool(URL url) {
 304                 return url.getPath().startsWith("/pools/") || url.getPath().startsWith("/pool/show/");
 305         }
 306
 307         // set will be renamed into search by canonical url
 308         private boolean isSearchOrSet(URL url) {
 309                 return
 310                 // search:
 311                 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
 312                                 // or set:
 313                                 || isSetOriginalUrl(url);
 314         }
 315 }