supported/E621.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.io.UnsupportedEncodingException;
   6 import java.net.MalformedURLException;
   7 import java.net.URL;
   8 import java.net.URLDecoder;
   9 import java.util.AbstractMap;
  10 import java.util.ArrayList;
  11 import java.util.Collections;
  12 import java.util.Date;
  13 import java.util.LinkedList;
  14 import java.util.List;
  15 import java.util.Map.Entry;
  16
  17 import org.jsoup.helper.DataUtil;
  18 import org.jsoup.nodes.Document;
  19 import org.jsoup.nodes.Element;
  20 import org.jsoup.select.Elements;
  21
  22 import be.nikiroo.fanfix.Instance;
  23 import be.nikiroo.fanfix.data.MetaData;
  24 import be.nikiroo.utils.IOUtils;
  25 import be.nikiroo.utils.Image;
  26 import be.nikiroo.utils.Progress;
  27 import be.nikiroo.utils.StringUtils;
  28
  29 /**
  30  * Support class for <a href="http://e621.net/">e621.net</a> and
  31  * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
  32  * including some of MLP.
  33  * <p>
  34  * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
  35  * comics, but it can be difficult to browse.
  36  *
  37  * @author niki
  38  */
  39 class E621 extends BasicSupport {
  40         @Override
  41         protected boolean supports(URL url) {
  42                 String host = url.getHost();
  43                 if (host.startsWith("www.")) {
  44                         host = host.substring("www.".length());
  45                 }
  46
  47                 return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
  48         }
  49
  50         @Override
  51         protected boolean isHtml() {
  52                 return true;
  53         }
  54
  55         @Override
  56         protected MetaData getMeta() throws IOException {
  57                 MetaData meta = new MetaData();
  58
  59                 meta.setTitle(getTitle());
  60                 meta.setAuthor(getAuthor());
  61                 meta.setDate("");
  62                 meta.setTags(getTags());
  63                 meta.setSource(getType().getSourceName());
  64                 meta.setUrl(getSource().toString());
  65                 meta.setPublisher(getType().getSourceName());
  66                 meta.setUuid(getSource().toString());
  67                 meta.setLuid("");
  68                 meta.setLang("en");
  69                 meta.setSubject("Furry");
  70                 meta.setType(getType().toString());
  71                 meta.setImageDocument(true);
  72                 meta.setCover(getCover());
  73                 meta.setFakeCover(true);
  74
  75                 return meta;
  76         }
  77
  78         @Override
  79         protected String getDesc() throws IOException {
  80                 if (isSearchOrSet(getSource())) {
  81                         StringBuilder builder = new StringBuilder();
  82                         builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
  83                                         .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
  84                                         .append("\tTags: ");//
  85                         for (String tag : getTags()) {
  86                                 builder.append("\t\t").append(tag);
  87                         }
  88
  89                         return builder.toString();
  90                 }
  91
  92                 if (isPool(getSource())) {
  93                         Element el = getSourceNode().getElementById("description");
  94                         if (el != null) {
  95                                 return el.text();
  96                         }
  97                 }
  98
  99                 return null;
 100         }
 101
 102         @Override
 103         protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
 104                 List<Entry<String, URL>> chapters = new LinkedList<Entry<String, URL>>();
 105
 106                 if (isPool(getSource())) {
 107                         String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
 108                         chapters = getChapters(getSource(), pg, baseUrl, "");
 109                 } else if (isSearchOrSet(getSource())) {
 110                         String baseUrl = "https://e621.net/posts/?page=";
 111                         String search = "&tags=" + getTagsFromUrl(getSource());
 112
 113                         chapters = getChapters(getSource(), pg,
 114                                         baseUrl, search);
 115                 }
 116
 117                 // sets and some pools are sorted in reverse order on the website
 118                 if (getSource().getPath().startsWith("/posts")) {
 119                         Collections.reverse(chapters);
 120                 }
 121
 122                 return chapters;
 123         }
 124
 125         private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
 126                         throws IOException {
 127                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 128
 129                 if (source.getHost().contains("e926")) {
 130                         baseUrl = baseUrl.replace("e621", "e926");
 131                 }
 132
 133                 for (int i = 1; true; i++) {
 134                         URL url = new URL(baseUrl + i + parameters);
 135                         try {
 136                                 InputStream pageI = Instance.getInstance().getCache().open(url, this, false);
 137                                 try {
 138                                         if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
 139                                                 break;
 140                                         }
 141                                         urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
 142                                 } finally {
 143                                         pageI.close();
 144                                 }
 145                         } catch (Exception e) {
 146                                 break;
 147                         }
 148                 }
 149
 150                 return urls;
 151         }
 152
 153         @Override
 154         protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
 155                 StringBuilder builder = new StringBuilder();
 156                 Document chapterNode = loadDocument(chapUrl);
 157
 158                 Elements articles = chapterNode.getElementsByTag("article");
 159
 160                 // sets and some pools are sorted in reverse order on the website
 161                 if (getSource().getPath().startsWith("/posts")) {
 162                         Collections.reverse(articles);
 163                 }
 164
 165                 for (Element el : articles) {
 166                         builder.append("[");
 167                         builder.append(el.attr("data-file-url"));
 168                         builder.append("]<br/>");
 169                 }
 170
 171                 return builder.toString();
 172         }
 173
 174         @Override
 175         protected URL getCanonicalUrl(URL source) {
 176                 // Convert search-pools into proper pools
 177                 if (source.getPath().equals("/posts") && source.getQuery() != null
 178                                 && source.getQuery().startsWith("tags=pool%3A")) {
 179                         String poolNumber = source.getQuery()
 180                                         .substring("tags=pool%3A".length());
 181                         try {
 182                                 Integer.parseInt(poolNumber);
 183                                 String base = source.getProtocol() + "://" + source.getHost();
 184                                 if (source.getPort() != -1) {
 185                                         base = base + ":" + source.getPort();
 186                                 }
 187                                 source = new URL(base + "/pools/" + poolNumber);
 188                         } catch (NumberFormatException e) {
 189                                 // Not a simple pool, skip
 190                         } catch (MalformedURLException e) {
 191                                 // Cannot happen
 192                         }
 193                 }
 194
 195                 if (isSetOriginalUrl(source)) {
 196                         try {
 197                                 Document doc = DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", source.toString());
 198                                 for (Element shortname : doc.getElementsByClass("set-shortname")) {
 199                                         for (Element el : shortname.getElementsByTag("a")) {
 200                                                 if (!el.attr("href").isEmpty())
 201                                                         return new URL(el.absUrl("href"));
 202                                         }
 203                                 }
 204                         } catch (IOException e) {
 205                                 Instance.getInstance().getTraceHandler().error(e);
 206                         }
 207                 }
 208
 209                 if (isPool(source)) {
 210                         try {
 211                                 return new URL(source.toString().replace("/pool/show/", "/pools/"));
 212                         } catch (MalformedURLException e) {
 213                         }
 214                 }
 215
 216                 return super.getCanonicalUrl(source);
 217         }
 218
 219         // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
 220         private String getTagsFromUrl(URL url) {
 221                 String tags = url == null ? "" : url.getQuery();
 222                 int pos = tags.indexOf("tags=");
 223
 224                 if (pos >= 0) {
 225                         tags = tags.substring(pos).substring("tags=".length());
 226                 } else {
 227                         return "";
 228                 }
 229
 230                 pos = tags.indexOf('&');
 231                 if (pos > 0) {
 232                         tags = tags.substring(0, pos);
 233                 }
 234                 pos = tags.indexOf('/');
 235                 if (pos > 0) {
 236                         tags = tags.substring(0, pos);
 237                 }
 238
 239                 return tags;
 240         }
 241
 242         private String getTitle() {
 243                 String title = "";
 244
 245                 Element el = getSourceNode().getElementsByTag("title").first();
 246                 if (el != null) {
 247                         title = el.text().trim();
 248                 }
 249
 250                 for (String s : new String[] { "e621", "-", "e621", "Pool", "-" }) {
 251                         if (title.startsWith(s)) {
 252                                 title = title.substring(s.length()).trim();
 253                         }
 254                         if (title.endsWith(s)) {
 255                                 title = title.substring(0, title.length() - s.length()).trim();
 256                         }
 257                 }
 258
 259                 if (isSearchOrSet(getSource())) {
 260                         title = title.isEmpty() ? "e621" : "[e621] " + title;
 261                 }
 262
 263                 return title;
 264         }
 265
 266         private String getAuthor() throws IOException {
 267                 StringBuilder builder = new StringBuilder();
 268
 269                 if (isSearchOrSet(getSource())) {
 270                         for (Element el : getSourceNode().getElementsByClass("search-tag")) {
 271                                 if (el.attr("itemprop").equals("author")) {
 272                                         if (builder.length() > 0) {
 273                                                 builder.append(", ");
 274                                         }
 275                                         builder.append(el.text().trim());
 276                                 }
 277                         }
 278                 }
 279
 280                 if (isPool(getSource())) {
 281                         String desc = getDesc();
 282                         String descL = desc.toLowerCase();
 283
 284                         if (descL.startsWith("by:") || descL.startsWith("by ")) {
 285                                 desc = desc.substring(3).trim();
 286                                 desc = desc.split("\n")[0];
 287
 288                                 String tab[] = desc.split(" ");
 289                                 for (int i = 0; i < Math.min(tab.length, 5); i++) {
 290                                         if (tab[i].startsWith("http"))
 291                                                 break;
 292                                         builder.append(" ").append(tab[i]);
 293                                 }
 294                         }
 295
 296                         if (builder.length() == 0) {
 297                                 String url = "https://e621.net/" + getSource().getPath()
 298                                                 + "?page=1";
 299                                 Document page1 = DataUtil.load(Instance.getInstance().getCache()
 300                                                 .open(getSource(), this, false), "UTF-8",
 301                                                 url.toString());
 302                                 for (Element el : page1.getElementsByClass("search-tag")) {
 303                                         if (el.attr("itemprop").equals("author")) {
 304                                                 if (builder.length() > 0) {
 305                                                         builder.append(", ");
 306                                                 }
 307                                                 builder.append(el.text().trim());
 308                                         }
 309                                 }
 310                         }
 311                 }
 312
 313                 return builder.toString();
 314         }
 315
 316         // no tags for pools
 317         private List<String> getTags() {
 318                 List<String> tags = new ArrayList<String>();
 319                 if (isSearchOrSet(getSource())) {
 320                         String str = getTagsFromUrl(getSource());
 321                         for (String tag : str.split("\\+")) {
 322                                 try {
 323                                         tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
 324                                 } catch (UnsupportedEncodingException e) {
 325                                 }
 326                         }
 327                 }
 328
 329                 return tags;
 330         }
 331
 332         private Image getCover() throws IOException {
 333                 Image image = null;
 334                 List<Entry<String, URL>> chapters = getChapters(null);
 335                 if (!chapters.isEmpty()) {
 336                         URL chap1Url = chapters.get(0).getValue();
 337                         String imgsChap1 = getChapterContent(chap1Url, 1, null);
 338                         if (!imgsChap1.isEmpty()) {
 339                                 imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
 340                                 image = bsImages.getImage(this, new URL(imgsChap1));
 341                         }
 342                 }
 343
 344                 return image;
 345         }
 346
 347         // note: will be removed at getCanonicalUrl()
 348         private boolean isSetOriginalUrl(URL originalUrl) {
 349                 return originalUrl.getPath().startsWith("/post_sets/");
 350         }
 351
 352         private boolean isPool(URL url) {
 353                 return url.getPath().startsWith("/pools/") || url.getPath().startsWith("/pool/show/");
 354         }
 355
 356         // set will be renamed into search by canonical url
 357         private boolean isSearchOrSet(URL url) {
 358                 return
 359                 // search:
 360                 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
 361                                 // or set:
 362                                 || isSetOriginalUrl(url);
 363         }
 364 }