src/be/nikiroo/fanfix/supported/E621.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.io.UnsupportedEncodingException;
   6 import java.net.URL;
   7 import java.net.URLDecoder;
   8 import java.net.URLEncoder;
   9 import java.util.AbstractMap;
  10 import java.util.ArrayList;
  11 import java.util.LinkedList;
  12 import java.util.List;
  13 import java.util.AbstractMap.SimpleEntry;
  14 import java.util.Map.Entry;
  15 import java.util.Scanner;
  16
  17 import be.nikiroo.fanfix.Instance;
  18 import be.nikiroo.fanfix.data.Chapter;
  19 import be.nikiroo.fanfix.data.MetaData;
  20 import be.nikiroo.fanfix.data.Story;
  21 import be.nikiroo.utils.Image;
  22 import be.nikiroo.utils.Progress;
  23 import be.nikiroo.utils.StringUtils;
  24
  25 /**
  26  * Support class for <a href="http://e621.net/">e621.net</a> and <a
  27  * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
  28  * including some of MLP.
  29  * <p>
  30  * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
  31  * comics, but it can be difficult to browse.
  32  *
  33  * @author niki
  34  */
  35 class E621 extends BasicSupport_Deprecated {
  36         @Override
  37         public String getSourceName() {
  38                 return "e621.net";
  39         }
  40
  41         @Override
  42         protected MetaData getMeta(URL source, InputStream in) throws IOException {
  43                 MetaData meta = new MetaData();
  44
  45                 meta.setTitle(getTitle(reset(in)));
  46                 meta.setAuthor(getAuthor(source, reset(in)));
  47                 meta.setDate("");
  48                 meta.setTags(getTags(source, reset(in), false));
  49                 meta.setSource(getSourceName());
  50                 meta.setUrl(source.toString());
  51                 meta.setPublisher(getSourceName());
  52                 meta.setUuid(source.toString());
  53                 meta.setLuid("");
  54                 meta.setLang("en");
  55                 meta.setSubject("Furry");
  56                 meta.setType(getType().toString());
  57                 meta.setImageDocument(true);
  58                 meta.setCover(getCover(source, reset(in)));
  59                 meta.setFakeCover(true);
  60
  61                 return meta;
  62         }
  63
  64         private List<String> getTags(URL source, InputStream in, boolean authors) {
  65                 List<String> tags = new ArrayList<String>();
  66
  67                 if (isSearch(source)) {
  68                         String tagLine = getLine(in, "id=\"tag-sidebar\"", 1);
  69                         if (tagLine != null) {
  70                                 String key = "href=\"";
  71                                 for (int pos = tagLine.indexOf(key); pos >= 0; pos = tagLine
  72                                                 .indexOf(key, pos + 1)) {
  73                                         int end = tagLine.indexOf("\"", pos + key.length());
  74                                         if (end >= 0) {
  75                                                 String href = tagLine.substring(pos, end);
  76                                                 String subkey;
  77                                                 if (authors)
  78                                                         subkey = "?name=";
  79                                                 else
  80                                                         subkey = "?title=";
  81                                                 if (href.contains(subkey)) {
  82                                                         String tag = href.substring(href.indexOf(subkey)
  83                                                                         + subkey.length());
  84                                                         try {
  85                                                                 tags.add(URLDecoder.decode(tag, "UTF-8"));
  86                                                         } catch (UnsupportedEncodingException e) {
  87                                                                 // supported JVMs must have UTF-8 support
  88                                                                 e.printStackTrace();
  89                                                         }
  90                                                 }
  91                                         }
  92                                 }
  93
  94                         }
  95                 }
  96
  97                 return tags;
  98         }
  99
 100         @Override
 101         public Story process(URL url, Progress pg) throws IOException {
 102                 // There is no chapters on e621, just pagination...
 103                 Story story = super.process(url, pg);
 104
 105                 Chapter only = new Chapter(1, null);
 106                 for (Chapter chap : story) {
 107                         only.getParagraphs().addAll(chap.getParagraphs());
 108                 }
 109
 110                 story.getChapters().clear();
 111                 story.getChapters().add(only);
 112
 113                 return story;
 114         }
 115
 116         @Override
 117         protected boolean supports(URL url) {
 118                 String host = url.getHost();
 119                 if (host.startsWith("www.")) {
 120                         host = host.substring("www.".length());
 121                 }
 122
 123                 return ("e621.net".equals(host) || "e926.net".equals(host))
 124                                 && (isPool(url) || isSearch(url));
 125         }
 126
 127         @Override
 128         protected boolean isHtml() {
 129                 return true;
 130         }
 131
 132         private Image getCover(URL source, InputStream in) throws IOException {
 133                 URL urlForCover = source;
 134                 if (isPool(source)) {
 135                         urlForCover = new URL(source.toString() + "?page=1");
 136                 }
 137
 138                 String images = getChapterContent(urlForCover, in, 1, null);
 139                 if (!images.isEmpty()) {
 140                         int pos = images.indexOf("<br/>");
 141                         if (pos >= 0) {
 142                                 images = images.substring(1, pos - 1);
 143                                 return getImage(this, null, images);
 144                         }
 145                 }
 146
 147                 return null;
 148         }
 149
 150         private String getAuthor(URL source, InputStream in) {
 151                 if (isSearch(source)) {
 152                         StringBuilder builder = new StringBuilder();
 153                         for (String author : getTags(source, in, true)) {
 154                                 if (builder.length() > 0)
 155                                         builder.append(", ");
 156                                 builder.append(author);
 157                         }
 158
 159                         return builder.toString();
 160                 }
 161
 162                 String author = getLine(in, "href=\"/post/show/", 0);
 163                 if (author != null) {
 164                         String key = "href=\"";
 165                         int pos = author.indexOf(key);
 166                         if (pos >= 0) {
 167                                 author = author.substring(pos + key.length());
 168                                 pos = author.indexOf("\"");
 169                                 if (pos >= 0) {
 170                                         author = author.substring(0, pos - 1);
 171                                         String page = source.getProtocol() + "://"
 172                                                         + source.getHost() + author;
 173                                         try {
 174                                                 InputStream pageIn = Instance.getCache().open(
 175                                                                 new URL(page), this, false);
 176                                                 try {
 177                                                         key = "class=\"tag-type-artist\"";
 178                                                         author = getLine(pageIn, key, 0);
 179                                                         if (author != null) {
 180                                                                 pos = author.indexOf("<a href=\"");
 181                                                                 if (pos >= 0) {
 182                                                                         author = author.substring(pos);
 183                                                                         pos = author.indexOf("</a>");
 184                                                                         if (pos >= 0) {
 185                                                                                 author = author.substring(0, pos);
 186                                                                                 return StringUtils.unhtml(author);
 187                                                                         }
 188                                                                 }
 189                                                         }
 190                                                 } finally {
 191                                                         pageIn.close();
 192                                                 }
 193                                         } catch (Exception e) {
 194                                                 // No author found
 195                                         }
 196                                 }
 197                         }
 198                 }
 199
 200                 return null;
 201         }
 202
 203         private String getTitle(InputStream in) {
 204                 String title = getLine(in, "<title>", 0);
 205                 if (title != null) {
 206                         int pos = title.indexOf('>');
 207                         if (pos >= 0) {
 208                                 title = title.substring(pos + 1);
 209                                 pos = title.indexOf('<');
 210                                 if (pos >= 0) {
 211                                         title = title.substring(0, pos);
 212                                 }
 213                         }
 214
 215                         if (title.startsWith("Pool:")) {
 216                                 title = title.substring("Pool:".length());
 217                         }
 218
 219                         title = StringUtils.unhtml(title).trim();
 220                 }
 221
 222                 return title;
 223         }
 224
 225         @Override
 226         protected String getDesc(URL source, InputStream in) throws IOException {
 227                 String desc = getLine(in, "margin-bottom: 2em;", 0);
 228
 229                 if (desc != null) {
 230                         StringBuilder builder = new StringBuilder();
 231
 232                         boolean inTags = false;
 233                         for (char car : desc.toCharArray()) {
 234                                 if ((inTags && car == '>') || (!inTags && car == '<')) {
 235                                         inTags = !inTags;
 236                                 }
 237
 238                                 if (inTags) {
 239                                         builder.append(car);
 240                                 }
 241                         }
 242
 243                         return builder.toString().trim();
 244                 }
 245
 246                 return null;
 247         }
 248
 249         @Override
 250         protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
 251                         Progress pg) throws IOException {
 252                 if (isPool(source)) {
 253                         return getChaptersPool(source, in, pg);
 254                 } else if (isSearch(source)) {
 255                         return getChaptersSearch(source, in, pg);
 256                 }
 257
 258                 return new LinkedList<Entry<String, URL>>();
 259         }
 260
 261         private List<Entry<String, URL>> getChaptersSearch(URL source,
 262                         InputStream in, Progress pg) throws IOException {
 263                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 264
 265                 String search = source.getPath();
 266                 if (search.endsWith("/")) {
 267                         search = search.substring(0, search.length() - 1);
 268                 }
 269
 270                 int pos = search.lastIndexOf('/');
 271                 if (pos >= 0) {
 272                         search = search.substring(pos + 1);
 273                 }
 274
 275                 String baseUrl = "https://e621.net/post/index/";
 276                 if (source.getHost().contains("e926")) {
 277                         baseUrl = baseUrl.replace("e621", "e926");
 278                 }
 279
 280                 for (int i = 1; true; i++) {
 281                         URL url = new URL(baseUrl + i + "/" + search + "/");
 282                         try {
 283                                 InputStream pageI = Instance.getCache().open(url, this, false);
 284                                 try {
 285                                         if (getLine(pageI, "No posts matched your search.", 0) != null)
 286                                                 break;
 287                                         urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
 288                                                         .toString(i), url));
 289                                 } finally {
 290                                         pageI.close();
 291                                 }
 292                         } catch (Exception e) {
 293                                 break;
 294                         }
 295                 }
 296
 297                 return urls;
 298         }
 299
 300         private List<Entry<String, URL>> getChaptersPool(URL source,
 301                         InputStream in, Progress pg) throws IOException {
 302                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 303                 int last = 1; // no pool/show when only one page
 304
 305                 @SuppressWarnings("resource")
 306                 Scanner scan = new Scanner(in, "UTF-8");
 307                 scan.useDelimiter("\\n");
 308                 while (scan.hasNext()) {
 309                         String line = scan.next();
 310                         for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
 311                                         .indexOf(source.getPath(), pos + source.getPath().length())) {
 312                                 int equalPos = line.indexOf("=", pos);
 313                                 int quotePos = line.indexOf("\"", pos);
 314                                 if (equalPos >= 0 && quotePos > equalPos) {
 315                                         String snum = line.substring(equalPos + 1, quotePos);
 316                                         try {
 317                                                 int num = Integer.parseInt(snum);
 318                                                 if (num > last) {
 319                                                         last = num;
 320                                                 }
 321                                         } catch (NumberFormatException e) {
 322                                         }
 323                                 }
 324                         }
 325                 }
 326
 327                 for (int i = 1; i <= last; i++) {
 328                         urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
 329                                         .toString(i), new URL(source.toString() + "?page=" + i)));
 330                 }
 331
 332                 return urls;
 333         }
 334
 335         @Override
 336         protected String getChapterContent(URL source, InputStream in, int number,
 337                         Progress pg) throws IOException {
 338                 StringBuilder builder = new StringBuilder();
 339                 String staticSite = "https://static1.e621.net";
 340                 if (source.getHost().contains("e926")) {
 341                         staticSite = staticSite.replace("e621", "e926");
 342                 }
 343
 344                 String key = staticSite + "/data/preview/";
 345
 346                 @SuppressWarnings("resource")
 347                 Scanner scan = new Scanner(in, "UTF-8");
 348                 scan.useDelimiter("\\n");
 349                 while (scan.hasNext()) {
 350                         String line = scan.next();
 351                         if (line.contains("class=\"preview")) {
 352                                 for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
 353                                                 key, pos + key.length())) {
 354                                         int endPos = line.indexOf("\"", pos);
 355                                         if (endPos >= 0) {
 356                                                 String id = line.substring(pos + key.length(), endPos);
 357                                                 id = staticSite + "/data/" + id;
 358
 359                                                 int dotPos = id.lastIndexOf(".");
 360                                                 if (dotPos >= 0) {
 361                                                         id = id.substring(0, dotPos);
 362                                                         builder.append("[");
 363                                                         builder.append(id);
 364                                                         builder.append("]<br/>");
 365                                                 }
 366                                         }
 367                                 }
 368                         }
 369                 }
 370
 371                 return builder.toString();
 372         }
 373
 374         private boolean isPool(URL url) {
 375                 return url.getPath().startsWith("/pool/");
 376         }
 377
 378         private boolean isSearch(URL url) {
 379                 return url.getPath().startsWith("/post/index/");
 380         }
 381 }