supported/E621.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.UnsupportedEncodingException;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.net.URLDecoder;
   8 import java.util.AbstractMap;
   9 import java.util.ArrayList;
  10 import java.util.Collections;
  11 import java.util.Date;
  12 import java.util.LinkedList;
  13 import java.util.List;
  14 import java.util.Map.Entry;
  15
  16 import org.json.JSONArray;
  17 import org.json.JSONException;
  18 import org.json.JSONObject;
  19 import org.jsoup.helper.DataUtil;
  20 import org.jsoup.nodes.Document;
  21 import org.jsoup.nodes.Element;
  22
  23 import be.nikiroo.fanfix.Instance;
  24 import be.nikiroo.fanfix.bundles.Config;
  25 import be.nikiroo.fanfix.data.MetaData;
  26 import be.nikiroo.utils.Image;
  27 import be.nikiroo.utils.Progress;
  28 import be.nikiroo.utils.StringUtils;
  29 import be.nikiroo.utils.Version;
  30
  31 /**
  32  * Support class for <a href="http://e621.net/">e621.net</a> and
  33  * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
  34  * including some of MLP.
  35  * <p>
  36  * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
  37  * comics, but it can be difficult to browse.
  38  *
  39  * @author niki
  40  */
  41 class E621 extends BasicSupport {
  42         @Override
  43         protected boolean supports(URL url) {
  44                 String host = url.getHost();
  45                 if (host.startsWith("www.")) {
  46                         host = host.substring("www.".length());
  47                 }
  48
  49                 return ("e621.net".equals(host) || "e926.net".equals(host))
  50                                 && (isPool(url) || isSearchOrSet(url));
  51         }
  52
  53         @Override
  54         protected boolean isHtml() {
  55                 return true;
  56         }
  57
  58         @Override
  59         protected MetaData getMeta() throws IOException {
  60                 MetaData meta = new MetaData();
  61
  62                 meta.setTitle(getTitle());
  63                 meta.setAuthor(getAuthor());
  64                 meta.setDate(bsHelper.formatDate(getDate()));
  65                 meta.setTags(getTags());
  66                 meta.setSource(getType().getSourceName());
  67                 meta.setUrl(getSource().toString());
  68                 meta.setPublisher(getType().getSourceName());
  69                 meta.setUuid(getSource().toString());
  70                 meta.setLuid("");
  71                 meta.setLang("en");
  72                 meta.setSubject("Furry");
  73                 meta.setType(getType().toString());
  74                 meta.setImageDocument(true);
  75                 meta.setCover(getCover());
  76                 meta.setFakeCover(true);
  77
  78                 return meta;
  79         }
  80
  81         @Override
  82         protected String getDesc() throws IOException {
  83                 if (isSearchOrSet(getSource())) {
  84                         StringBuilder builder = new StringBuilder();
  85                         builder.append("A collection of images from ")
  86                                         .append(getSource().getHost()).append("\n") //
  87                                         .append("\tTime of creation: "
  88                                                         + StringUtils.fromTime(new Date().getTime()))
  89                                         .append("\n") //
  90                                         .append("\tTags: ");//
  91                         for (String tag : getTags()) {
  92                                 builder.append("\t\t").append(tag);
  93                         }
  94
  95                         return builder.toString();
  96                 }
  97
  98                 if (isPool(getSource())) {
  99                         Element el = getSourceNode().getElementById("description");
 100                         if (el != null) {
 101                                 return el.text();
 102                         }
 103                 }
 104
 105                 return null;
 106         }
 107
 108         @Override
 109         protected List<Entry<String, URL>> getChapters(Progress pg)
 110                         throws IOException {
 111                 int i = 1;
 112                 String jsonUrl = getJsonUrl();
 113                 if (jsonUrl != null) {
 114                         for (i = 1; true; i++) {
 115                                 if (i > 1) {
 116                                         try {
 117                                                 // The API does not accept more than 2 request per sec,
 118                                                 // and asks us to limit at one per sec when possible
 119                                                 Thread.sleep(1000);
 120                                         } catch (InterruptedException e) {
 121                                         }
 122                                 }
 123
 124                                 try {
 125                                         JSONObject json = getJson(jsonUrl + "&page=" + i, false);
 126                                         if (!json.has("posts"))
 127                                                 break;
 128                                         JSONArray posts = json.getJSONArray("posts");
 129                                         if (posts.isEmpty())
 130                                                 break;
 131                                 } catch (Exception e) {
 132                                         e.printStackTrace();
 133                                 }
 134                         }
 135
 136                         // The last page was empty:
 137                         i--;
 138                 }
 139
 140                 // The pages and images are in reverse order on /posts/
 141                 List<Entry<String, URL>> chapters = new LinkedList<Entry<String, URL>>();
 142                 for (int page = i; page > 0; page--) {
 143                         chapters.add(new AbstractMap.SimpleEntry<String, URL>(
 144                                         "Page " + Integer.toString(i - page + 1),
 145                                         new URL(jsonUrl + "&page=" + page)));
 146                 }
 147
 148                 return chapters;
 149         }
 150
 151         @Override
 152         protected String getChapterContent(URL chapUrl, int number, Progress pg)
 153                         throws IOException {
 154                 StringBuilder builder = new StringBuilder();
 155
 156                 JSONObject json = getJson(chapUrl, false);
 157                 JSONArray postsArr = json.getJSONArray("posts");
 158
 159                 // The pages and images are in reverse order on /posts/
 160                 List<JSONObject> posts = new ArrayList<JSONObject>(postsArr.length());
 161                 for (int i = postsArr.length() - 1; i >= 0; i--) {
 162                         Object o = postsArr.get(i);
 163                         if (o instanceof JSONObject)
 164                                 posts.add((JSONObject) o);
 165                 }
 166
 167                 for (JSONObject post : posts) {
 168                         if (!post.has("file"))
 169                                 continue;
 170                         JSONObject file = post.getJSONObject("file");
 171                         if (!file.has("url"))
 172                                 continue;
 173
 174                         try {
 175                                 String url = file.getString("url");
 176                                 builder.append("[");
 177                                 builder.append(url);
 178                                 builder.append("]<br/>");
 179                         } catch (JSONException e) {
 180                                 // Can be NULL if filtered
 181                                 // When the value is NULL, we get an exception
 182                                 // but the "has" method still returns true
 183                                 Instance.getInstance().getTraceHandler()
 184                                                 .error("Cannot get image for chapter " + number + " of "
 185                                                                 + getSource());
 186                         }
 187                 }
 188
 189                 return builder.toString();
 190         }
 191
 192         @Override
 193         protected URL getCanonicalUrl(URL source) {
 194                 // Convert search-pools into proper pools
 195                 if (source.getPath().equals("/posts") && source.getQuery() != null
 196                                 && source.getQuery().startsWith("tags=pool%3A")) {
 197                         String poolNumber = source.getQuery()
 198                                         .substring("tags=pool%3A".length());
 199                         try {
 200                                 Integer.parseInt(poolNumber);
 201                                 String base = source.getProtocol() + "://" + source.getHost();
 202                                 if (source.getPort() != -1) {
 203                                         base = base + ":" + source.getPort();
 204                                 }
 205                                 source = new URL(base + "/pools/" + poolNumber);
 206                         } catch (NumberFormatException e) {
 207                                 // Not a simple pool, skip
 208                         } catch (MalformedURLException e) {
 209                                 // Cannot happen
 210                         }
 211                 }
 212
 213                 if (isSetOriginalUrl(source)) {
 214                         try {
 215                                 Document doc = DataUtil.load(Instance.getInstance().getCache()
 216                                                 .open(source, this, false), "UTF-8", source.toString());
 217                                 for (Element shortname : doc
 218                                                 .getElementsByClass("set-shortname")) {
 219                                         for (Element el : shortname.getElementsByTag("a")) {
 220                                                 if (!el.attr("href").isEmpty())
 221                                                         return new URL(el.absUrl("href"));
 222                                         }
 223                                 }
 224                         } catch (IOException e) {
 225                                 Instance.getInstance().getTraceHandler().error(e);
 226                         }
 227                 }
 228
 229                 if (isPool(source)) {
 230                         try {
 231                                 return new URL(
 232                                                 source.toString().replace("/pool/show/", "/pools/"));
 233                         } catch (MalformedURLException e) {
 234                         }
 235                 }
 236
 237                 return super.getCanonicalUrl(source);
 238         }
 239
 240         private String getTitle() {
 241                 String title = "";
 242
 243                 Element el = getSourceNode().getElementsByTag("title").first();
 244                 if (el != null) {
 245                         title = el.text().trim();
 246                 }
 247
 248                 for (String s : new String[] { "e621", "-", "e621", "Pool", "-" }) {
 249                         if (title.startsWith(s)) {
 250                                 title = title.substring(s.length()).trim();
 251                         }
 252                         if (title.endsWith(s)) {
 253                                 title = title.substring(0, title.length() - s.length()).trim();
 254                         }
 255                 }
 256
 257                 if (isSearchOrSet(getSource())) {
 258                         title = title.isEmpty() ? "e621" : "[e621] " + title;
 259                 }
 260
 261                 return title;
 262         }
 263
 264         private String getAuthor() {
 265                 List<String> list = new ArrayList<String>();
 266                 String jsonUrl = getJsonUrl();
 267                 if (jsonUrl != null) {
 268                         try {
 269                                 JSONObject json = getJson(jsonUrl, false);
 270                                 JSONArray posts = json.getJSONArray("posts");
 271                                 for (Object obj : posts) {
 272                                         if (!(obj instanceof JSONObject))
 273                                                 continue;
 274
 275                                         JSONObject post = (JSONObject) obj;
 276                                         if (!post.has("tags"))
 277                                                 continue;
 278
 279                                         JSONObject tags = post.getJSONObject("tags");
 280                                         if (!tags.has("artist"))
 281                                                 continue;
 282
 283                                         JSONArray artists = tags.getJSONArray("artist");
 284                                         for (Object artist : artists) {
 285                                                 if (list.contains(artist.toString()))
 286                                                         continue;
 287
 288                                                 list.add(artist.toString());
 289                                         }
 290                                 }
 291                         } catch (Exception e) {
 292                                 e.printStackTrace();
 293                         }
 294                 }
 295
 296                 StringBuilder builder = new StringBuilder();
 297                 for (String artist : list) {
 298                         if (builder.length() > 0) {
 299                                 builder.append(", ");
 300                         }
 301                         builder.append(artist);
 302                 }
 303
 304                 return builder.toString();
 305         }
 306
 307         private String getDate() {
 308                 String jsonUrl = getJsonUrl();
 309                 if (jsonUrl != null) {
 310                         try {
 311                                 JSONObject json = getJson(jsonUrl, false);
 312                                 JSONArray posts = json.getJSONArray("posts");
 313                                 for (Object obj : posts) {
 314                                         if (!(obj instanceof JSONObject))
 315                                                 continue;
 316
 317                                         JSONObject post = (JSONObject) obj;
 318                                         if (!post.has("created_at"))
 319                                                 continue;
 320
 321                                         return post.getString("created_at");
 322                                 }
 323                         } catch (Exception e) {
 324                                 e.printStackTrace();
 325                         }
 326                 }
 327
 328                 return "";
 329         }
 330
 331         // no tags for pools
 332         private List<String> getTags() {
 333                 List<String> tags = new ArrayList<String>();
 334                 if (isSearchOrSet(getSource())) {
 335                         String str = getTagsFromUrl(getSource());
 336                         for (String tag : str.split("\\+")) {
 337                                 try {
 338                                         tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
 339                                 } catch (UnsupportedEncodingException e) {
 340                                 }
 341                         }
 342                 }
 343
 344                 return tags;
 345         }
 346
 347         // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
 348         private String getTagsFromUrl(URL url) {
 349                 String tags = url == null ? "" : url.getQuery();
 350                 int pos = tags.indexOf("tags=");
 351
 352                 if (pos >= 0) {
 353                         tags = tags.substring(pos).substring("tags=".length());
 354                 } else {
 355                         return "";
 356                 }
 357
 358                 pos = tags.indexOf('&');
 359                 if (pos > 0) {
 360                         tags = tags.substring(0, pos);
 361                 }
 362                 pos = tags.indexOf('/');
 363                 if (pos > 0) {
 364                         tags = tags.substring(0, pos);
 365                 }
 366
 367                 return tags;
 368         }
 369
 370         private Image getCover() throws IOException {
 371                 Image image = null;
 372                 List<Entry<String, URL>> chapters = getChapters(null);
 373                 if (!chapters.isEmpty()) {
 374                         URL chap1Url = chapters.get(0).getValue();
 375                         String imgsChap1 = getChapterContent(chap1Url, 1, null);
 376                         if (!imgsChap1.isEmpty()) {
 377                                 imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
 378                                 image = bsImages.getImage(this, new URL(imgsChap1));
 379                         }
 380                 }
 381
 382                 return image;
 383         }
 384
 385         // always /posts.json/ url
 386         private String getJsonUrl() {
 387                 String url = null;
 388                 if (isSearchOrSet(getSource())) {
 389                         url = getSource().toString().replace("/posts", "/posts.json");
 390                 }
 391
 392                 if (isPool(getSource())) {
 393                         String poolNumber = getSource().getPath()
 394                                         .substring("/pools/".length());
 395                         url = "https://e621.net/posts.json" + "?tags=pool%3A" + poolNumber;
 396                 }
 397
 398                 if (url != null) {
 399                         // Note: one way to override the blacklist
 400                         String login = Instance.getInstance().getConfig()
 401                                         .getString(Config.LOGIN_E621_LOGIN);
 402                         String apk = Instance.getInstance().getConfig()
 403                                         .getString(Config.LOGIN_E621_APIKEY);
 404
 405                         if (login != null && !login.isEmpty() && apk != null
 406                                         && !apk.isEmpty()) {
 407                                 url = String.format("%s&login=%s&api_key=%s&_client=%s", url,
 408                                                 login, apk, "fanfix-" + Version.getCurrentVersion());
 409                         }
 410                 }
 411
 412                 return url;
 413         }
 414
 415         // note: will be removed at getCanonicalUrl()
 416         private boolean isSetOriginalUrl(URL originalUrl) {
 417                 return originalUrl.getPath().startsWith("/post_sets/");
 418         }
 419
 420         private boolean isPool(URL url) {
 421                 return url.getPath().startsWith("/pools/")
 422                                 || url.getPath().startsWith("/pool/show/");
 423         }
 424
 425         // set will be renamed into search by canonical url
 426         private boolean isSearchOrSet(URL url) {
 427                 return
 428                 // search:
 429                 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
 430                                 // or set:
 431                                 || isSetOriginalUrl(url);
 432         }
 433 }