src/be/nikiroo/fanfix/supported/E621.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.io.UnsupportedEncodingException;
   6 import java.net.URL;
   7 import java.net.URLDecoder;
   8 import java.util.AbstractMap;
   9 import java.util.ArrayList;
  10 import java.util.Collections;
  11 import java.util.Date;
  12 import java.util.LinkedList;
  13 import java.util.List;
  14 import java.util.Map.Entry;
  15
  16 import org.jsoup.helper.DataUtil;
  17 import org.jsoup.nodes.Document;
  18 import org.jsoup.nodes.Element;
  19
  20 import be.nikiroo.fanfix.Instance;
  21 import be.nikiroo.fanfix.data.MetaData;
  22 import be.nikiroo.utils.IOUtils;
  23 import be.nikiroo.utils.Image;
  24 import be.nikiroo.utils.Progress;
  25 import be.nikiroo.utils.StringUtils;
  26
  27 /**
  28  * Support class for <a href="http://e621.net/">e621.net</a> and
  29  * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
  30  * including some of MLP.
  31  * <p>
  32  * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
  33  * comics, but it can be difficult to browse.
  34  *
  35  * @author niki
  36  */
  37 class E621 extends BasicSupport {
  38         @Override
  39         protected boolean supports(URL url) {
  40                 String host = url.getHost();
  41                 if (host.startsWith("www.")) {
  42                         host = host.substring("www.".length());
  43                 }
  44
  45                 return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
  46         }
  47
  48         @Override
  49         protected boolean isHtml() {
  50                 return true;
  51         }
  52
  53         @Override
  54         protected MetaData getMeta() throws IOException {
  55                 MetaData meta = new MetaData();
  56
  57                 meta.setTitle(getTitle());
  58                 meta.setAuthor(getAuthor());
  59                 meta.setDate("");
  60                 meta.setTags(getTags());
  61                 meta.setSource(getType().getSourceName());
  62                 meta.setUrl(getSource().toString());
  63                 meta.setPublisher(getType().getSourceName());
  64                 meta.setUuid(getSource().toString());
  65                 meta.setLuid("");
  66                 meta.setLang("en");
  67                 meta.setSubject("Furry");
  68                 meta.setType(getType().toString());
  69                 meta.setImageDocument(true);
  70                 meta.setCover(getCover());
  71                 meta.setFakeCover(true);
  72
  73                 return meta;
  74         }
  75
  76         @Override
  77         protected String getDesc() throws IOException {
  78                 if (isSearchOrSet(getSource())) {
  79                         StringBuilder builder = new StringBuilder();
  80                         builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
  81                                         .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
  82                                         .append("\tTags: ");//
  83                         for (String tag : getTags()) {
  84                                 builder.append("\t\t").append(tag);
  85                         }
  86
  87                         return builder.toString();
  88                 }
  89
  90                 if (isPool(getSource())) {
  91                         Element el = getSourceNode().getElementById("description");
  92                         if (el != null) {
  93                                 return el.text();
  94                         }
  95                 }
  96
  97                 return null;
  98         }
  99
 100         @Override
 101         protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
 102                 if (isPool(getSource())) {
 103                         String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
 104                         return getChapters(getSource(), pg, baseUrl, "");
 105                 } else if (isSearchOrSet(getSource())) {
 106                         String baseUrl = "https://e621.net/posts/?page=";
 107                         String search = "&tags=" + getTagsFromUrl(getSource());
 108                         return getChapters(getSource(), pg, baseUrl, search);
 109                 }
 110
 111                 return new LinkedList<Entry<String, URL>>();
 112         }
 113
 114         private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
 115                         throws IOException {
 116                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 117
 118                 if (source.getHost().contains("e926")) {
 119                         baseUrl = baseUrl.replace("e621", "e926");
 120                 }
 121
 122                 for (int i = 1; true; i++) {
 123                         URL url = new URL(baseUrl + i + parameters);
 124                         try {
 125                                 InputStream pageI = Instance.getCache().open(url, this, false);
 126                                 try {
 127                                         if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
 128                                                 break;
 129                                         }
 130                                         urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
 131                                 } finally {
 132                                         pageI.close();
 133                                 }
 134                         } catch (Exception e) {
 135                                 break;
 136                         }
 137                 }
 138
 139                 // They are sorted in reverse order on the website
 140                 Collections.reverse(urls);
 141                 return urls;
 142         }
 143
 144         @Override
 145         protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
 146                 StringBuilder builder = new StringBuilder();
 147                 Document chapterNode = loadDocument(chapUrl);
 148                 for (Element el : chapterNode.getElementsByTag("article")) {
 149                         builder.append("[");
 150                         builder.append(el.attr("data-file-url"));
 151                         builder.append("]<br/>");
 152                 }
 153
 154                 return builder.toString();
 155         }
 156
 157         @Override
 158         protected URL getCanonicalUrl(URL source) {
 159                 if (isSetOriginalUrl(source)) {
 160                         try {
 161                                 Document doc = DataUtil.load(Instance.getCache().open(source, this, false), "UTF-8", source.toString());
 162                                 for (Element shortname : doc.getElementsByClass("set-shortname")) {
 163                                         for (Element el : shortname.getElementsByTag("a")) {
 164                                                 if (!el.attr("href").isEmpty())
 165                                                         return new URL(el.absUrl("href"));
 166                                         }
 167                                 }
 168                         } catch (IOException e) {
 169                                 Instance.getTraceHandler().error(e);
 170                         }
 171                 }
 172
 173                 return super.getCanonicalUrl(source);
 174         }
 175
 176         // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
 177         private String getTagsFromUrl(URL url) {
 178                 String tags = url == null ? "" : url.getQuery();
 179                 int pos = tags.indexOf("tags=");
 180
 181                 if (pos >= 0) {
 182                         tags = tags.substring(pos).substring("tags=".length());
 183                 } else {
 184                         return "";
 185                 }
 186
 187                 pos = tags.indexOf('&');
 188                 if (pos > 0) {
 189                         tags = tags.substring(0, pos);
 190                 }
 191                 pos = tags.indexOf('/');
 192                 if (pos > 0) {
 193                         tags = tags.substring(0, pos);
 194                 }
 195
 196                 return tags;
 197         }
 198
 199         private String getTitle() {
 200                 String title = "";
 201
 202                 Element el = getSourceNode().getElementsByTag("title").first();
 203                 if (el != null) {
 204                         title = el.text().trim();
 205                 }
 206
 207                 for (String s : new String[] { "e621", "-", "e621" }) {
 208                         if (title.startsWith(s)) {
 209                                 title = title.substring(s.length()).trim();
 210                         }
 211                         if (title.endsWith(s)) {
 212                                 title = title.substring(0, title.length() - s.length()).trim();
 213                         }
 214
 215                 }
 216
 217                 if (isSearchOrSet(getSource())) {
 218                         title = title.isEmpty() ? "e621" : "[e621] " + title;
 219                 }
 220                 return title;
 221         }
 222
 223         private String getAuthor() throws IOException {
 224                 StringBuilder builder = new StringBuilder();
 225
 226                 if (isSearchOrSet(getSource())) {
 227                         for (Element el : getSourceNode().getElementsByClass("search-tag")) {
 228                                 if (el.attr("itemprop").equals("author")) {
 229                                         if (builder.length() > 0) {
 230                                                 builder.append(", ");
 231                                         }
 232                                         builder.append(el.text().trim());
 233                                 }
 234                         }
 235                 }
 236
 237                 if (isPool(getSource())) {
 238                         String desc = getDesc();
 239                         String descL = desc.toLowerCase();
 240
 241                         if (descL.startsWith("by:") || descL.startsWith("by ")) {
 242                                 desc = desc.substring(3).trim();
 243                                 desc = desc.split("\n")[0];
 244
 245                                 String tab[] = desc.split(" ");
 246                                 for (int i = 0; i < Math.min(tab.length, 5); i++) {
 247                                         if (tab[i].startsWith("http"))
 248                                                 break;
 249                                         builder.append(" ").append(tab[i]);
 250                                 }
 251                         }
 252                 }
 253
 254                 return builder.toString();
 255         }
 256
 257         // no tags for pools
 258         private List<String> getTags() {
 259                 List<String> tags = new ArrayList<String>();
 260                 if (isSearchOrSet(getSource())) {
 261                         String str = getTagsFromUrl(getSource());
 262                         for (String tag : str.split("\\+")) {
 263                                 try {
 264                                         tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
 265                                 } catch (UnsupportedEncodingException e) {
 266                                 }
 267                         }
 268                 }
 269
 270                 return tags;
 271         }
 272
 273         private Image getCover() throws IOException {
 274                 Image image = null;
 275                 List<Entry<String, URL>> chapters = getChapters(null);
 276                 if (!chapters.isEmpty()) {
 277                         URL url = chapters.get(0).getValue();
 278                         image = bsImages.getImage(this, url);
 279                 }
 280
 281                 return image;
 282         }
 283
 284         // note: will be removed at getCanonicalUrl()
 285         private boolean isSetOriginalUrl(URL originalUrl) {
 286                 return originalUrl.getPath().startsWith("/post_sets/");
 287         }
 288
 289         private boolean isPool(URL url) {
 290                 return url.getPath().startsWith("/pools/");
 291         }
 292
 293         // set will be renamed into search by canonical url
 294         private boolean isSearchOrSet(URL url) {
 295                 return
 296                 // search:
 297                 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
 298                                 // or set:
 299                                 || isSetOriginalUrl(url);
 300         }
 301 }