supported/EHentai.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.IOException;
   5 import java.io.InputStream;
   6 import java.net.MalformedURLException;
   7 import java.net.URL;
   8 import java.util.ArrayList;
   9 import java.util.List;
  10 import java.util.Map.Entry;
  11 import java.util.Scanner;
  12
  13 import be.nikiroo.fanfix.Instance;
  14 import be.nikiroo.fanfix.data.Chapter;
  15 import be.nikiroo.fanfix.data.MetaData;
  16 import be.nikiroo.fanfix.data.Story;
  17 import be.nikiroo.utils.Progress;
  18 import be.nikiroo.utils.StringUtils;
  19
  20 /**
  21  * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
  22  * supporting mostly but not always NSFW comics, including some of MLP.
  23  *
  24  * @author niki
  25  */
  26 class EHentai extends BasicSupport {
  27         @Override
  28         public String getSourceName() {
  29                 return "e-hentai.org";
  30         }
  31
  32         @Override
  33         protected MetaData getMeta(URL source, InputStream in) throws IOException {
  34                 MetaData meta = new MetaData();
  35
  36                 meta.setTitle(getTitle(reset(in)));
  37                 meta.setAuthor(getAuthor(reset(in)));
  38                 meta.setDate(getDate(reset(in)));
  39                 meta.setTags(getTags(reset(in)));
  40                 meta.setSource(getSourceName());
  41                 meta.setUrl(source.toString());
  42                 meta.setPublisher(getSourceName());
  43                 meta.setUuid(source.toString());
  44                 meta.setLuid("");
  45                 meta.setLang(getLang(reset(in)));
  46                 meta.setSubject("Hentai");
  47                 meta.setType(getType().toString());
  48                 meta.setImageDocument(true);
  49                 meta.setCover(getCover(source, reset(in)));
  50                 meta.setFakeCover(true);
  51
  52                 return meta;
  53         }
  54
  55         @Override
  56         public Story process(URL url, Progress pg) throws IOException {
  57                 // There is no chapters on e621, just pagination...
  58                 Story story = super.process(url, pg);
  59
  60                 Chapter only = new Chapter(1, null);
  61                 for (Chapter chap : story) {
  62                         only.getParagraphs().addAll(chap.getParagraphs());
  63                 }
  64
  65                 story.getChapters().clear();
  66                 story.getChapters().add(only);
  67
  68                 return story;
  69         }
  70
  71         @Override
  72         protected boolean supports(URL url) {
  73                 return "e-hentai.org".equals(url.getHost());
  74         }
  75
  76         @Override
  77         protected boolean isHtml() {
  78                 return true;
  79         }
  80
  81         private BufferedImage getCover(URL source, InputStream in) {
  82                 BufferedImage author = null;
  83                 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
  84                 if (coverLine != null) {
  85                         coverLine = StringUtils.unhtml(coverLine).trim();
  86                         author = getImage(this, source, coverLine);
  87                 }
  88
  89                 return author;
  90         }
  91
  92         private String getAuthor(InputStream in) {
  93                 String author = null;
  94
  95                 List<String> tagsAuthor = getTagsAuthor(in);
  96                 if (!tagsAuthor.isEmpty()) {
  97                         author = tagsAuthor.get(0);
  98                 }
  99
 100                 return author;
 101         }
 102
 103         private String getLang(InputStream in) {
 104                 String lang = null;
 105
 106                 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
 107                                 "class=\"gdt2\"", "</td>");
 108                 if (langLine != null) {
 109                         langLine = StringUtils.unhtml(langLine).trim();
 110                         if (langLine.equalsIgnoreCase("English")) {
 111                                 lang = "EN";
 112                         } else if (langLine.equalsIgnoreCase("Japanese")) {
 113                                 lang = "JP";
 114                         } else if (langLine.equalsIgnoreCase("French")) {
 115                                 lang = "FR";
 116                         } else {
 117                                 // TODO find the code?
 118                                 lang = langLine;
 119                         }
 120                 }
 121
 122                 return lang;
 123         }
 124
 125         private String getDate(InputStream in) {
 126                 String date = null;
 127
 128                 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
 129                                 "class=\"gdt2\"", "</td>");
 130                 if (dateLine != null) {
 131                         dateLine = StringUtils.unhtml(dateLine).trim();
 132                         if (dateLine.length() > 10) {
 133                                 dateLine = dateLine.substring(0, 10).trim();
 134                         }
 135
 136                         date = dateLine;
 137                 }
 138
 139                 return date;
 140         }
 141
 142         private List<String> getTags(InputStream in) {
 143                 List<String> tags = new ArrayList<String>();
 144                 List<String> tagsAuthor = getTagsAuthor(in);
 145
 146                 for (int i = 1; i < tagsAuthor.size(); i++) {
 147                         tags.add(tagsAuthor.get(i));
 148                 }
 149
 150                 return tags;
 151         }
 152
 153         private List<String> getTagsAuthor(InputStream in) {
 154                 List<String> tags = new ArrayList<String>();
 155                 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
 156                                 null);
 157                 if (tagLine != null) {
 158                         for (String tag : tagLine.split(",")) {
 159                                 String candi = tag.trim();
 160                                 if (!candi.isEmpty() && !tags.contains(candi)) {
 161                                         tags.add(candi);
 162                                 }
 163                         }
 164                 }
 165
 166                 return tags;
 167         }
 168
 169         private String getTitle(InputStream in) {
 170                 String siteName = " - E-Hentai Galleries";
 171
 172                 String title = getLine(in, "<title>", 0);
 173                 if (title != null) {
 174                         title = StringUtils.unhtml(title).trim();
 175                         if (title.endsWith(siteName)) {
 176                                 title = title.substring(0, title.length() - siteName.length())
 177                                                 .trim();
 178                         }
 179                 }
 180
 181                 return title;
 182         }
 183
 184         @Override
 185         protected String getDesc(URL source, InputStream in) throws IOException {
 186                 String desc = null;
 187
 188                 String descLine = getKeyLine(in, "Uploader Comment", null,
 189                                 "<div class=\"c7\"");
 190                 if (descLine != null) {
 191                         desc = StringUtils.unhtml(descLine);
 192                 }
 193
 194                 return desc;
 195         }
 196
 197         @Override
 198         protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
 199                         Progress pg) throws IOException {
 200                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 201                 int last = 0; // no pool/show when only one page, first page == page 0
 202
 203                 @SuppressWarnings("resource")
 204                 Scanner scan = new Scanner(in, "UTF-8");
 205                 scan.useDelimiter(">");
 206                 while (scan.hasNext()) {
 207                         String line = scan.next();
 208                         if (line.contains(source.toString())) {
 209                                 String page = line.substring(line.indexOf(source.toString()));
 210                                 String pkey = "?p=";
 211                                 if (page.contains(pkey)) {
 212                                         page = page.substring(page.indexOf(pkey) + pkey.length());
 213                                         String number = "";
 214                                         while (!page.isEmpty() && page.charAt(0) >= '0'
 215                                                         && page.charAt(0) <= '9') {
 216                                                 number += page.charAt(0);
 217                                                 page = page.substring(1);
 218                                         }
 219                                         if (number.isEmpty()) {
 220                                                 number = "0";
 221                                         }
 222
 223                                         int current = Integer.parseInt(number);
 224                                         if (last < current) {
 225                                                 last = current;
 226                                         }
 227                                 }
 228                         }
 229                 }
 230
 231                 for (int i = 0; i <= last; i++) {
 232                         final String key = Integer.toString(i + 1);
 233                         final URL value = new URL(source.toString() + "?p=" + i);
 234                         urls.add(new Entry<String, URL>() {
 235                                 @Override
 236                                 public URL setValue(URL value) {
 237                                         return null;
 238                                 }
 239
 240                                 @Override
 241                                 public URL getValue() {
 242                                         return value;
 243                                 }
 244
 245                                 @Override
 246                                 public String getKey() {
 247                                         return key;
 248                                 }
 249                         });
 250                 }
 251
 252                 return urls;
 253         }
 254
 255         @Override
 256         protected String getChapterContent(URL source, InputStream in, int number,
 257                         Progress pg) throws IOException {
 258                 String staticSite = "https://e-hentai.org/s/";
 259                 List<URL> pages = new ArrayList<URL>();
 260
 261                 @SuppressWarnings("resource")
 262                 Scanner scan = new Scanner(in, "UTF-8");
 263                 scan.useDelimiter("\"");
 264                 while (scan.hasNext()) {
 265                         String line = scan.next();
 266                         if (line.startsWith(staticSite)) {
 267                                 try {
 268                                         pages.add(new URL(line));
 269                                 } catch (MalformedURLException e) {
 270                                         Instance.syserr(new IOException(
 271                                                         "Parsing error, a link is not correctly parsed: "
 272                                                                         + line, e));
 273                                 }
 274                         }
 275                 }
 276
 277                 if (pg == null) {
 278                         pg = new Progress();
 279                 }
 280                 pg.setMinMax(0, pages.size());
 281                 pg.setProgress(0);
 282
 283                 StringBuilder builder = new StringBuilder();
 284
 285                 for (URL page : pages) {
 286                         InputStream pageIn = Instance.getCache().open(page, this, false);
 287                         try {
 288                                 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
 289                                 if (link != null && !link.isEmpty()) {
 290                                         builder.append("[");
 291                                         builder.append(link);
 292                                         builder.append("]<br/>");
 293                                 }
 294                                 pg.add(1);
 295                         } finally {
 296                                 if (pageIn != null) {
 297                                         pageIn.close();
 298                                 }
 299                         }
 300                 }
 301
 302                 pg.done();
 303                 return builder.toString();
 304         }
 305 }