src/be/nikiroo/fanfix/supported/EHentai.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.IOException;
   5 import java.io.InputStream;
   6 import java.net.MalformedURLException;
   7 import java.net.URL;
   8 import java.util.ArrayList;
   9 import java.util.List;
  10 import java.util.Map.Entry;
  11 import java.util.Scanner;
  12
  13 import be.nikiroo.fanfix.Instance;
  14 import be.nikiroo.fanfix.data.Chapter;
  15 import be.nikiroo.fanfix.data.MetaData;
  16 import be.nikiroo.fanfix.data.Story;
  17 import be.nikiroo.utils.Progress;
  18 import be.nikiroo.utils.StringUtils;
  19
  20 /**
  21  * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
  22  * supporting mostly but not always NSFW comics, including some of MLP.
  23  *
  24  * @author niki
  25  */
  26 class EHentai extends BasicSupport {
  27         @Override
  28         public String getSourceName() {
  29                 return "e-hentai.org";
  30         }
  31
  32         @Override
  33         protected MetaData getMeta(URL source, InputStream in) throws IOException {
  34                 MetaData meta = new MetaData();
  35
  36                 meta.setTitle(getTitle(reset(in)));
  37                 meta.setAuthor(getAuthor(reset(in)));
  38                 meta.setDate(getDate(reset(in)));
  39                 meta.setTags(getTags(reset(in)));
  40                 meta.setSource(getSourceName());
  41                 meta.setUrl(source.toString());
  42                 meta.setPublisher(getSourceName());
  43                 meta.setUuid(source.toString());
  44                 meta.setLuid("");
  45                 meta.setLang(getLang(reset(in)));
  46                 meta.setSubject("Hentai");
  47                 meta.setType(getType().toString());
  48                 meta.setImageDocument(true);
  49                 meta.setCover(getCover(source, reset(in)));
  50                 meta.setFakeCover(true);
  51
  52                 return meta;
  53         }
  54
  55         @Override
  56         public Story process(URL url, Progress pg) throws IOException {
  57                 // There is no chapters on e621, just pagination...
  58                 Story story = super.process(url, pg);
  59
  60                 Chapter only = new Chapter(1, null);
  61                 for (Chapter chap : story) {
  62                         only.getParagraphs().addAll(chap.getParagraphs());
  63                 }
  64
  65                 story.getChapters().clear();
  66                 story.getChapters().add(only);
  67
  68                 return story;
  69         }
  70
  71         @Override
  72         protected boolean supports(URL url) {
  73                 return "e-hentai.org".equals(url.getHost());
  74         }
  75
  76         @Override
  77         protected boolean isHtml() {
  78                 return true;
  79         }
  80
  81         private BufferedImage getCover(URL source, InputStream in)
  82                         throws IOException {
  83                 BufferedImage author = null;
  84                 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
  85                 if (coverLine != null) {
  86                         coverLine = StringUtils.unhtml(coverLine).trim();
  87                         author = getImage(this, source, coverLine);
  88                 }
  89
  90                 return author;
  91         }
  92
  93         private String getAuthor(InputStream in) {
  94                 String author = null;
  95
  96                 List<String> tagsAuthor = getTagsAuthor(in);
  97                 if (!tagsAuthor.isEmpty()) {
  98                         author = tagsAuthor.get(0);
  99                 }
 100
 101                 return author;
 102         }
 103
 104         private String getLang(InputStream in) {
 105                 String lang = null;
 106
 107                 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
 108                                 "class=\"gdt2\"", "</td>");
 109                 if (langLine != null) {
 110                         langLine = StringUtils.unhtml(langLine).trim();
 111                         if (langLine.equalsIgnoreCase("English")) {
 112                                 lang = "EN";
 113                         } else if (langLine.equalsIgnoreCase("Japanese")) {
 114                                 lang = "JP";
 115                         } else if (langLine.equalsIgnoreCase("French")) {
 116                                 lang = "FR";
 117                         } else {
 118                                 // TODO find the code?
 119                                 lang = langLine;
 120                         }
 121                 }
 122
 123                 return lang;
 124         }
 125
 126         private String getDate(InputStream in) {
 127                 String date = null;
 128
 129                 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
 130                                 "class=\"gdt2\"", "</td>");
 131                 if (dateLine != null) {
 132                         dateLine = StringUtils.unhtml(dateLine).trim();
 133                         if (dateLine.length() > 10) {
 134                                 dateLine = dateLine.substring(0, 10).trim();
 135                         }
 136
 137                         date = dateLine;
 138                 }
 139
 140                 return date;
 141         }
 142
 143         private List<String> getTags(InputStream in) {
 144                 List<String> tags = new ArrayList<String>();
 145                 List<String> tagsAuthor = getTagsAuthor(in);
 146
 147                 for (int i = 1; i < tagsAuthor.size(); i++) {
 148                         tags.add(tagsAuthor.get(i));
 149                 }
 150
 151                 return tags;
 152         }
 153
 154         private List<String> getTagsAuthor(InputStream in) {
 155                 List<String> tags = new ArrayList<String>();
 156                 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
 157                                 null);
 158                 if (tagLine != null) {
 159                         for (String tag : tagLine.split(",")) {
 160                                 String candi = tag.trim();
 161                                 if (!candi.isEmpty() && !tags.contains(candi)) {
 162                                         tags.add(candi);
 163                                 }
 164                         }
 165                 }
 166
 167                 return tags;
 168         }
 169
 170         private String getTitle(InputStream in) throws IOException {
 171                 String siteName = " - E-Hentai Galleries";
 172
 173                 String title = getLine(in, "<title>", 0);
 174                 if (title != null) {
 175                         title = StringUtils.unhtml(title).trim();
 176                         if (title.endsWith(siteName)) {
 177                                 title = title.substring(0, title.length() - siteName.length())
 178                                                 .trim();
 179                         }
 180                 }
 181
 182                 return title;
 183         }
 184
 185         @Override
 186         protected String getDesc(URL source, InputStream in) throws IOException {
 187                 String desc = null;
 188
 189                 String descLine = getKeyLine(in, "Uploader Comment", null,
 190                                 "<div class=\"c7\"");
 191                 if (descLine != null) {
 192                         desc = StringUtils.unhtml(descLine);
 193                 }
 194
 195                 return desc;
 196         }
 197
 198         @Override
 199         protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
 200                         Progress pg) throws IOException {
 201                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 202                 int last = 0; // no pool/show when only one page, first page == page 0
 203
 204                 @SuppressWarnings("resource")
 205                 Scanner scan = new Scanner(in, "UTF-8");
 206                 scan.useDelimiter(">");
 207                 while (scan.hasNext()) {
 208                         String line = scan.next();
 209                         if (line.contains(source.toString())) {
 210                                 String page = line.substring(line.indexOf(source.toString()));
 211                                 String pkey = "?p=";
 212                                 if (page.contains(pkey)) {
 213                                         page = page.substring(page.indexOf(pkey) + pkey.length());
 214                                         String number = "";
 215                                         while (!page.isEmpty() && page.charAt(0) >= '0'
 216                                                         && page.charAt(0) <= '9') {
 217                                                 number += page.charAt(0);
 218                                                 page = page.substring(1);
 219                                         }
 220                                         if (number.isEmpty()) {
 221                                                 number = "0";
 222                                         }
 223
 224                                         int current = Integer.parseInt(number);
 225                                         if (last < current) {
 226                                                 last = current;
 227                                         }
 228                                 }
 229                         }
 230                 }
 231
 232                 for (int i = 0; i <= last; i++) {
 233                         final String key = Integer.toString(i + 1);
 234                         final URL value = new URL(source.toString() + "?p=" + i);
 235                         urls.add(new Entry<String, URL>() {
 236                                 public URL setValue(URL value) {
 237                                         return null;
 238                                 }
 239
 240                                 public URL getValue() {
 241                                         return value;
 242                                 }
 243
 244                                 public String getKey() {
 245                                         return key;
 246                                 }
 247                         });
 248                 }
 249
 250                 return urls;
 251         }
 252
 253         @Override
 254         protected String getChapterContent(URL source, InputStream in, int number,
 255                         Progress pg) throws IOException {
 256                 String staticSite = "https://e-hentai.org/s/";
 257                 List<URL> pages = new ArrayList<URL>();
 258
 259                 @SuppressWarnings("resource")
 260                 Scanner scan = new Scanner(in, "UTF-8");
 261                 scan.useDelimiter("\"");
 262                 while (scan.hasNext()) {
 263                         String line = scan.next();
 264                         if (line.startsWith(staticSite)) {
 265                                 try {
 266                                         pages.add(new URL(line));
 267                                 } catch (MalformedURLException e) {
 268                                         Instance.syserr(new IOException(
 269                                                         "Parsing error, a link is not correctly parsed: "
 270                                                                         + line, e));
 271                                 }
 272                         }
 273                 }
 274
 275                 if (pg == null) {
 276                         pg = new Progress();
 277                 }
 278                 pg.setMinMax(0, pages.size());
 279                 pg.setProgress(0);
 280
 281                 StringBuilder builder = new StringBuilder();
 282
 283                 for (URL page : pages) {
 284                         InputStream pageIn = Instance.getCache().open(page, this, false);
 285                         try {
 286                                 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
 287                                 if (link != null && !link.isEmpty()) {
 288                                         builder.append("[");
 289                                         builder.append(link);
 290                                         builder.append("]<br/>");
 291                                 }
 292                                 pg.add(1);
 293                         } finally {
 294                                 if (pageIn != null) {
 295                                         pageIn.close();
 296                                 }
 297                         }
 298                 }
 299
 300                 pg.done();
 301                 return builder.toString();
 302         }
 303 }