supported/EHentai.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.AbstractMap;
   8 import java.util.ArrayList;
   9 import java.util.List;
  10 import java.util.Map;
  11 import java.util.Map.Entry;
  12 import java.util.Scanner;
  13
  14 import be.nikiroo.fanfix.Instance;
  15 import be.nikiroo.fanfix.data.Chapter;
  16 import be.nikiroo.fanfix.data.MetaData;
  17 import be.nikiroo.fanfix.data.Story;
  18 import be.nikiroo.utils.Image;
  19 import be.nikiroo.utils.Progress;
  20 import be.nikiroo.utils.StringUtils;
  21
  22 /**
  23  * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
  24  * supporting mostly but not always NSFW comics, including some of MLP.
  25  *
  26  * @author niki
  27  */
  28 class EHentai extends BasicSupport_Deprecated {
  29         @Override
  30         protected MetaData getMeta(URL source, InputStream in) throws IOException {
  31                 MetaData meta = new MetaData();
  32
  33                 meta.setTitle(getTitle(reset(in)));
  34                 meta.setAuthor(getAuthor(reset(in)));
  35                 meta.setDate(getDate(reset(in)));
  36                 meta.setTags(getTags(reset(in)));
  37                 meta.setSource(getType().getSourceName());
  38                 meta.setUrl(source.toString());
  39                 meta.setPublisher(getType().getSourceName());
  40                 meta.setUuid(source.toString());
  41                 meta.setLuid("");
  42                 meta.setLang(getLang(reset(in)));
  43                 meta.setSubject("Hentai");
  44                 meta.setType(getType().toString());
  45                 meta.setImageDocument(true);
  46                 meta.setCover(getCover(source, reset(in)));
  47                 meta.setFakeCover(true);
  48
  49                 return meta;
  50         }
  51
  52         @Override
  53         public Story process(URL url, Progress pg) throws IOException {
  54                 // There is no chapters on e621, just pagination...
  55                 Story story = super.process(url, pg);
  56
  57                 Chapter only = new Chapter(1, null);
  58                 for (Chapter chap : story) {
  59                         only.getParagraphs().addAll(chap.getParagraphs());
  60                 }
  61
  62                 story.getChapters().clear();
  63                 story.getChapters().add(only);
  64
  65                 return story;
  66         }
  67
  68         @Override
  69         protected boolean supports(URL url) {
  70                 return "e-hentai.org".equals(url.getHost());
  71         }
  72
  73         @Override
  74         protected boolean isHtml() {
  75                 return true;
  76         }
  77
  78         @Override
  79         public Map<String, String> getCookies() {
  80                 Map<String, String> cookies = super.getCookies();
  81                 cookies.put("nw", "1");
  82                 return cookies;
  83         }
  84
  85         private Image getCover(URL source, InputStream in) {
  86                 Image author = null;
  87                 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
  88                 if (coverLine != null) {
  89                         coverLine = StringUtils.unhtml(coverLine).trim();
  90                         author = getImage(this, source, coverLine);
  91                 }
  92
  93                 return author;
  94         }
  95
  96         private String getAuthor(InputStream in) {
  97                 String author = null;
  98
  99                 List<String> tagsAuthor = getTagsAuthor(in);
 100                 if (!tagsAuthor.isEmpty()) {
 101                         author = tagsAuthor.get(0);
 102                 }
 103
 104                 return author;
 105         }
 106
 107         private String getLang(InputStream in) {
 108                 String lang = null;
 109
 110                 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
 111                                 "class=\"gdt2\"", "</td>");
 112                 if (langLine != null) {
 113                         langLine = StringUtils.unhtml(langLine).trim();
 114                         if (langLine.equalsIgnoreCase("English")) {
 115                                 lang = "en";
 116                         } else if (langLine.equalsIgnoreCase("Japanese")) {
 117                                 lang = "jp";
 118                         } else if (langLine.equalsIgnoreCase("French")) {
 119                                 lang = "fr";
 120                         } else {
 121                                 // TODO find the code?
 122                                 lang = langLine;
 123                         }
 124                 }
 125
 126                 return lang;
 127         }
 128
 129         private String getDate(InputStream in) {
 130                 String date = null;
 131
 132                 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
 133                                 "class=\"gdt2\"", "</td>");
 134                 if (dateLine != null) {
 135                         dateLine = StringUtils.unhtml(dateLine).trim();
 136                         if (dateLine.length() > 10) {
 137                                 dateLine = dateLine.substring(0, 10).trim();
 138                         }
 139
 140                         date = dateLine;
 141                 }
 142
 143                 return date;
 144         }
 145
 146         private List<String> getTags(InputStream in) {
 147                 List<String> tags = new ArrayList<String>();
 148                 List<String> tagsAuthor = getTagsAuthor(in);
 149
 150                 for (int i = 1; i < tagsAuthor.size(); i++) {
 151                         tags.add(tagsAuthor.get(i));
 152                 }
 153
 154                 return tags;
 155         }
 156
 157         private List<String> getTagsAuthor(InputStream in) {
 158                 List<String> tags = new ArrayList<String>();
 159                 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
 160                                 null);
 161                 if (tagLine != null) {
 162                         for (String tag : tagLine.split(",")) {
 163                                 String candi = tag.trim();
 164                                 if (!candi.isEmpty() && !tags.contains(candi)) {
 165                                         tags.add(candi);
 166                                 }
 167                         }
 168                 }
 169
 170                 return tags;
 171         }
 172
 173         private String getTitle(InputStream in) {
 174                 String siteName = " - E-Hentai Galleries";
 175
 176                 String title = getLine(in, "<title>", 0);
 177                 if (title != null) {
 178                         title = StringUtils.unhtml(title).trim();
 179                         if (title.endsWith(siteName)) {
 180                                 title = title.substring(0, title.length() - siteName.length())
 181                                                 .trim();
 182                         }
 183                 }
 184
 185                 return title;
 186         }
 187
 188         @Override
 189         protected String getDesc(URL source, InputStream in) throws IOException {
 190                 String desc = null;
 191
 192                 String descLine = getKeyLine(in, "Uploader Comment", null,
 193                                 "<div class=\"c7\"");
 194                 if (descLine != null) {
 195                         desc = StringUtils.unhtml(descLine);
 196                 }
 197
 198                 return desc;
 199         }
 200
 201         @Override
 202         protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
 203                         Progress pg) throws IOException {
 204                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 205                 int last = 0; // no pool/show when only one page, first page == page 0
 206
 207                 @SuppressWarnings("resource")
 208                 Scanner scan = new Scanner(in, "UTF-8");
 209                 scan.useDelimiter(">");
 210                 while (scan.hasNext()) {
 211                         String line = scan.next();
 212                         if (line.contains(source.toString())) {
 213                                 String page = line.substring(line.indexOf(source.toString()));
 214                                 String pkey = "?p=";
 215                                 if (page.contains(pkey)) {
 216                                         page = page.substring(page.indexOf(pkey) + pkey.length());
 217                                         String number = "";
 218                                         while (!page.isEmpty() && page.charAt(0) >= '0'
 219                                                         && page.charAt(0) <= '9') {
 220                                                 number += page.charAt(0);
 221                                                 page = page.substring(1);
 222                                         }
 223                                         if (number.isEmpty()) {
 224                                                 number = "0";
 225                                         }
 226
 227                                         int current = Integer.parseInt(number);
 228                                         if (last < current) {
 229                                                 last = current;
 230                                         }
 231                                 }
 232                         }
 233                 }
 234
 235                 for (int i = 0; i <= last; i++) {
 236                         urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
 237                                         .toString(i + 1), new URL(source.toString() + "?p=" + i)));
 238                 }
 239
 240                 return urls;
 241         }
 242
 243         @Override
 244         protected String getChapterContent(URL source, InputStream in, int number,
 245                         Progress pg) throws IOException {
 246                 String staticSite = "https://e-hentai.org/s/";
 247                 List<URL> pages = new ArrayList<URL>();
 248
 249                 @SuppressWarnings("resource")
 250                 Scanner scan = new Scanner(in, "UTF-8");
 251                 scan.useDelimiter("\"");
 252                 while (scan.hasNext()) {
 253                         String line = scan.next();
 254                         if (line.startsWith(staticSite)) {
 255                                 try {
 256                                         pages.add(new URL(line));
 257                                 } catch (MalformedURLException e) {
 258                                         Instance.getInstance().getTraceHandler()
 259                                                         .error(new IOException("Parsing error, a link is not correctly parsed: " + line, e));
 260                                 }
 261                         }
 262                 }
 263
 264                 if (pg == null) {
 265                         pg = new Progress();
 266                 }
 267                 pg.setMinMax(0, pages.size());
 268                 pg.setProgress(0);
 269
 270                 StringBuilder builder = new StringBuilder();
 271
 272                 for (URL page : pages) {
 273                         InputStream pageIn = Instance.getInstance().getCache().open(page, this, false);
 274                         try {
 275                                 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
 276                                 if (link != null && !link.isEmpty()) {
 277                                         builder.append("[");
 278                                         builder.append(link);
 279                                         builder.append("]<br/>");
 280                                 }
 281                                 pg.add(1);
 282                         } finally {
 283                                 if (pageIn != null) {
 284                                         pageIn.close();
 285                                 }
 286                         }
 287                 }
 288
 289                 pg.done();
 290                 return builder.toString();
 291         }
 292 }