supported/EHentai.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.AbstractMap;
   8 import java.util.ArrayList;
   9 import java.util.List;
  10 import java.util.Map;
  11 import java.util.Map.Entry;
  12 import java.util.Scanner;
  13
  14 import be.nikiroo.fanfix.Instance;
  15 import be.nikiroo.fanfix.data.Chapter;
  16 import be.nikiroo.fanfix.data.MetaData;
  17 import be.nikiroo.fanfix.data.Story;
  18 import be.nikiroo.utils.Image;
  19 import be.nikiroo.utils.Progress;
  20 import be.nikiroo.utils.StringUtils;
  21
  22 /**
  23  * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
  24  * supporting mostly but not always NSFW comics, including some of MLP.
  25  *
  26  * @author niki
  27  */
  28 class EHentai extends BasicSupport_Deprecated {
  29         @Override
  30         protected MetaData getMeta(URL source, InputStream in) throws IOException {
  31                 MetaData meta = new MetaData();
  32
  33                 meta.setTitle(getTitle(reset(in)));
  34                 meta.setAuthor(getAuthor(reset(in)));
  35                 meta.setDate(getDate(reset(in)));
  36                 meta.setTags(getTags(reset(in)));
  37                 meta.setUrl(source.toString());
  38                 meta.setUuid(source.toString());
  39                 meta.setLuid("");
  40                 meta.setLang(getLang(reset(in)));
  41                 meta.setSubject("Hentai");
  42                 meta.setImageDocument(true);
  43                 meta.setCover(getCover(source, reset(in)));
  44                 meta.setFakeCover(true);
  45
  46                 return meta;
  47         }
  48
  49         @Override
  50         public Story process(URL url, Progress pg) throws IOException {
  51                 // There is no chapters on e621, just pagination...
  52                 Story story = super.process(url, pg);
  53
  54                 Chapter only = new Chapter(1, "");
  55                 for (Chapter chap : story) {
  56                         only.getParagraphs().addAll(chap.getParagraphs());
  57                 }
  58
  59                 story.getChapters().clear();
  60                 story.getChapters().add(only);
  61
  62                 return story;
  63         }
  64
  65         @Override
  66         protected boolean supports(URL url) {
  67                 return "e-hentai.org".equals(url.getHost());
  68         }
  69
  70         @Override
  71         protected boolean isHtml() {
  72                 return true;
  73         }
  74
  75         @Override
  76         public Map<String, String> getCookies() {
  77                 Map<String, String> cookies = super.getCookies();
  78                 cookies.put("nw", "1");
  79                 return cookies;
  80         }
  81
  82         private Image getCover(URL source, InputStream in) {
  83                 Image author = null;
  84                 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
  85                 if (coverLine != null) {
  86                         coverLine = StringUtils.unhtml(coverLine).trim();
  87                         author = getImage(this, source, coverLine);
  88                 }
  89
  90                 return author;
  91         }
  92
  93         private String getAuthor(InputStream in) {
  94                 String author = null;
  95
  96                 List<String> tagsAuthor = getTagsAuthor(in);
  97                 if (!tagsAuthor.isEmpty()) {
  98                         author = tagsAuthor.get(0);
  99                 }
 100
 101                 return author;
 102         }
 103
 104         private String getLang(InputStream in) {
 105                 String lang = null;
 106
 107                 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
 108                                 "class=\"gdt2\"", "</td>");
 109                 if (langLine != null) {
 110                         langLine = StringUtils.unhtml(langLine).trim();
 111                         if (langLine.equalsIgnoreCase("English")) {
 112                                 lang = "en";
 113                         } else if (langLine.equalsIgnoreCase("Japanese")) {
 114                                 lang = "jp";
 115                         } else if (langLine.equalsIgnoreCase("French")) {
 116                                 lang = "fr";
 117                         } else {
 118                                 // TODO find the code for other languages?
 119                                 lang = langLine;
 120                         }
 121                 }
 122
 123                 return lang;
 124         }
 125
 126         private String getDate(InputStream in) {
 127                 String date = null;
 128
 129                 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
 130                                 "class=\"gdt2\"", "</td>");
 131                 if (dateLine != null) {
 132                         dateLine = StringUtils.unhtml(dateLine).trim();
 133                         if (dateLine.length() > 10) {
 134                                 dateLine = dateLine.substring(0, 10).trim();
 135                         }
 136
 137                         date = dateLine;
 138                 }
 139
 140                 return date;
 141         }
 142
 143         private List<String> getTags(InputStream in) {
 144                 List<String> tags = new ArrayList<String>();
 145                 List<String> tagsAuthor = getTagsAuthor(in);
 146
 147                 for (int i = 1; i < tagsAuthor.size(); i++) {
 148                         tags.add(tagsAuthor.get(i));
 149                 }
 150
 151                 return tags;
 152         }
 153
 154         private List<String> getTagsAuthor(InputStream in) {
 155                 List<String> tags = new ArrayList<String>();
 156                 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
 157                                 null);
 158                 if (tagLine != null) {
 159                         for (String tag : tagLine.split(",")) {
 160                                 String candi = tag.trim();
 161                                 if (!candi.isEmpty() && !tags.contains(candi)) {
 162                                         tags.add(candi);
 163                                 }
 164                         }
 165                 }
 166
 167                 return tags;
 168         }
 169
 170         private String getTitle(InputStream in) {
 171                 String siteName = " - E-Hentai Galleries";
 172
 173                 String title = getLine(in, "<title>", 0);
 174                 if (title != null) {
 175                         title = StringUtils.unhtml(title).trim();
 176                         if (title.endsWith(siteName)) {
 177                                 title = title.substring(0, title.length() - siteName.length())
 178                                                 .trim();
 179                         }
 180                 }
 181
 182                 return title;
 183         }
 184
 185         @Override
 186         protected String getDesc(URL source, InputStream in) throws IOException {
 187                 String desc = null;
 188
 189                 String descLine = getKeyLine(in, "Uploader Comment", null,
 190                                 "<div class=\"c7\"");
 191                 if (descLine != null) {
 192                         desc = StringUtils.unhtml(descLine);
 193                 }
 194
 195                 return desc;
 196         }
 197
 198         @Override
 199         protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
 200                         Progress pg) throws IOException {
 201                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 202                 int last = 0; // no pool/show when only one page, first page == page 0
 203
 204                 @SuppressWarnings("resource")
 205                 Scanner scan = new Scanner(in, "UTF-8");
 206                 scan.useDelimiter(">");
 207                 while (scan.hasNext()) {
 208                         String line = scan.next();
 209                         if (line.contains(source.toString())) {
 210                                 String page = line.substring(line.indexOf(source.toString()));
 211                                 String pkey = "?p=";
 212                                 if (page.contains(pkey)) {
 213                                         page = page.substring(page.indexOf(pkey) + pkey.length());
 214                                         String number = "";
 215                                         while (!page.isEmpty() && page.charAt(0) >= '0'
 216                                                         && page.charAt(0) <= '9') {
 217                                                 number += page.charAt(0);
 218                                                 page = page.substring(1);
 219                                         }
 220                                         if (number.isEmpty()) {
 221                                                 number = "0";
 222                                         }
 223
 224                                         int current = Integer.parseInt(number);
 225                                         if (last < current) {
 226                                                 last = current;
 227                                         }
 228                                 }
 229                         }
 230                 }
 231
 232                 for (int i = 0; i <= last; i++) {
 233                         urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
 234                                         .toString(i + 1), new URL(source.toString() + "?p=" + i)));
 235                 }
 236
 237                 return urls;
 238         }
 239
 240         @Override
 241         protected String getChapterContent(URL source, InputStream in, int number,
 242                         Progress pg) throws IOException {
 243                 String staticSite = "https://e-hentai.org/s/";
 244                 List<URL> pages = new ArrayList<URL>();
 245
 246                 @SuppressWarnings("resource")
 247                 Scanner scan = new Scanner(in, "UTF-8");
 248                 scan.useDelimiter("\"");
 249                 while (scan.hasNext()) {
 250                         String line = scan.next();
 251                         if (line.startsWith(staticSite)) {
 252                                 try {
 253                                         pages.add(new URL(line));
 254                                 } catch (MalformedURLException e) {
 255                                         Instance.getInstance().getTraceHandler()
 256                                                         .error(new IOException("Parsing error, a link is not correctly parsed: " + line, e));
 257                                 }
 258                         }
 259                 }
 260
 261                 if (pg == null) {
 262                         pg = new Progress();
 263                 }
 264                 pg.setMinMax(0, pages.size());
 265                 pg.setProgress(0);
 266
 267                 StringBuilder builder = new StringBuilder();
 268
 269                 for (URL page : pages) {
 270                         InputStream pageIn = Instance.getInstance().getCache().open(page, this, false);
 271                         try {
 272                                 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
 273                                 if (link != null && !link.isEmpty()) {
 274                                         builder.append("[");
 275                                         builder.append(link);
 276                                         builder.append("]<br/>");
 277                                 }
 278                                 pg.add(1);
 279                         } finally {
 280                                 if (pageIn != null) {
 281                                         pageIn.close();
 282                                 }
 283                         }
 284                 }
 285
 286                 pg.done();
 287                 return builder.toString();
 288         }
 289 }