src/be/nikiroo/fanfix/supported/EHentai.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.AbstractMap;
   8 import java.util.ArrayList;
   9 import java.util.List;
  10 import java.util.Map;
  11 import java.util.Map.Entry;
  12 import java.util.Scanner;
  13
  14 import be.nikiroo.fanfix.Instance;
  15 import be.nikiroo.fanfix.data.Chapter;
  16 import be.nikiroo.fanfix.data.MetaData;
  17 import be.nikiroo.fanfix.data.Story;
  18 import be.nikiroo.utils.Image;
  19 import be.nikiroo.utils.Progress;
  20 import be.nikiroo.utils.StringUtils;
  21
  22 /**
  23  * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
  24  * supporting mostly but not always NSFW comics, including some of MLP.
  25  *
  26  * @author niki
  27  */
  28 class EHentai extends BasicSupport_Deprecated {
  29         @Override
  30         public String getSourceName() {
  31                 return "e-hentai.org";
  32         }
  33
  34         @Override
  35         protected MetaData getMeta(URL source, InputStream in) throws IOException {
  36                 MetaData meta = new MetaData();
  37
  38                 meta.setTitle(getTitle(reset(in)));
  39                 meta.setAuthor(getAuthor(reset(in)));
  40                 meta.setDate(getDate(reset(in)));
  41                 meta.setTags(getTags(reset(in)));
  42                 meta.setSource(getSourceName());
  43                 meta.setUrl(source.toString());
  44                 meta.setPublisher(getSourceName());
  45                 meta.setUuid(source.toString());
  46                 meta.setLuid("");
  47                 meta.setLang(getLang(reset(in)));
  48                 meta.setSubject("Hentai");
  49                 meta.setType(getType().toString());
  50                 meta.setImageDocument(true);
  51                 meta.setCover(getCover(source, reset(in)));
  52                 meta.setFakeCover(true);
  53
  54                 return meta;
  55         }
  56
  57         @Override
  58         public Story process(URL url, Progress pg) throws IOException {
  59                 // There is no chapters on e621, just pagination...
  60                 Story story = super.process(url, pg);
  61
  62                 Chapter only = new Chapter(1, null);
  63                 for (Chapter chap : story) {
  64                         only.getParagraphs().addAll(chap.getParagraphs());
  65                 }
  66
  67                 story.getChapters().clear();
  68                 story.getChapters().add(only);
  69
  70                 return story;
  71         }
  72
  73         @Override
  74         protected boolean supports(URL url) {
  75                 return "e-hentai.org".equals(url.getHost());
  76         }
  77
  78         @Override
  79         protected boolean isHtml() {
  80                 return true;
  81         }
  82
  83         @Override
  84         public Map<String, String> getCookies() {
  85                 // TODO Auto-generated method stub
  86                 Map<String, String> cookies = super.getCookies();
  87                 cookies.put("nw", "1");
  88                 return cookies;
  89         }
  90
  91         private Image getCover(URL source, InputStream in) {
  92                 Image author = null;
  93                 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
  94                 if (coverLine != null) {
  95                         coverLine = StringUtils.unhtml(coverLine).trim();
  96                         author = getImage(this, source, coverLine);
  97                 }
  98
  99                 return author;
 100         }
 101
 102         private String getAuthor(InputStream in) {
 103                 String author = null;
 104
 105                 List<String> tagsAuthor = getTagsAuthor(in);
 106                 if (!tagsAuthor.isEmpty()) {
 107                         author = tagsAuthor.get(0);
 108                 }
 109
 110                 return author;
 111         }
 112
 113         private String getLang(InputStream in) {
 114                 String lang = null;
 115
 116                 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
 117                                 "class=\"gdt2\"", "</td>");
 118                 if (langLine != null) {
 119                         langLine = StringUtils.unhtml(langLine).trim();
 120                         if (langLine.equalsIgnoreCase("English")) {
 121                                 lang = "en";
 122                         } else if (langLine.equalsIgnoreCase("Japanese")) {
 123                                 lang = "jp";
 124                         } else if (langLine.equalsIgnoreCase("French")) {
 125                                 lang = "fr";
 126                         } else {
 127                                 // TODO find the code?
 128                                 lang = langLine;
 129                         }
 130                 }
 131
 132                 return lang;
 133         }
 134
 135         private String getDate(InputStream in) {
 136                 String date = null;
 137
 138                 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
 139                                 "class=\"gdt2\"", "</td>");
 140                 if (dateLine != null) {
 141                         dateLine = StringUtils.unhtml(dateLine).trim();
 142                         if (dateLine.length() > 10) {
 143                                 dateLine = dateLine.substring(0, 10).trim();
 144                         }
 145
 146                         date = dateLine;
 147                 }
 148
 149                 return date;
 150         }
 151
 152         private List<String> getTags(InputStream in) {
 153                 List<String> tags = new ArrayList<String>();
 154                 List<String> tagsAuthor = getTagsAuthor(in);
 155
 156                 for (int i = 1; i < tagsAuthor.size(); i++) {
 157                         tags.add(tagsAuthor.get(i));
 158                 }
 159
 160                 return tags;
 161         }
 162
 163         private List<String> getTagsAuthor(InputStream in) {
 164                 List<String> tags = new ArrayList<String>();
 165                 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
 166                                 null);
 167                 if (tagLine != null) {
 168                         for (String tag : tagLine.split(",")) {
 169                                 String candi = tag.trim();
 170                                 if (!candi.isEmpty() && !tags.contains(candi)) {
 171                                         tags.add(candi);
 172                                 }
 173                         }
 174                 }
 175
 176                 return tags;
 177         }
 178
 179         private String getTitle(InputStream in) {
 180                 String siteName = " - E-Hentai Galleries";
 181
 182                 String title = getLine(in, "<title>", 0);
 183                 if (title != null) {
 184                         title = StringUtils.unhtml(title).trim();
 185                         if (title.endsWith(siteName)) {
 186                                 title = title.substring(0, title.length() - siteName.length())
 187                                                 .trim();
 188                         }
 189                 }
 190
 191                 return title;
 192         }
 193
 194         @Override
 195         protected String getDesc(URL source, InputStream in) throws IOException {
 196                 String desc = null;
 197
 198                 String descLine = getKeyLine(in, "Uploader Comment", null,
 199                                 "<div class=\"c7\"");
 200                 if (descLine != null) {
 201                         desc = StringUtils.unhtml(descLine);
 202                 }
 203
 204                 return desc;
 205         }
 206
 207         @Override
 208         protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
 209                         Progress pg) throws IOException {
 210                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 211                 int last = 0; // no pool/show when only one page, first page == page 0
 212
 213                 @SuppressWarnings("resource")
 214                 Scanner scan = new Scanner(in, "UTF-8");
 215                 scan.useDelimiter(">");
 216                 while (scan.hasNext()) {
 217                         String line = scan.next();
 218                         if (line.contains(source.toString())) {
 219                                 String page = line.substring(line.indexOf(source.toString()));
 220                                 String pkey = "?p=";
 221                                 if (page.contains(pkey)) {
 222                                         page = page.substring(page.indexOf(pkey) + pkey.length());
 223                                         String number = "";
 224                                         while (!page.isEmpty() && page.charAt(0) >= '0'
 225                                                         && page.charAt(0) <= '9') {
 226                                                 number += page.charAt(0);
 227                                                 page = page.substring(1);
 228                                         }
 229                                         if (number.isEmpty()) {
 230                                                 number = "0";
 231                                         }
 232
 233                                         int current = Integer.parseInt(number);
 234                                         if (last < current) {
 235                                                 last = current;
 236                                         }
 237                                 }
 238                         }
 239                 }
 240
 241                 for (int i = 0; i <= last; i++) {
 242                         urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
 243                                         .toString(i + 1), new URL(source.toString() + "?p=" + i)));
 244                 }
 245
 246                 return urls;
 247         }
 248
 249         @Override
 250         protected String getChapterContent(URL source, InputStream in, int number,
 251                         Progress pg) throws IOException {
 252                 String staticSite = "https://e-hentai.org/s/";
 253                 List<URL> pages = new ArrayList<URL>();
 254
 255                 @SuppressWarnings("resource")
 256                 Scanner scan = new Scanner(in, "UTF-8");
 257                 scan.useDelimiter("\"");
 258                 while (scan.hasNext()) {
 259                         String line = scan.next();
 260                         if (line.startsWith(staticSite)) {
 261                                 try {
 262                                         pages.add(new URL(line));
 263                                 } catch (MalformedURLException e) {
 264                                         Instance.getTraceHandler().error(
 265                                                         new IOException(
 266                                                                         "Parsing error, a link is not correctly parsed: "
 267                                                                                         + line, e));
 268                                 }
 269                         }
 270                 }
 271
 272                 if (pg == null) {
 273                         pg = new Progress();
 274                 }
 275                 pg.setMinMax(0, pages.size());
 276                 pg.setProgress(0);
 277
 278                 StringBuilder builder = new StringBuilder();
 279
 280                 for (URL page : pages) {
 281                         InputStream pageIn = Instance.getCache().open(page, this, false);
 282                         try {
 283                                 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
 284                                 if (link != null && !link.isEmpty()) {
 285                                         builder.append("[");
 286                                         builder.append(link);
 287                                         builder.append("]<br/>");
 288                                 }
 289                                 pg.add(1);
 290                         } finally {
 291                                 if (pageIn != null) {
 292                                         pageIn.close();
 293                                 }
 294                         }
 295                 }
 296
 297                 pg.done();
 298                 return builder.toString();
 299         }
 300 }