New website supported on request: e-hentai.org
author Niki Roo <niki@nikiroo.be>
Tue, 18 Apr 2017 18:13:49 +0000 (20:13 +0200)
committer Niki Roo <niki@nikiroo.be>
Tue, 18 Apr 2017 18:13:49 +0000 (20:13 +0200)
README.md
changelog.md
src/be/nikiroo/fanfix/bundles/StringId.java
src/be/nikiroo/fanfix/bundles/resources.properties
src/be/nikiroo/fanfix/supported/BasicSupport.java
src/be/nikiroo/fanfix/supported/EHentai.java [new file with mode: 0644]

index d9c47e36f95a205a9349ca04ff00da0f0db59a3b..f66be31a85fda8501b02eda5a08f94d766d15e8c 100644 (file)
--- a/README.md
+++ b/README.md
@@ -98,6 +98,7 @@ Currently missing, but either in progress or planned:
   - [ ] [Two Kinds](http://twokinds.keenspot.com/)
   - [ ] [Slightly damned](http://www.sdamned.com/)
   - [ ] Others? Any ideas?
+    - [x] [e-Hentai](https://e-hentai.org/) requested
 - [x] A GUI (work in progress)
   - [x] Make one
   - [x] Make it run when no args passed
index 3a99d86e4db4583d14a74e162f7b67c3de3f4413..837260e3fe70daa4c960d5c4b1a12e0cd9799485 100644 (file)
@@ -1,6 +1,7 @@
 # Fanfix
 
 ## Version wip
+- New website supported following a request: e-hentai.org
 - Library: perf improvement when retrieving the stories (cover not loaded when not needed)
 - Library: fix the covers that were not always removed when deleting a story
 - UI: perf improvement when displaying books (cover resized then cached)
index b910aef08bbacc6ae0d9016b947a6d193a0c4e83..78b271dd529ed2f40212433fd8769549668f8c18 100644 (file)
@@ -78,6 +78,8 @@ public enum StringId {
        INPUT_DESC_MANGAFOX, //
        @Meta(what = "input format description", where = "SupportType", format = "", info = "Description of this input type")
        INPUT_DESC_E621, //
+       @Meta(what = "input format description", where = "SupportType", format = "", info = "Description of this input type")
+       INPUT_DESC_E_HENTAI, //
        @Meta(what = "output format description", where = "OutputType", format = "%s = type", info = "Default description when the type is not known by i18n")
        OUTPUT_DESC, //
        @Meta(what = "output format description", where = "OutputType", format = "", info = "Description of this output type")
index 1d142f37bb3f2fec5ed6a7397605858674c43a4b..c5c31378357347d8b57509dcae44b5ba176069b2 100644 (file)
@@ -52,19 +52,15 @@ ERR_BS_NO_COVER = Failed to download cover: %s
 # (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char)
 # Canonical OPEN SINGLE QUOTE char (for instance: `)
 OPEN_SINGLE_QUOTE = `
-OPEN_SINGLE_QUOTE_NOUTF = '
 # (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char)
 # Canonical CLOSE SINGLE QUOTE char (for instance: ‘)
 CLOSE_SINGLE_QUOTE = ‘
-CLOSE_SINGLE_QUOTE_NOUTF = '
 # (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char)
 # Canonical OPEN DOUBLE QUOTE char (for instance: “)
 OPEN_DOUBLE_QUOTE = “
-OPEN_DOUBLE_QUOTE_NOUTF = "
 # (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char)
 # Canonical CLOSE DOUBLE QUOTE char (for instance: ”)
 CLOSE_DOUBLE_QUOTE = ”
-CLOSE_DOUBLE_QUOTE_NOUTF = "
 # (WHAT: chapter name, WHERE: BasicSupport)
 # Name of the description fake chapter
 DESCRIPTION = Description
@@ -103,6 +99,9 @@ INPUT_DESC_MANGAFOX = A well filled repository of mangas, or, as their website s
 # (WHAT: input format description, WHERE: SupportType)
 # Description of this input type
 INPUT_DESC_E621 = Furry website supporting comics, including MLP
+# (WHAT: input format description, WHERE: SupportType)
+# Description of this input type
+INPUT_DESC_E_HENTAI = Website offering many comics/manga, mostly but not always NSFW (hence 'Hentai'); MLP comics can be found there, too
 # (WHAT: output format description, WHERE: OutputType, FORMAT: %s = type)
 # Default description when the type is not known by i18n
 OUTPUT_DESC = Unknown type: %s
index b230bc3252e249caf57560b9c394cefa5fd135f8..c0419aa46c4c2d61986f2742cb3099fbcbab3840 100644 (file)
@@ -62,6 +62,8 @@ public abstract class BasicSupport {
                E621,
                /** Furry website with stories */
                YIFFSTAR,
+               /** Comics and image groups, mostly but not only NSFW */
+               E_HENTAI,
                /** CBZ files */
                CBZ,
                /** HTML files */
@@ -1389,6 +1391,8 @@ public abstract class BasicSupport {
                        return new E621().setType(type);
                case YIFFSTAR:
                        return new YiffStar().setType(type);
+               case E_HENTAI:
+                       return new EHentai().setType(type);
                case CBZ:
                        return new Cbz().setType(type);
                case HTML:
@@ -1479,4 +1483,47 @@ public abstract class BasicSupport {
 
                return rep;
        }
+
+       /**
+        * Return the text between the key and the endKey (an optional subKey can
+        * be passed, in this case we will look for the key first, then take the
+        * text between the subKey and the endKey).
+        * <p>
+        * Will only match the first line with the given key if more than one are
+        * possible. Which also means that if the subKey or endKey is not found on
+        * that line, NULL will be returned.
+        * 
+        * @param in
+        *            the input
+        * @param key
+        *            the key to match
+        * @param subKey
+        *            the sub key or NULL if none
+        * @param endKey
+        *            the end key or NULL for "up to the end of the line"
+        * @return the text or NULL if not found
+        */
+       static String getKeyLine(InputStream in, String key, String subKey,
+                       String endKey) {
+               String result = null;
+
+               String line = getLine(in, key, 0);
+               if (line != null && line.contains(key)) {
+                       line = line.substring(line.indexOf(key) + key.length());
+                       if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
+                               if (subKey != null) {
+                                       line = line.substring(line.indexOf(subKey)
+                                                       + subKey.length());
+                               }
+                               if (endKey == null || line.contains(endKey)) {
+                                       if (endKey != null) {
+                                               line = line.substring(0, line.indexOf(endKey));
+                                       }
+                                       result = line; // set whether or not an endKey was given
+                               }
+                       }
+               }
+
+               return result;
+       }
 }
diff --git a/src/be/nikiroo/fanfix/supported/EHentai.java b/src/be/nikiroo/fanfix/supported/EHentai.java
new file mode 100644 (file)
index 0000000..eae377c
--- /dev/null
@@ -0,0 +1,264 @@
+package be.nikiroo.fanfix.supported;
+
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Scanner;
+
+import be.nikiroo.fanfix.Instance;
+import be.nikiroo.fanfix.data.Chapter;
+import be.nikiroo.fanfix.data.MetaData;
+import be.nikiroo.fanfix.data.Story;
+import be.nikiroo.utils.Progress;
+import be.nikiroo.utils.StringUtils;
+
+/**
+ * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
+ * supporting mostly but not always NSFW comics, including some of MLP.
+ * 
+ * @author niki
+ */
+class EHentai extends BasicSupport {
+       @Override
+       public String getSourceName() {
+               return "e-hentai.org"; // also used by getMeta as source and publisher
+       }
+
+       @Override
+       protected MetaData getMeta(URL source, InputStream in) throws IOException {
+               MetaData meta = new MetaData();
+
+               meta.setTitle(getTitle(reset(in))); // reset(in) before each page parser
+               meta.setAuthor(getAuthor(reset(in))); // first item of the "Tags: " list
+               meta.setDate(""); // no date is parsed from the page
+               meta.setTags(getTags(reset(in))); // remaining items of that same list
+               meta.setSource(getSourceName());
+               meta.setUrl(source.toString());
+               meta.setPublisher(getSourceName());
+               meta.setUuid(source.toString()); // the URL doubles as unique ID
+               meta.setLuid("");
+               meta.setLang("EN"); // hard-coded, not read from the page
+               meta.setSubject("Furry"); // NOTE(review): hard-coded; site is not furry-only — confirm
+               meta.setType(getType().toString());
+               meta.setImageDocument(true);
+               meta.setCover(getCover(source, reset(in)));
+               meta.setFakeCover(true);
+
+               return meta;
+       }
+
+       @Override
+       public Story process(URL url, Progress pg) throws IOException {
+               // There is no chapters on e621, just pagination...
+               Story story = super.process(url, pg);
+
+               Chapter only = new Chapter(1, null);
+               for (Chapter chap : story) {
+                       only.getParagraphs().addAll(chap.getParagraphs());
+               }
+
+               story.getChapters().clear();
+               story.getChapters().add(only);
+
+               return story;
+       }
+
+       @Override
+       protected boolean supports(URL url) {
+               return "e-hentai.org".equals(url.getHost()); // exact host match only
+       }
+
+       @Override
+       protected boolean isHtml() {
+               return true; // NOTE(review): presumably flags the input as HTML for BasicSupport — confirm
+       }
+
+       private BufferedImage getCover(URL source, InputStream in)
+                       throws IOException {
+               // The cover address is the url(...) value on the <div id="gd1" line
+               String coverUrl = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
+               if (coverUrl == null) {
+                       return null;
+               }
+
+               coverUrl = StringUtils.unhtml(coverUrl).trim();
+               return getImage(this, source, coverUrl);
+       }
+
+       private String getAuthor(InputStream in) {
+               // The author is taken to be the first item of the "Tags: " list;
+               // NULL when that list is empty
+               List<String> entries = getTagsAuthor(in);
+               if (entries.isEmpty()) {
+                       return null;
+               }
+
+               return entries.get(0);
+       }
+
+       private List<String> getTags(InputStream in) {
+               // Item 0 of the list is used as the author, the tags are the rest
+               List<String> entries = getTagsAuthor(in);
+
+               List<String> tags = new ArrayList<String>();
+               if (entries.size() > 1) {
+                       tags.addAll(entries.subList(1, entries.size()));
+               }
+               return tags;
+       }
+
+       private List<String> getTagsAuthor(InputStream in) {
+               List<String> found = new ArrayList<String>();
+               String raw = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
+                               null);
+               if (raw == null) {
+                       return found;
+               }
+               for (String item : raw.split(",")) { // comma-separated list
+                       String name = item.trim();
+                       if (!(name.isEmpty() || found.contains(name))) {
+                               found.add(name); // blanks and duplicates are skipped
+                       }
+               }
+               return found;
+       }
+
+       private String getTitle(InputStream in) throws IOException {
+               String suffix = " - E-Hentai Galleries";
+               String rawTitle = getLine(in, "<title>", 0);
+               if (rawTitle == null) {
+                       return null;
+               }
+               // Drop the trailing site name from the page title, when present
+               String title = StringUtils.unhtml(rawTitle).trim();
+               if (title.endsWith(suffix)) {
+                       int cut = title.length() - suffix.length();
+                       title = title.substring(0, cut).trim();
+               }
+               return title;
+       }
+
+       @Override
+       protected String getDesc(URL source, InputStream in) throws IOException {
+               // Text that follows "Uploader Comment", cut at <div class="c7"
+               // (both markers must be on the same line for a match)
+               String rawComment = getKeyLine(in, "Uploader Comment", null,
+                               "<div class=\"c7\"");
+               if (rawComment == null) {
+                       return null;
+               }
+
+               return StringUtils.unhtml(rawComment);
+       }
+
+       @Override
+       protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
+                       Progress pg) throws IOException {
+               List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
+               int last = 0; // no pool/show when only one page, first page == page 0
+
+               @SuppressWarnings("resource")
+               Scanner scan = new Scanner(in, "UTF-8");
+               scan.useDelimiter(">");
+               while (scan.hasNext()) {
+                       String line = scan.next();
+                       if (line.contains(source.toString())) {
+                               String page = line.substring(line.indexOf(source.toString()));
+                               String pkey = "?p=";
+                               if (page.contains(pkey)) {
+                                       page = page.substring(page.indexOf(pkey) + pkey.length());
+                                       String number = "";
+                                       while (!page.isEmpty() && page.charAt(0) >= '0'
+                                                       && page.charAt(0) <= '9') {
+                                               number += page.charAt(0);
+                                               page = page.substring(1);
+                                       }
+                                       if (number.isEmpty()) {
+                                               number = "0";
+                                       }
+
+                                       int current = Integer.parseInt(number);
+                                       if (last < current) {
+                                               last = current;
+                                       }
+                               }
+                       }
+               }
+
+               for (int i = 0; i <= last; i++) {
+                       final String key = Integer.toString(i + 1);
+                       final URL value = new URL(source.toString() + "?p=" + i);
+                       urls.add(new Entry<String, URL>() {
+                               public URL setValue(URL value) {
+                                       return null;
+                               }
+
+                               public URL getValue() {
+                                       return value;
+                               }
+
+                               public String getKey() {
+                                       return key;
+                               }
+                       });
+               }
+
+               return urls;
+       }
+
+       @Override
+       protected String getChapterContent(URL source, InputStream in, int number,
+                       Progress pg) throws IOException {
+               String staticSite = "https://e-hentai.org/s/";
+               List<URL> pages = new ArrayList<URL>();
+
+               @SuppressWarnings("resource")
+               Scanner scan = new Scanner(in, "UTF-8");
+               scan.useDelimiter("\"");
+               while (scan.hasNext()) {
+                       String line = scan.next();
+                       if (line.startsWith(staticSite)) {
+                               try {
+                                       pages.add(new URL(line));
+                               } catch (MalformedURLException e) {
+                                       Instance.syserr(new IOException(
+                                                       "Parsing error, a link is not correctly parsed: "
+                                                                       + line, e));
+                               }
+                       }
+               }
+
+               if (pg == null) {
+                       pg = new Progress();
+               }
+               pg.setMinMax(0, pages.size());
+               pg.setProgress(0);
+
+               StringBuilder builder = new StringBuilder();
+
+               for (URL page : pages) {
+                       InputStream pageIn = Instance.getCache().open(page, this, false);
+                       try {
+                               String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
+                               if (link != null && !link.isEmpty()) {
+                                       builder.append("[");
+                                       builder.append(link);
+                                       builder.append("]<br/>");
+                               }
+                               pg.add(1);
+                       } finally {
+                               if (pageIn != null) {
+                                       pageIn.close();
+                               }
+                       }
+               }
+
+               pg.done();
+               return builder.toString();
+       }
+}