# (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char)
# Canonical OPEN SINGLE QUOTE char (for instance: `)
OPEN_SINGLE_QUOTE = `
-OPEN_SINGLE_QUOTE_NOUTF = '
# (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char)
# Canonical CLOSE SINGLE QUOTE char (for instance: ‘)
CLOSE_SINGLE_QUOTE = ‘
-CLOSE_SINGLE_QUOTE_NOUTF = '
# (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char)
# Canonical OPEN DOUBLE QUOTE char (for instance: “)
OPEN_DOUBLE_QUOTE = “
-OPEN_DOUBLE_QUOTE_NOUTF = "
# (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char)
# Canonical CLOSE DOUBLE QUOTE char (for instance: ”)
CLOSE_DOUBLE_QUOTE = ”
-CLOSE_DOUBLE_QUOTE_NOUTF = "
# (WHAT: chapter name, WHERE: BasicSupport)
# Name of the description fake chapter
DESCRIPTION = Description
# (WHAT: input format description, WHERE: SupportType)
# Description of this input type
INPUT_DESC_E621 = Furry website supporting comics, including MLP
+# (WHAT: input format description, WHERE: SupportType)
+# Description of this input type
+INPUT_DESC_E_HENTAI = Website offering many comics/manga, mostly but not always NSFW (hence 'Hentai'); MLP comics can be found there, too
# (WHAT: output format description, WHERE: OutputType, FORMAT: %s = type)
# Default description when the type is not known by i18n
OUTPUT_DESC = Unknown type: %s
E621,
/** Furry website with stories */
YIFFSTAR,
+ /** Comics and images groups, mostly but not only NSFW */
+ E_HENTAI,
/** CBZ files */
CBZ,
/** HTML files */
return new E621().setType(type);
case YIFFSTAR:
return new YiffStar().setType(type);
+ case E_HENTAI:
+ return new EHentai().setType(type);
case CBZ:
return new Cbz().setType(type);
case HTML:
return rep;
}
+
+ /**
+  * Return the text found after the key, optionally narrowed by a subKey and
+  * an endKey, on the line returned by {@code getLine(in, key, 0)}.
+  * <p>
+  * The extraction is done in three steps, all on that single line:
+  * <ol>
+  * <li>everything up to and including the first occurrence of the key is
+  * dropped;</li>
+  * <li>if a non-empty subKey is given, everything up to and including its
+  * first occurrence (after the key) is dropped as well;</li>
+  * <li>if an endKey is given, the text is cut just before its first
+  * occurrence; if the endKey is NULL, the text runs up to the end of the
+  * line.</li>
+  * </ol>
+  * <p>
+  * Only one line is ever inspected, which also means that if the subKey or
+  * the endKey is requested but not found on that line, NULL is returned.
+  *
+  * @param in
+  *            the input
+  * @param key
+  *            the key to match
+  * @param subKey
+  *            the sub key, or NULL (or an empty String) if none
+  * @param endKey
+  *            the end key, or NULL for "up to the end"
+  *
+  * @return the text, or NULL if not found
+  */
+ static String getKeyLine(InputStream in, String key, String subKey,
+         String endKey) {
+     String line = getLine(in, key, 0);
+     if (line == null) {
+         return null;
+     }
+
+     int keyPos = line.indexOf(key);
+     if (keyPos < 0) {
+         return null;
+     }
+     line = line.substring(keyPos + key.length());
+
+     if (subKey != null && !subKey.isEmpty()) {
+         int subPos = line.indexOf(subKey);
+         if (subPos < 0) {
+             return null;
+         }
+         line = line.substring(subPos + subKey.length());
+     }
+
+     if (endKey != null) {
+         int endPos = line.indexOf(endKey);
+         if (endPos < 0) {
+             return null;
+         }
+         line = line.substring(0, endPos);
+     }
+
+     // A NULL endKey means "up to the end of the line": the remaining text
+     // is the result (it used to be dropped, so NULL was always returned).
+     return line;
+ }
}
--- /dev/null
+package be.nikiroo.fanfix.supported;
+
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Scanner;
+
+import be.nikiroo.fanfix.Instance;
+import be.nikiroo.fanfix.data.Chapter;
+import be.nikiroo.fanfix.data.MetaData;
+import be.nikiroo.fanfix.data.Story;
+import be.nikiroo.utils.Progress;
+import be.nikiroo.utils.StringUtils;
+
+/**
+ * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
+ * offering mostly (but not only) NSFW comics, including some MLP ones.
+ * <p>
+ * A gallery is handled as an image document: each index page of the gallery
+ * ("?p=N") is first read as a chapter, then all the chapters are merged into
+ * a single one.
+ *
+ * @author niki
+ */
+class EHentai extends BasicSupport {
+    @Override
+    public String getSourceName() {
+        return "e-hentai.org";
+    }
+
+    /**
+     * Build the meta data of the gallery; the input is reset before each
+     * piece of information is extracted from it.
+     */
+    @Override
+    protected MetaData getMeta(URL source, InputStream in) throws IOException {
+        MetaData meta = new MetaData();
+
+        meta.setTitle(getTitle(reset(in)));
+        meta.setAuthor(getAuthor(reset(in)));
+        meta.setDate("");
+        meta.setTags(getTags(reset(in)));
+
+        meta.setSource(getSourceName());
+        meta.setUrl(source.toString());
+        meta.setPublisher(getSourceName());
+        meta.setUuid(source.toString());
+        meta.setLuid("");
+        meta.setLang("EN");
+        // NOTE(review): "Furry" looks inherited from another support class;
+        // confirm that it is the intended subject for this website
+        meta.setSubject("Furry");
+        meta.setType(getType().toString());
+
+        meta.setImageDocument(true);
+        meta.setCover(getCover(source, reset(in)));
+        meta.setFakeCover(true);
+
+        return meta;
+    }
+
+    /**
+     * Process the gallery, then merge every chapter into a single one (the
+     * "chapters" built by this class are only index pages of one gallery).
+     */
+    @Override
+    public Story process(URL url, Progress pg) throws IOException {
+        Story story = super.process(url, pg);
+
+        Chapter merged = new Chapter(1, null);
+        for (Chapter page : story) {
+            merged.getParagraphs().addAll(page.getParagraphs());
+        }
+
+        story.getChapters().clear();
+        story.getChapters().add(merged);
+
+        return story;
+    }
+
+    @Override
+    protected boolean supports(URL url) {
+        String host = url.getHost();
+        return "e-hentai.org".equals(host);
+    }
+
+    @Override
+    protected boolean isHtml() {
+        return true;
+    }
+
+    /**
+     * Fetch the cover image, whose address is taken from the "url(...)" part
+     * of the line containing the "gd1" div.
+     *
+     * @param source
+     *            the source URL of the gallery
+     * @param in
+     *            the input (main page of the gallery)
+     *
+     * @return the cover, or NULL if its address was not found
+     *
+     * @throws IOException
+     *             in case of I/O error
+     */
+    private BufferedImage getCover(URL source, InputStream in)
+            throws IOException {
+        String coverUrl = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
+        if (coverUrl == null) {
+            return null;
+        }
+
+        return getImage(this, source, StringUtils.unhtml(coverUrl).trim());
+    }
+
+    /**
+     * The author is the first entry of the "Tags: " list (see
+     * {@link EHentai#getTagsAuthor(InputStream)}).
+     *
+     * @param in
+     *            the input (main page of the gallery)
+     *
+     * @return the author, or NULL if the list is empty
+     */
+    private String getAuthor(InputStream in) {
+        List<String> entries = getTagsAuthor(in);
+        return entries.isEmpty() ? null : entries.get(0);
+    }
+
+    /**
+     * The tags are all the entries of the "Tags: " list except the first one,
+     * which is used as the author (see
+     * {@link EHentai#getTagsAuthor(InputStream)}).
+     *
+     * @param in
+     *            the input (main page of the gallery)
+     *
+     * @return the tags (never NULL, but can be empty)
+     */
+    private List<String> getTags(InputStream in) {
+        List<String> entries = getTagsAuthor(in);
+
+        List<String> tags = new ArrayList<String>();
+        if (entries.size() > 1) {
+            tags.addAll(entries.subList(1, entries.size()));
+        }
+
+        return tags;
+    }
+
+    /**
+     * Read the comma-separated list that follows "Tags: " on the line of the
+     * "description" meta tag; entries are trimmed, empty entries and
+     * duplicates are dropped, and the original order is kept.
+     *
+     * @param in
+     *            the input (main page of the gallery)
+     *
+     * @return the entries (never NULL, but can be empty)
+     */
+    private List<String> getTagsAuthor(InputStream in) {
+        List<String> entries = new ArrayList<String>();
+
+        String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
+                null);
+        if (tagLine == null) {
+            return entries;
+        }
+
+        for (String raw : tagLine.split(",")) {
+            String entry = raw.trim();
+            if (entry.isEmpty() || entries.contains(entry)) {
+                continue;
+            }
+            entries.add(entry);
+        }
+
+        return entries;
+    }
+
+    /**
+     * Read the title from the line containing the title tag, without the
+     * trailing name of the website.
+     *
+     * @param in
+     *            the input (main page of the gallery)
+     *
+     * @return the title, or NULL if not found
+     *
+     * @throws IOException
+     *             in case of I/O error
+     */
+    private String getTitle(InputStream in) throws IOException {
+        String suffix = " - E-Hentai Galleries";
+
+        String title = getLine(in, "<title>", 0);
+        if (title == null) {
+            return null;
+        }
+
+        title = StringUtils.unhtml(title).trim();
+        if (title.endsWith(suffix)) {
+            int end = title.length() - suffix.length();
+            title = title.substring(0, end).trim();
+        }
+
+        return title;
+    }
+
+    /**
+     * The description is the text found between "Uploader Comment" and the
+     * opening of the "c7" div, on the same line.
+     */
+    @Override
+    protected String getDesc(URL source, InputStream in) throws IOException {
+        String raw = getKeyLine(in, "Uploader Comment", null,
+                "<div class=\"c7\"");
+        return raw == null ? null : StringUtils.unhtml(raw);
+    }
+
+    /**
+     * List the index pages of the gallery: the input is scanned for links
+     * that contain the source URL followed by a "?p=N" page number, and one
+     * entry is created for each page from 0 to the highest number found (the
+     * keys are the 1-based page numbers).
+     */
+    @Override
+    protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
+            Progress pg) throws IOException {
+        String base = source.toString();
+        String pageKey = "?p=";
+
+        // No page link at all when there is only one page; first page is 0
+        int lastPage = 0;
+
+        @SuppressWarnings("resource")
+        Scanner scan = new Scanner(in, "UTF-8");
+        scan.useDelimiter(">");
+        while (scan.hasNext()) {
+            String token = scan.next();
+
+            int basePos = token.indexOf(base);
+            if (basePos < 0) {
+                continue;
+            }
+
+            String link = token.substring(basePos);
+            int keyPos = link.indexOf(pageKey);
+            if (keyPos < 0) {
+                continue;
+            }
+
+            // Only keep the leading ASCII digits found after the page key
+            String rest = link.substring(keyPos + pageKey.length());
+            int end = 0;
+            while (end < rest.length() && rest.charAt(end) >= '0'
+                    && rest.charAt(end) <= '9') {
+                end++;
+            }
+
+            String number = (end == 0) ? "0" : rest.substring(0, end);
+            lastPage = Math.max(lastPage, Integer.parseInt(number));
+        }
+
+        List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
+        for (int page = 0; page <= lastPage; page++) {
+            urls.add(pageEntry(Integer.toString(page + 1), new URL(base
+                    + pageKey + page)));
+        }
+
+        return urls;
+    }
+
+    /**
+     * Create a fixed key/value entry ({@link Entry#setValue(Object)} does
+     * nothing and returns NULL).
+     *
+     * @param key
+     *            the key
+     * @param value
+     *            the value
+     *
+     * @return the entry
+     */
+    private static Entry<String, URL> pageEntry(final String key,
+            final URL value) {
+        return new Entry<String, URL>() {
+            public String getKey() {
+                return key;
+            }
+
+            public URL getValue() {
+                return value;
+            }
+
+            public URL setValue(URL ignored) {
+                return null;
+            }
+        };
+    }
+
+    /**
+     * Build the content of one index page: every quoted link that starts with
+     * the image-page prefix is opened through the cache, and the address of
+     * its "img" element is appended as "[address]&lt;br/&gt;".
+     */
+    @Override
+    protected String getChapterContent(URL source, InputStream in, int number,
+            Progress pg) throws IOException {
+        String pagePrefix = "https://e-hentai.org/s/";
+
+        List<URL> pages = new ArrayList<URL>();
+
+        @SuppressWarnings("resource")
+        Scanner scan = new Scanner(in, "UTF-8");
+        scan.useDelimiter("\"");
+        while (scan.hasNext()) {
+            String token = scan.next();
+            if (!token.startsWith(pagePrefix)) {
+                continue;
+            }
+
+            try {
+                pages.add(new URL(token));
+            } catch (MalformedURLException e) {
+                // Report the bad link and go on with the other ones
+                Instance.syserr(new IOException(
+                        "Parsing error, a link is not correctly parsed: "
+                                + token, e));
+            }
+        }
+
+        Progress progress = (pg == null) ? new Progress() : pg;
+        progress.setMinMax(0, pages.size());
+        progress.setProgress(0);
+
+        StringBuilder content = new StringBuilder();
+        for (URL page : pages) {
+            InputStream pageIn = Instance.getCache().open(page, this, false);
+            try {
+                String image = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
+                if (image != null && !image.isEmpty()) {
+                    content.append("[").append(image).append("]<br/>");
+                }
+                progress.add(1);
+            } finally {
+                if (pageIn != null) {
+                    pageIn.close();
+                }
+            }
+        }
+
+        progress.done();
+        return content.toString();
+    }
+}