From: Niki Roo Date: Tue, 18 Apr 2017 18:13:49 +0000 (+0200) Subject: New website supported on request: e-hentai.org X-Git-Url: https://git.nikiroo.be/?a=commitdiff_plain;h=f0608ab10e762c1aed9608720b97c1901b5e2614;p=fanfix-jexer.git New website supported on request: e-hentai.org --- diff --git a/README.md b/README.md index d9c47e3..f66be31 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ Currently missing, but either in progress or planned: - [ ] [Two Kinds](http://twokinds.keenspot.com/) - [ ] [Slightly damned](http://www.sdamned.com/) - [ ] Others? Any ideas? + - [x] [e-Hentai](https://e-hentai.org/) requested - [x] A GUI (work in progress) - [x] Make one - [x] Make it run when no args passed diff --git a/changelog.md b/changelog.md index 3a99d86..837260e 100644 --- a/changelog.md +++ b/changelog.md @@ -1,6 +1,7 @@ # Fanfix ## Version wip +- New website supported following a request: e-hentai.org - Library: perf improvement when retrieving the stories (cover not loaded when not needed) - Library: fix the covers that were not always removed when deleting a story - UI: perf improvement when displaying books (cover resized then cached) diff --git a/src/be/nikiroo/fanfix/bundles/StringId.java b/src/be/nikiroo/fanfix/bundles/StringId.java index b910aef..78b271d 100644 --- a/src/be/nikiroo/fanfix/bundles/StringId.java +++ b/src/be/nikiroo/fanfix/bundles/StringId.java @@ -78,6 +78,8 @@ public enum StringId { INPUT_DESC_MANGAFOX, // @Meta(what = "input format description", where = "SupportType", format = "", info = "Description of this input type") INPUT_DESC_E621, // + @Meta(what = "input format description", where = "SupportType", format = "", info = "Description of this input type") + INPUT_DESC_E_HENTAI, // @Meta(what = "output format description", where = "OutputType", format = "%s = type", info = "Default description when the type is not known by i18n") OUTPUT_DESC, // @Meta(what = "output format description", where = "OutputType", format = "", info = "Description of this output type") diff --git a/src/be/nikiroo/fanfix/bundles/resources.properties b/src/be/nikiroo/fanfix/bundles/resources.properties index 1d142f3..c5c3137 100644 --- a/src/be/nikiroo/fanfix/bundles/resources.properties +++ b/src/be/nikiroo/fanfix/bundles/resources.properties @@ -52,19 +52,15 @@ ERR_BS_NO_COVER = Failed to download cover: %s # (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char) # Canonical OPEN SINGLE QUOTE char (for instance: `) OPEN_SINGLE_QUOTE = ` -OPEN_SINGLE_QUOTE_NOUTF = ' # (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char) # Canonical CLOSE SINGLE QUOTE char (for instance: ‘) CLOSE_SINGLE_QUOTE = ‘ -CLOSE_SINGLE_QUOTE_NOUTF = ' # (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char) # Canonical OPEN DOUBLE QUOTE char (for instance: “) OPEN_DOUBLE_QUOTE = “ -OPEN_DOUBLE_QUOTE_NOUTF = " # (WHAT: char, WHERE: LaTeX/BasicSupport, FORMAT: single char) # Canonical CLOSE DOUBLE QUOTE char (for instance: ”) CLOSE_DOUBLE_QUOTE = ” -CLOSE_DOUBLE_QUOTE_NOUTF = " # (WHAT: chapter name, WHERE: BasicSupport) # Name of the description fake chapter DESCRIPTION = Description @@ -103,6 +99,9 @@ INPUT_DESC_MANGAFOX = A well filled repository of mangas, or, as their website s # (WHAT: input format description, WHERE: SupportType) # Description of this input type INPUT_DESC_E621 = Furry website supporting comics, including MLP +# (WHAT: input format description, WHERE: SupportType) +# Description of this input type +INPUT_DESC_E_HENTAI = Website offering many comics/manga, mostly but not always NSFW (hence 'Hentai'); MLP comics can be found there, too # (WHAT: output format description, WHERE: OutputType, FORMAT: %s = type) # Default description when the type is not known by i18n OUTPUT_DESC = Unknown type: %s diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index b230bc3..c0419aa 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -62,6 +62,8 @@ public abstract class BasicSupport { E621, /** Furry website with stories */ YIFFSTAR, + /** Comics and images groups, mostly but not only NSFW */ + E_HENTAI, /** CBZ files */ CBZ, /** HTML files */ @@ -1389,6 +1391,8 @@ public abstract class BasicSupport { return new E621().setType(type); case YIFFSTAR: return new YiffStar().setType(type); + case E_HENTAI: + return new EHentai().setType(type); case CBZ: return new Cbz().setType(type); case HTML: @@ -1479,4 +1483,47 @@ public abstract class BasicSupport { return rep; } + + /** + * Return the text between the key and the endKey (and optional subKey can + * be passed, in this case we will look for the key first, then take the + * text between the subKey and the endKey). + *

+ * Will only match the first line with the given key if more than one are + * possible. Which also means that if the subKey or endKey is not found on + * that line, NULL will be returned. + * + * @param in + * the input + * @param key + * the key to match + * @param subKey + * the sub key or NULL if none + * @param endKey + * the end key or NULL for "up to the end" + * @return the text or NULL if not found + */ + static String getKeyLine(InputStream in, String key, String subKey, + String endKey) { + String result = null; + + String line = getLine(in, key, 0); + if (line != null && line.contains(key)) { + line = line.substring(line.indexOf(key) + key.length()); + if (subKey == null || subKey.isEmpty() || line.contains(subKey)) { + if (subKey != null) { + line = line.substring(line.indexOf(subKey) + + subKey.length()); + } + if (endKey == null || line.contains(endKey)) { + if (endKey != null) { + line = line.substring(0, line.indexOf(endKey)); + result = line; + } + } + } + } + + return result; + } } diff --git a/src/be/nikiroo/fanfix/supported/EHentai.java b/src/be/nikiroo/fanfix/supported/EHentai.java new file mode 100644 index 0000000..eae377c --- /dev/null +++ b/src/be/nikiroo/fanfix/supported/EHentai.java @@ -0,0 +1,264 @@ +package be.nikiroo.fanfix.supported; + +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.Map.Entry; +import java.util.Scanner; + +import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.data.Chapter; +import be.nikiroo.fanfix.data.MetaData; +import be.nikiroo.fanfix.data.Story; +import be.nikiroo.utils.Progress; +import be.nikiroo.utils.StringUtils; + +/** + * Support class for e-hentai.org, a website + * supporting mostly but not always NSFW comics, including some of MLP. + * + * @author niki + */ +class EHentai extends BasicSupport { + @Override + public String getSourceName() { + return "e-hentai.org"; + } + + @Override + protected MetaData getMeta(URL source, InputStream in) throws IOException { + MetaData meta = new MetaData(); + + meta.setTitle(getTitle(reset(in))); + meta.setAuthor(getAuthor(reset(in))); + meta.setDate(""); + meta.setTags(getTags(reset(in))); + meta.setSource(getSourceName()); + meta.setUrl(source.toString()); + meta.setPublisher(getSourceName()); + meta.setUuid(source.toString()); + meta.setLuid(""); + meta.setLang("EN"); + meta.setSubject("Furry"); + meta.setType(getType().toString()); + meta.setImageDocument(true); + meta.setCover(getCover(source, reset(in))); + meta.setFakeCover(true); + + return meta; + } + + @Override + public Story process(URL url, Progress pg) throws IOException { + // There is no chapters on e621, just pagination... + Story story = super.process(url, pg); + + Chapter only = new Chapter(1, null); + for (Chapter chap : story) { + only.getParagraphs().addAll(chap.getParagraphs()); + } + + story.getChapters().clear(); + story.getChapters().add(only); + + return story; + } + + @Override + protected boolean supports(URL url) { + return "e-hentai.org".equals(url.getHost()); + } + + @Override + protected boolean isHtml() { + return true; + } + + private BufferedImage getCover(URL source, InputStream in) + throws IOException { + BufferedImage author = null; + String coverLine = getKeyLine(in, "

tagsAuthor = getTagsAuthor(in); + if (!tagsAuthor.isEmpty()) { + author = tagsAuthor.get(0); + } + + return author; + } + + private List getTags(InputStream in) { + List tags = new ArrayList(); + List tagsAuthor = getTagsAuthor(in); + + for (int i = 1; i < tagsAuthor.size(); i++) { + tags.add(tagsAuthor.get(i)); + } + + return tags; + } + + private List getTagsAuthor(InputStream in) { + List tags = new ArrayList(); + String tagLine = getKeyLine(in, "", 0); + if (title != null) { + title = StringUtils.unhtml(title).trim(); + if (title.endsWith(siteName)) { + title = title.substring(0, title.length() - siteName.length()) + .trim(); + } + } + + return title; + } + + @Override + protected String getDesc(URL source, InputStream in) throws IOException { + String desc = null; + + String descLine = getKeyLine(in, "Uploader Comment", null, + "
> getChapters(URL source, InputStream in, + Progress pg) throws IOException { + List> urls = new ArrayList>(); + int last = 0; // no pool/show when only one page, first page == page 0 + + @SuppressWarnings("resource") + Scanner scan = new Scanner(in, "UTF-8"); + scan.useDelimiter(">"); + while (scan.hasNext()) { + String line = scan.next(); + if (line.contains(source.toString())) { + String page = line.substring(line.indexOf(source.toString())); + String pkey = "?p="; + if (page.contains(pkey)) { + page = page.substring(page.indexOf(pkey) + pkey.length()); + String number = ""; + while (!page.isEmpty() && page.charAt(0) >= '0' + && page.charAt(0) <= '9') { + number += page.charAt(0); + page = page.substring(1); + } + if (number.isEmpty()) { + number = "0"; + } + + int current = Integer.parseInt(number); + if (last < current) { + last = current; + } + } + } + } + + for (int i = 0; i <= last; i++) { + final String key = Integer.toString(i + 1); + final URL value = new URL(source.toString() + "?p=" + i); + urls.add(new Entry() { + public URL setValue(URL value) { + return null; + } + + public URL getValue() { + return value; + } + + public String getKey() { + return key; + } + }); + } + + return urls; + } + + @Override + protected String getChapterContent(URL source, InputStream in, int number, + Progress pg) throws IOException { + String staticSite = "https://e-hentai.org/s/"; + List pages = new ArrayList(); + + @SuppressWarnings("resource") + Scanner scan = new Scanner(in, "UTF-8"); + scan.useDelimiter("\""); + while (scan.hasNext()) { + String line = scan.next(); + if (line.startsWith(staticSite)) { + try { + pages.add(new URL(line)); + } catch (MalformedURLException e) { + Instance.syserr(new IOException( + "Parsing error, a link is not correctly parsed: " + + line, e)); + } + } + } + + if (pg == null) { + pg = new Progress(); + } + pg.setMinMax(0, pages.size()); + pg.setProgress(0); + + StringBuilder builder = new StringBuilder(); + + for (URL page : pages) { + InputStream pageIn = Instance.getCache().open(page, this, false); + try { + String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\""); + if (link != null && !link.isEmpty()) { + builder.append("["); + builder.append(link); + builder.append("]
"); + } + pg.add(1); + } finally { + if (pageIn != null) { + pageIn.close(); + } + } + } + + pg.done(); + return builder.toString(); + } +}