X-Git-Url: http://git.nikiroo.be/?p=fanfix.git;a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=471147ea0548ede456aa66e01a6337c9ae4ef1ef;hp=b9517284318e30169f571109fb4611d789fa006d;hb=27dc71793c8d76712f01cc6fdef75195bb22483a;hpb=9252c65e13ceb952626da9e1f9e6d5caef42733e diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index b951728..471147e 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -1,12 +1,16 @@ package be.nikiroo.fanfix.supported; +import java.awt.image.BufferedImage; +import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -21,6 +25,8 @@ import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.fanfix.data.Paragraph; import be.nikiroo.fanfix.data.Paragraph.ParagraphType; import be.nikiroo.fanfix.data.Story; +import be.nikiroo.utils.IOUtils; +import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; /** @@ -54,8 +60,14 @@ public abstract class BasicSupport { MANGAFOX, /** Furry website with comics support */ E621, + /** Furry website with stories */ + YIFFSTAR, + /** Comics and images groups, mostly but not only NSFW */ + E_HENTAI, /** CBZ files */ - CBZ; + CBZ, + /** HTML files */ + HTML; /** * A description of this support type (more information than the @@ -141,19 +153,18 @@ public abstract class BasicSupport { } } - /** Only used by {@link BasicSupport#getInput()} just so it is always reset. */ private InputStream in; private SupportType type; - private URL currentReferer; // with on 'r', as in 'HTTP'... + private URL currentReferer; // with only one 'r', as in 'HTTP'... // quote chars - private char openQuote = Instance.getTrans().getChar( + private char openQuote = Instance.getTrans().getCharacter( StringId.OPEN_SINGLE_QUOTE); - private char closeQuote = Instance.getTrans().getChar( + private char closeQuote = Instance.getTrans().getCharacter( StringId.CLOSE_SINGLE_QUOTE); - private char openDoubleQuote = Instance.getTrans().getChar( + private char openDoubleQuote = Instance.getTrans().getCharacter( StringId.OPEN_DOUBLE_QUOTE); - private char closeDoubleQuote = Instance.getTrans().getChar( + private char closeDoubleQuote = Instance.getTrans().getCharacter( StringId.CLOSE_DOUBLE_QUOTE); /** @@ -181,70 +192,7 @@ public abstract class BasicSupport { */ protected abstract boolean isHtml(); - /** - * Return the story title. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the title - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getTitle(URL source, InputStream in) - throws IOException; - - /** - * Return the story author. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the author - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getAuthor(URL source, InputStream in) - throws IOException; - - /** - * Return the story publication date. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the date - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getDate(URL source, InputStream in) - throws IOException; - - /** - * Return the subject of the story (for instance, if it is a fanfiction, - * what is the original work; if it is a technical text, what is the - * technical subject...). - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the subject - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getSubject(URL source, InputStream in) + protected abstract MetaData getMeta(URL source, InputStream in) throws IOException; /** @@ -263,24 +211,6 @@ public abstract class BasicSupport { protected abstract String getDesc(URL source, InputStream in) throws IOException; - /** - * Return the story cover resource if any, or NULL if none. - *

- * The default cover should not be checked for here. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the cover or NULL - * - * @throws IOException - * in case of I/O error - */ - protected abstract URL getCover(URL source, InputStream in) - throws IOException; - /** * Return the list of chapters (name and resource). * @@ -288,6 +218,8 @@ public abstract class BasicSupport { * the source of the story * @param in * the input (the main resource) + * @param pg + * the optional progress reporter * * @return the chapters * @@ -295,7 +227,7 @@ public abstract class BasicSupport { * in case of I/O error */ protected abstract List> getChapters(URL source, - InputStream in) throws IOException; + InputStream in, Progress pg) throws IOException; /** * Return the content of the chapter (possibly HTML encoded, if @@ -307,6 +239,8 @@ public abstract class BasicSupport { * the input (the main resource) * @param number * the chapter number + * @param pg + * the optional progress reporter * * @return the content * @@ -314,16 +248,16 @@ public abstract class BasicSupport { * in case of I/O error */ protected abstract String getChapterContent(URL source, InputStream in, - int number) throws IOException; + int number, Progress pg) throws IOException; /** - * Check if this {@link BasicSupport} is mainly catered to image files. + * Log into the support (can be a no-op depending upon the support). * - * @return TRUE if it is + * @throws IOException + * in case of I/O error */ - public boolean isImageDocument(URL source, InputStream in) - throws IOException { - return false; + public void login() throws IOException { + } /** @@ -334,11 +268,29 @@ public abstract class BasicSupport { * it. * * @return the cookies + * + * @throws IOException + * in case of I/O error */ - public Map getCookies() { + public Map getCookies() throws IOException { return new HashMap(); } + /** + * Return the canonical form of the main {@link URL}. + * + * @param source + * the source {@link URL} + * + * @return the canonical form of this {@link URL} + * + * @throws IOException + * in case of I/O error + */ + public URL getCanonicalUrl(URL source) throws IOException { + return source; + } + /** * Process the given story resource into a partially filled {@link Story} * object containing the name and metadata, except for the description. @@ -352,7 +304,7 @@ public abstract class BasicSupport { * in case of I/O error */ public Story processMeta(URL url) throws IOException { - return processMeta(url, true, false); + return processMeta(url, true, false, null); } /** @@ -364,46 +316,63 @@ public abstract class BasicSupport { * * @param close * close "this" and "in" when done + * @param pg + * the optional progress reporter * * @return the {@link Story} * * @throws IOException * in case of I/O error */ - protected Story processMeta(URL url, boolean close, boolean getDesc) - throws IOException { - in = Instance.getCache().open(url, this, false); + protected Story processMeta(URL url, boolean close, boolean getDesc, + Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } else { + pg.setMinMax(0, 100); + } + + login(); + pg.setProgress(10); + + url = getCanonicalUrl(url); + + setCurrentReferer(url); + + in = openInput(url); if (in == null) { return null; } try { - preprocess(getInput()); + preprocess(url, getInput()); + pg.setProgress(30); Story story = new Story(); - story.setMeta(new MetaData()); - story.getMeta().setTitle(ifUnhtml(getTitle(url, getInput()))); - story.getMeta().setAuthor( - fixAuthor(ifUnhtml(getAuthor(url, getInput())))); - story.getMeta().setDate(ifUnhtml(getDate(url, getInput()))); - story.getMeta().setTags(getTags(url, getInput())); - story.getMeta().setSource(getSourceName()); - story.getMeta().setPublisher( - ifUnhtml(getPublisher(url, getInput()))); - story.getMeta().setUuid(getUuid(url, getInput())); - story.getMeta().setLuid(getLuid(url, getInput())); - story.getMeta().setLang(getLang(url, getInput())); - story.getMeta().setSubject(ifUnhtml(getSubject(url, getInput()))); - story.getMeta().setImageDocument(isImageDocument(url, getInput())); + MetaData meta = getMeta(url, getInput()); + if (meta.getCreationDate() == null + || meta.getCreationDate().isEmpty()) { + meta.setCreationDate(StringUtils.fromTime(new Date().getTime())); + } + story.setMeta(meta); + + pg.setProgress(50); + + if (meta != null && meta.getCover() == null) { + meta.setCover(getDefaultCover(meta.getSubject())); + } + + pg.setProgress(60); if (getDesc) { String descChapterName = Instance.getTrans().getString( StringId.DESCRIPTION); story.getMeta().setResume( makeChapter(url, 0, descChapterName, - getDesc(url, getInput()))); + getDesc(url, getInput()), null)); } + pg.setProgress(100); return story; } finally { if (close) { @@ -417,6 +386,8 @@ public abstract class BasicSupport { in.close(); } } + + setCurrentReferer(null); } } @@ -426,64 +397,96 @@ public abstract class BasicSupport { * * @param url * the story resource + * @param pg + * the optional progress reporter * * @return the {@link Story} * * @throws IOException * in case of I/O error */ - public Story process(URL url) throws IOException { - setCurrentReferer(url); + public Story process(URL url, Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } else { + pg.setMinMax(0, 100); + } + url = getCanonicalUrl(url); + pg.setProgress(1); try { - Story story = processMeta(url, false, true); + Progress pgMeta = new Progress(); + pg.addProgress(pgMeta, 10); + Story story = processMeta(url, false, true, pgMeta); + if (!pgMeta.isDone()) { + pgMeta.setProgress(pgMeta.getMax()); // 10% + } + if (story == null) { + pg.setProgress(90); return null; } - story.setChapters(new ArrayList()); + pg.setName("Retrieving " + story.getMeta().getTitle()); - URL cover = getCover(url, getInput()); - if (cover == null) { - String subject = story.getMeta() == null ? null : story - .getMeta().getSubject(); - if (subject != null && !subject.isEmpty() - && Instance.getCoverDir() != null) { - File fileCover = new File(Instance.getCoverDir(), subject); - cover = getImage(fileCover.toURI().toURL(), subject); - } - } + setCurrentReferer(url); - if (cover != null) { - InputStream coverIn = null; - try { - coverIn = Instance.getCache().open(cover, this, true); - story.getMeta().setCover(StringUtils.toImage(coverIn)); - } catch (IOException e) { - Instance.syserr(new IOException(Instance.getTrans() - .getString(StringId.ERR_BS_NO_COVER, cover), e)); - } finally { - if (coverIn != null) - coverIn.close(); - } + Progress pgGetChapters = new Progress(); + pg.addProgress(pgGetChapters, 10); + story.setChapters(new ArrayList()); + List> chapters = getChapters(url, getInput(), + pgGetChapters); + if (!pgGetChapters.isDone()) { + pgGetChapters.setProgress(pgGetChapters.getMax()); // 20% } - List> chapters = getChapters(url, getInput()); - int i = 1; if (chapters != null) { + Progress pgChaps = new Progress("Extracting chapters", 0, + chapters.size() * 300); + pg.addProgress(pgChaps, 80); + + long words = 0; + int i = 1; for (Entry chap : chapters) { + pgChaps.setName("Extracting chapter " + i); setCurrentReferer(chap.getValue()); InputStream chapIn = Instance.getCache().open( chap.getValue(), this, true); + pgChaps.setProgress(i * 100); try { - story.getChapters().add( - makeChapter(url, i, chap.getKey(), - getChapterContent(url, chapIn, i))); + Progress pgGetChapterContent = new Progress(); + Progress pgMakeChapter = new Progress(); + pgChaps.addProgress(pgGetChapterContent, 100); + pgChaps.addProgress(pgMakeChapter, 100); + + String content = getChapterContent(url, chapIn, i, + pgGetChapterContent); + if (!pgGetChapterContent.isDone()) { + pgGetChapterContent.setProgress(pgGetChapterContent + .getMax()); + } + + Chapter cc = makeChapter(url, i, chap.getKey(), + content, pgMakeChapter); + if (!pgMakeChapter.isDone()) { + pgMakeChapter.setProgress(pgMakeChapter.getMax()); + } + + words += cc.getWords(); + story.getChapters().add(cc); + if (story.getMeta() != null) { + story.getMeta().setWords(words); + } } finally { chapIn.close(); } + i++; } + + pgChaps.setName("Extracting chapters"); + } else { + pg.setProgress(80); } return story; @@ -499,12 +502,12 @@ public abstract class BasicSupport { in.close(); } - currentReferer = null; + setCurrentReferer(null); } } /** - * The support type.$ + * The support type. * * @return the type */ @@ -547,191 +550,17 @@ public abstract class BasicSupport { } /** - * Return the story publisher (by default, - * {@link BasicSupport#getSourceName()}). - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the publisher - * - * @throws IOException - * in case of I/O error - */ - protected String getPublisher(URL source, InputStream in) - throws IOException { - return getSourceName(); - } - - /** - * Return the story UUID, a unique value representing the story (it is often - * an URL). - *

- * By default, this is the {@link URL} of the resource. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the uuid - * - * @throws IOException - * in case of I/O error - */ - protected String getUuid(URL source, InputStream in) throws IOException { - return source.toString(); - } - - /** - * Return the story Library UID, a unique value representing the story (it - * is often a number) in the local library. - *

- * By default, this is empty. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the id - * - * @throws IOException - * in case of I/O error - */ - protected String getLuid(URL source, InputStream in) throws IOException { - return ""; - } - - /** - * Return the 2-letter language code of this story. - *

- * By default, this is 'EN'. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the language - * - * @throws IOException - * in case of I/O error - */ - protected String getLang(URL source, InputStream in) throws IOException { - return "EN"; - } - - /** - * Return the list of tags for this story. + * Prepare the support if needed before processing. * * @param source * the source of the story * @param in * the input (the main resource) * - * @return the tags - * - * @throws IOException - * in case of I/O error - */ - protected List getTags(URL source, InputStream in) - throws IOException { - return new ArrayList(); - } - - /** - * Return the first line from the given input which correspond to the given - * selectors. - *

- * Do not reset the input, which will be pointing at the line just after the - * result (input will be spent if no result is found). - * - * @param in - * the input - * @param needle - * a string that must be found inside the target line (also - * supports "^" at start to say "only if it starts with" the - * needle) - * @param relativeLine - * the line to return based upon the target line position (-1 = - * the line before, 0 = the target line...) - * - * @return the line - */ - protected String getLine(InputStream in, String needle, int relativeLine) { - return getLine(in, needle, relativeLine, true); - } - - /** - * Return a line from the given input which correspond to the given - * selectors. - *

- * Do not reset the input, which will be pointing at the line just after the - * result (input will be spent if no result is found) when first is TRUE, - * and will always be spent if first is FALSE. - * - * @param in - * the input - * @param needle - * a string that must be found inside the target line (also - * supports "^" at start to say "only if it starts with" the - * needle) - * @param relativeLine - * the line to return based upon the target line position (-1 = - * the line before, 0 = the target line...) - * @param first - * takes the first result (as opposed to the last one, which will - * also always spend the input) - * - * @return the line - */ - protected String getLine(InputStream in, String needle, int relativeLine, - boolean first) { - String rep = null; - - List lines = new ArrayList(); - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - int index = -1; - scan.useDelimiter("\\n"); - while (scan.hasNext()) { - lines.add(scan.next()); - - if (index == -1) { - if (needle.startsWith("^")) { - if (lines.get(lines.size() - 1).startsWith( - needle.substring(1))) { - index = lines.size() - 1; - } - - } else { - if (lines.get(lines.size() - 1).contains(needle)) { - index = lines.size() - 1; - } - } - } - - if (index >= 0 && index + relativeLine < lines.size()) { - rep = lines.get(index + relativeLine); - if (first) { - break; - } - } - } - - return rep; - } - - /** - * Prepare the support if needed before processing. - * * @throws IOException * on I/O error */ - protected void preprocess(InputStream in) throws IOException { + protected void preprocess(URL source, InputStream in) throws IOException { } /** @@ -753,6 +582,8 @@ public abstract class BasicSupport { * the chapter name * @param content * the chapter content + * @param pg + * the optional progress reporter * * @return the {@link Chapter} * @@ -760,8 +591,7 @@ public abstract class BasicSupport { * in case of I/O error */ protected Chapter makeChapter(URL source, int number, String name, - String content) throws IOException { - + String content, Progress pg) throws IOException { // Chapter name: process it correctly, then remove the possible // redundant "Chapter x: " in front of it String chapterName = processPara(name).getContent().trim(); @@ -788,38 +618,89 @@ public abstract class BasicSupport { Chapter chap = new Chapter(number, chapterName); - if (content == null) { - return chap; + if (content != null) { + List paras = makeParagraphs(source, content, pg); + long words = 0; + for (Paragraph para : paras) { + words += para.getWords(); + } + chap.setParagraphs(paras); + chap.setWords(words); + } + + return chap; + + } + + /** + * Convert the given content into {@link Paragraph}s. + * + * @param source + * the source URL of the story + * @param content + * the textual content + * @param pg + * the optional progress reporter + * + * @return the {@link Paragraph}s + * + * @throws IOException + * in case of I/O error + */ + protected List makeParagraphs(URL source, String content, + Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); } if (isHtml()) { // Special


processing: content = content.replaceAll("(
]*>)|(
)|(
)", - "\n* * *\n"); + "
* * *
"); } - InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8")); - try { - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - scan.useDelimiter("(\\n|

)"); // \n for test,

for html - - List paras = new ArrayList(); - while (scan.hasNext()) { - String line = scan.next().trim(); - boolean image = false; - if (line.startsWith("[") && line.endsWith("]")) { - URL url = getImage(source, - line.substring(1, line.length() - 1).trim()); - if (url != null) { - paras.add(new Paragraph(url)); - image = true; + List paras = new ArrayList(); + + if (content != null && !content.trim().isEmpty()) { + if (isHtml()) { + String[] tab = content.split("(

|

|
|
)"); + pg.setMinMax(0, tab.length); + int i = 1; + for (String line : tab) { + if (line.startsWith("[") && line.endsWith("]")) { + pg.setName("Extracting image " + i); + } + paras.add(makeParagraph(source, line.trim())); + pg.setProgress(i++); + } + pg.setName(null); + } else { + List lines = new ArrayList(); + BufferedReader buff = null; + try { + buff = new BufferedReader( + new InputStreamReader(new ByteArrayInputStream( + content.getBytes("UTF-8")), "UTF-8")); + for (String line = buff.readLine(); line != null; line = buff + .readLine()) { + lines.add(line.trim()); + } + } finally { + if (buff != null) { + buff.close(); } } - if (!image) { - paras.add(processPara(line)); + pg.setMinMax(0, lines.size()); + int i = 0; + for (String line : lines) { + if (line.startsWith("[") && line.endsWith("]")) { + pg.setName("Extracting image " + i); + } + paras.add(makeParagraph(source, line)); + pg.setProgress(i++); } + pg.setName(null); } // Check quotes for "bad" format @@ -830,54 +711,114 @@ public abstract class BasicSupport { paras = newParas; // Remove double blanks/brks - boolean space = false; - boolean brk = true; - for (int i = 0; i < paras.size(); i++) { - Paragraph para = paras.get(i); - boolean thisSpace = para.getType() == ParagraphType.BLANK; - boolean thisBrk = para.getType() == ParagraphType.BREAK; - - if (space && thisBrk) { - paras.remove(i - 1); - i--; - } else if ((space || brk) && (thisSpace || thisBrk)) { - paras.remove(i); - i--; - } + fixBlanksBreaks(paras); + } - space = thisSpace; - brk = thisBrk; - } + return paras; + } - // Remove blank/brk at start - if (paras.size() > 0 - && (paras.get(0).getType() == ParagraphType.BLANK || paras - .get(0).getType() == ParagraphType.BREAK)) { - paras.remove(0); - } + /** + * Convert the given line into a single {@link Paragraph}. + * + * @param source + * the source URL of the story + * @param line + * the textual content of the paragraph + * + * @return the {@link Paragraph} + */ + private Paragraph makeParagraph(URL source, String line) { + URL image = null; + if (line.startsWith("[") && line.endsWith("]")) { + image = getImageUrl(this, source, + line.substring(1, line.length() - 1).trim()); + } + + if (image != null) { + return new Paragraph(image); + } else { + return processPara(line); + } + } - // Remove blank/brk at end - int last = paras.size() - 1; - if (paras.size() > 0 - && (paras.get(last).getType() == ParagraphType.BLANK || paras - .get(last).getType() == ParagraphType.BREAK)) { - paras.remove(last); + /** + * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of + * those {@link Paragraph}s. + *

+ * The resulting list will not contain a starting or trailing blank/break + * nor 2 blanks or breaks following each other. + * + * @param paras + * the list of {@link Paragraph}s to fix + */ + protected void fixBlanksBreaks(List paras) { + boolean space = false; + boolean brk = true; + for (int i = 0; i < paras.size(); i++) { + Paragraph para = paras.get(i); + boolean thisSpace = para.getType() == ParagraphType.BLANK; + boolean thisBrk = para.getType() == ParagraphType.BREAK; + + if (i > 0 && space && thisBrk) { + paras.remove(i - 1); + i--; + } else if ((space || brk) && (thisSpace || thisBrk)) { + paras.remove(i); + i--; } - chap.setParagraphs(paras); + space = thisSpace; + brk = thisBrk; + } - return chap; - } finally { - in.close(); + // Remove blank/brk at start + if (paras.size() > 0 + && (paras.get(0).getType() == ParagraphType.BLANK || paras.get( + 0).getType() == ParagraphType.BREAK)) { + paras.remove(0); + } + + // Remove blank/brk at end + int last = paras.size() - 1; + if (paras.size() > 0 + && (paras.get(last).getType() == ParagraphType.BLANK || paras + .get(last).getType() == ParagraphType.BREAK)) { + paras.remove(last); } } + /** + * Get the default cover related to this subject (see .info files). + * + * @param subject + * the subject + * + * @return the cover if any, or NULL + */ + static BufferedImage getDefaultCover(String subject) { + if (subject != null && !subject.isEmpty() + && Instance.getCoverDir() != null) { + try { + File fileCover = new File(Instance.getCoverDir(), subject); + return getImage(null, fileCover.toURI().toURL(), subject); + } catch (MalformedURLException e) { + } + } + + return null; + } + /** * Return the list of supported image extensions. * + * @param emptyAllowed + * TRUE to allow an empty extension on first place, which can be + * used when you may already have an extension in your input but + * are not sure about it + * * @return the extensions */ - protected String[] getImageExt(boolean emptyAllowed) { + static String[] getImageExt(boolean emptyAllowed) { if (emptyAllowed) { return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; } else { @@ -894,60 +835,115 @@ public abstract class BasicSupport { * @param line * the resource to check * - * @return the image URL if found, or NULL + * @return the image if found, or NULL * */ - protected URL getImage(URL source, String line) { - String path = new File(source.getFile()).getParent(); - URL url = null; - - // try for files - try { - String urlBase = new File(new File(path), line.trim()).toURI() - .toURL().toString(); - for (String ext : getImageExt(true)) { - if (new File(urlBase + ext).exists()) { - url = new File(urlBase + ext).toURI().toURL(); + static BufferedImage getImage(BasicSupport support, URL source, String line) { + URL url = getImageUrl(support, source, line); + if (url != null) { + InputStream in = null; + try { + in = Instance.getCache().open(url, getSupport(url), true); + return IOUtils.toImage(in); + } catch (IOException e) { + } finally { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + } } } - } catch (Exception e) { - // Nothing to do here } - if (url == null) { - // try for URLs - try { - for (String ext : getImageExt(true)) { - if (Instance.getCache().check(new URL(line + ext))) { - url = new URL(line + ext); + return null; + } + + /** + * Check if the given resource can be a local image or a remote image, then + * refresh the cache with it if it is. + * + * @param source + * the story source + * @param line + * the resource to check + * + * @return the image URL if found, or NULL + * + */ + static URL getImageUrl(BasicSupport support, URL source, String line) { + URL url = null; + + if (line != null) { + // try for files + if (source != null) { + try { + + String relPath = null; + String absPath = null; + try { + String path = new File(source.getFile()).getParent(); + relPath = new File(new File(path), line.trim()) + .getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (one possibility to take + // into account: absolute path on Windows) + } + try { + absPath = new File(line.trim()).getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (at all) + } + + for (String ext : getImageExt(true)) { + if (absPath != null && new File(absPath + ext).exists()) { + url = new File(absPath + ext).toURI().toURL(); + } else if (relPath != null + && new File(relPath + ext).exists()) { + url = new File(relPath + ext).toURI().toURL(); + } } + } catch (Exception e) { + // Should not happen since we control the correct arguments } + } - // try out of cache - if (url == null) { + if (url == null) { + // try for URLs + try { for (String ext : getImageExt(true)) { - try { + if (Instance.getCache().check(new URL(line + ext))) { url = new URL(line + ext); - Instance.getCache().refresh(url, this, true); break; - } catch (IOException e) { - // no image with this ext - url = null; } } + + // try out of cache + if (url == null) { + for (String ext : getImageExt(true)) { + try { + url = new URL(line + ext); + Instance.getCache().refresh(url, support, true); + break; + } catch (IOException e) { + // no image with this ext + url = null; + } + } + } + } catch (MalformedURLException e) { + // Not an url } - } catch (MalformedURLException e) { - // Not an url } - } - // refresh the cached file - if (url != null) { - try { - Instance.getCache().refresh(url, this, true); - } catch (IOException e) { - // woops, broken image - url = null; + // refresh the cached file + if (url != null) { + try { + Instance.getCache().refresh(url, support, true); + } catch (IOException e) { + // woops, broken image + url = null; + } } } @@ -955,18 +951,45 @@ public abstract class BasicSupport { } /** - * Reset then return {@link BasicSupport#in}. + * Open the input file that will be used through the support. * - * @return {@link BasicSupport#in} + * @param source + * the source {@link URL} + * + * @return the {@link InputStream} * * @throws IOException * in case of I/O error */ - protected InputStream getInput() throws IOException { - in.reset(); + protected InputStream openInput(URL source) throws IOException { + return Instance.getCache().open(source, this, false); + } + + /** + * Reset the given {@link InputStream} and return it. + * + * @param in + * the {@link InputStream} to reset + * + * @return the same {@link InputStream} after reset + */ + protected InputStream reset(InputStream in) { + try { + in.reset(); + } catch (IOException e) { + } return in; } + /** + * Reset then return {@link BasicSupport#in}. + * + * @return {@link BasicSupport#in} + */ + protected InputStream getInput() { + return reset(in); + } + /** * Fix the author name if it is prefixed with some "by" {@link String}. * @@ -975,7 +998,7 @@ public abstract class BasicSupport { * * @return the author without prefixes */ - private String fixAuthor(String author) { + protected String fixAuthor(String author) { if (author != null) { for (String suffix : new String[] { " ", ":" }) { for (String byString : Instance.getConfig() @@ -1002,34 +1025,71 @@ public abstract class BasicSupport { * paragraphs (quotes or not)). * * @param para - * the paragraph to requotify (not necessaraly a quote) + * the paragraph to requotify (not necessarily a quote) * * @return the correctly (or so we hope) quotified paragraphs */ - private List requotify(Paragraph para) { + protected List requotify(Paragraph para) { List newParas = new ArrayList(); - if (para.getType() == ParagraphType.QUOTE) { + if (para.getType() == ParagraphType.QUOTE + && para.getContent().length() > 2) { String line = para.getContent(); boolean singleQ = line.startsWith("" + openQuote); boolean doubleQ = line.startsWith("" + openDoubleQuote); + // Do not try when more than one quote at a time + // (some stories are not easily readable if we do) + if (singleQ + && line.indexOf(closeQuote, 1) < line + .lastIndexOf(closeQuote)) { + newParas.add(para); + return newParas; + } + if (doubleQ + && line.indexOf(closeDoubleQuote, 1) < line + .lastIndexOf(closeDoubleQuote)) { + newParas.add(para); + return newParas; + } + // + if (!singleQ && !doubleQ) { line = openDoubleQuote + line + closeDoubleQuote; - newParas.add(new Paragraph(ParagraphType.QUOTE, line)); + newParas.add(new Paragraph(ParagraphType.QUOTE, line, para + .getWords())); } else { + char open = singleQ ? openQuote : openDoubleQuote; char close = singleQ ? closeQuote : closeDoubleQuote; - int posClose = line.indexOf(close); - int posDot = line.indexOf("."); - while (posDot >= 0 && posDot < posClose) { - posDot = line.indexOf(".", posDot + 1); + + int posDot = -1; + boolean inQuote = false; + int i = 0; + for (char car : line.toCharArray()) { + if (car == open) { + inQuote = true; + } else if (car == close) { + inQuote = false; + } else if (car == '.' && !inQuote) { + posDot = i; + break; + } + i++; } if (posDot >= 0) { String rest = line.substring(posDot + 1).trim(); line = line.substring(0, posDot + 1).trim(); - newParas.add(new Paragraph(ParagraphType.QUOTE, line)); - newParas.addAll(requotify(processPara(rest))); + long words = 1; + for (char car : line.toCharArray()) { + if (car == ' ') { + words++; + } + } + newParas.add(new Paragraph(ParagraphType.QUOTE, line, words)); + if (!rest.isEmpty()) { + newParas.addAll(requotify(processPara(rest))); + } } else { newParas.add(para); } @@ -1051,7 +1111,7 @@ public abstract class BasicSupport { * * @return the processed {@link Paragraph} */ - private Paragraph processPara(String line) { + protected Paragraph processPara(String line) { line = ifUnhtml(line).trim(); boolean space = true; @@ -1060,6 +1120,7 @@ public abstract class BasicSupport { boolean tentativeCloseQuote = false; char prev = '\0'; int dashCount = 0; + long words = 1; StringBuilder builder = new StringBuilder(); for (char car : line.toCharArray()) { @@ -1074,11 +1135,16 @@ public abstract class BasicSupport { if (tentativeCloseQuote) { tentativeCloseQuote = false; - if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z') - || (car >= '0' && car <= '9')) { + if (Character.isLetterOrDigit(car)) { builder.append("'"); } else { - builder.append(closeQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.append(closeDoubleQuote); + continue; + } else { + builder.append(closeQuote); + } } } @@ -1088,15 +1154,31 @@ public abstract class BasicSupport { case '\t': case '\n': // just in case case '\r': // just in case + if (builder.length() > 0 + && builder.charAt(builder.length() - 1) != ' ') { + words++; + } builder.append(' '); break; case '\'': if (space || (brk && quote)) { quote = true; - builder.append(openQuote); - } else if (prev == ' ') { - builder.append(openQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } + } else if (prev == ' ' || prev == car) { + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } } else { // it is a quote ("I'm off") or a 'quote' ("This // 'good' restaurant"...) @@ -1149,7 +1231,13 @@ public abstract class BasicSupport { quote = true; builder.append(openQuote); } else { - builder.append(openQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } } space = false; brk = false; @@ -1162,7 +1250,13 @@ public abstract class BasicSupport { case '」': space = false; brk = false; - builder.append(closeQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(closeDoubleQuote); + } else { + builder.append(closeQuote); + } break; case '«': @@ -1216,11 +1310,11 @@ public abstract class BasicSupport { type = ParagraphType.QUOTE; } - return new Paragraph(type, line); + return new Paragraph(type, line, words); } /** - * Remove the HTML from the inpit if {@link BasicSupport#isHtml()} is + * Remove the HTML from the input if {@link BasicSupport#isHtml()} is * true. * * @param input @@ -1260,8 +1354,8 @@ public abstract class BasicSupport { } } - for (SupportType type : new SupportType[] { SupportType.TEXT, - SupportType.INFO_TEXT }) { + for (SupportType type : new SupportType[] { SupportType.INFO_TEXT, + SupportType.TEXT }) { BasicSupport support = getSupport(type); if (support != null && support.supports(url)) { return support; @@ -1295,10 +1389,142 @@ public abstract class BasicSupport { return new MangaFox().setType(type); case E621: return new E621().setType(type); + case YIFFSTAR: + return new YiffStar().setType(type); + case E_HENTAI: + return new EHentai().setType(type); case CBZ: return new Cbz().setType(type); + case HTML: + return new Html().setType(type); } return null; } + + /** + * Return the first line from the given input which correspond to the given + * selectors. + * + * @param in + * the input + * @param needle + * a string that must be found inside the target line (also + * supports "^" at start to say "only if it starts with" the + * needle) + * @param relativeLine + * the line to return based upon the target line position (-1 = + * the line before, 0 = the target line...) + * + * @return the line + */ + static String getLine(InputStream in, String needle, int relativeLine) { + return getLine(in, needle, relativeLine, true); + } + + /** + * Return a line from the given input which correspond to the given + * selectors. + * + * @param in + * the input + * @param needle + * a string that must be found inside the target line (also + * supports "^" at start to say "only if it starts with" the + * needle) + * @param relativeLine + * the line to return based upon the target line position (-1 = + * the line before, 0 = the target line...) + * @param first + * takes the first result (as opposed to the last one, which will + * also always spend the input) + * + * @return the line + */ + static String getLine(InputStream in, String needle, int relativeLine, + boolean first) { + String rep = null; + + try { + in.reset(); + } catch (IOException e) { + Instance.syserr(e); + } + + List lines = new ArrayList(); + @SuppressWarnings("resource") + Scanner scan = new Scanner(in, "UTF-8"); + int index = -1; + scan.useDelimiter("\\n"); + while (scan.hasNext()) { + lines.add(scan.next()); + + if (index == -1) { + if (needle.startsWith("^")) { + if (lines.get(lines.size() - 1).startsWith( + needle.substring(1))) { + index = lines.size() - 1; + } + + } else { + if (lines.get(lines.size() - 1).contains(needle)) { + index = lines.size() - 1; + } + } + } + + if (index >= 0 && index + relativeLine < lines.size()) { + rep = lines.get(index + relativeLine); + if (first) { + break; + } + } + } + + return rep; + } + + /** + * Return the text between the key and the endKey (and optional subKey can + * be passed, in this case we will look for the key first, then take the + * text between the subKey and the endKey). + *

+ * Will only match the first line with the given key if more than one are + * possible. Which also means that if the subKey or endKey is not found on + * that line, NULL will be returned. + * + * @param in + * the input + * @param key + * the key to match (also supports "^" at start to say + * "only if it starts with" the key) + * @param subKey + * the sub key or NULL if none + * @param endKey + * the end key or NULL for "up to the end" + * @return the text or NULL if not found + */ + static String getKeyLine(InputStream in, String key, String subKey, + String endKey) { + String result = null; + + String line = getLine(in, key, 0); + if (line != null && line.contains(key)) { + line = line.substring(line.indexOf(key) + key.length()); + if (subKey == null || subKey.isEmpty() || line.contains(subKey)) { + if (subKey != null) { + line = line.substring(line.indexOf(subKey) + + subKey.length()); + } + if (endKey == null || line.contains(endKey)) { + if (endKey != null) { + line = line.substring(0, line.indexOf(endKey)); + result = line; + } + } + } + } + + return result; + } }