X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=fb261fb3a27fccc04ca40f98ffc56a596ac41427;hb=f1fb834c62f9d9a73edeeda3fed060e0dede8cef;hp=f255d5ed1c30eefb6767d5795c66dcca23baa39c;hpb=793f1071fae48daed3b545a03a286c85e527d244;p=fanfix.git diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index f255d5e..fb261fb 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -25,7 +25,7 @@ import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.fanfix.data.Paragraph; import be.nikiroo.fanfix.data.Paragraph.ParagraphType; import be.nikiroo.fanfix.data.Story; -import be.nikiroo.utils.IOUtils; +import be.nikiroo.utils.ImageUtils; import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; @@ -62,6 +62,8 @@ public abstract class BasicSupport { E621, /** Furry website with stories */ YIFFSTAR, + /** Comics and images groups, mostly but not only NSFW */ + E_HENTAI, /** CBZ files */ CBZ, /** HTML files */ @@ -104,7 +106,8 @@ public abstract class BasicSupport { } /** - * Call {@link SupportType#valueOf(String.toUpperCase())}. + * Call {@link SupportType#valueOf(String)} after conversion to upper + * case. * * @param typeName * the possible type name @@ -117,8 +120,8 @@ public abstract class BasicSupport { } /** - * Call {@link SupportType#valueOf(String.toUpperCase())} but return - * NULL for NULL instead of raising exception. + * Call {@link SupportType#valueOf(String)} after conversion to upper + * case but return NULL for NULL instead of raising exception. * * @param typeName * the possible type name @@ -134,8 +137,9 @@ public abstract class BasicSupport { } /** - * Call {@link SupportType#valueOf(String.toUpperCase())} but return - * NULL in case of error instead of raising an exception. + * Call {@link SupportType#valueOf(String)} after conversion to upper + * case but return NULL in case of error instead of raising an + * exception. * * @param typeName * the possible type name @@ -156,13 +160,13 @@ public abstract class BasicSupport { private URL currentReferer; // with only one 'r', as in 'HTTP'... // quote chars - private char openQuote = Instance.getTrans().getChar( + private char openQuote = Instance.getTrans().getCharacter( StringId.OPEN_SINGLE_QUOTE); - private char closeQuote = Instance.getTrans().getChar( + private char closeQuote = Instance.getTrans().getCharacter( StringId.CLOSE_SINGLE_QUOTE); - private char openDoubleQuote = Instance.getTrans().getChar( + private char openDoubleQuote = Instance.getTrans().getCharacter( StringId.OPEN_DOUBLE_QUOTE); - private char closeDoubleQuote = Instance.getTrans().getChar( + private char closeDoubleQuote = Instance.getTrans().getCharacter( StringId.CLOSE_DOUBLE_QUOTE); /** @@ -190,6 +194,19 @@ public abstract class BasicSupport { */ protected abstract boolean isHtml(); + /** + * Return the {@link MetaData} of this story. + * + * @param source + * the source of the story + * @param in + * the input (the main resource) + * + * @return the associated {@link MetaData} + * + * @throws IOException + * in case of I/O error + */ protected abstract MetaData getMeta(URL source, InputStream in) throws IOException; @@ -216,6 +233,8 @@ public abstract class BasicSupport { * the source of the story * @param in * the input (the main resource) + * @param pg + * the optional progress reporter * * @return the chapters * @@ -223,7 +242,7 @@ public abstract class BasicSupport { * in case of I/O error */ protected abstract List> getChapters(URL source, - InputStream in) throws IOException; + InputStream in, Progress pg) throws IOException; /** * Return the content of the chapter (possibly HTML encoded, if @@ -235,6 +254,8 @@ public abstract class BasicSupport { * the input (the main resource) * @param number * the chapter number + * @param pg + * the optional progress reporter * * @return the content * @@ -242,7 +263,7 @@ public abstract class BasicSupport { * in case of I/O error */ protected abstract String getChapterContent(URL source, InputStream in, - int number) throws IOException; + int number, Progress pg) throws IOException; /** * Log into the support (can be a no-op depending upon the support). @@ -250,8 +271,8 @@ public abstract class BasicSupport { * @throws IOException * in case of I/O error */ + @SuppressWarnings("unused") public void login() throws IOException { - } /** @@ -262,14 +283,20 @@ public abstract class BasicSupport { * it. * * @return the cookies - * - * @throws IOException - * in case of I/O error */ - public Map getCookies() throws IOException { + public Map getCookies() { return new HashMap(); } + /** + * OAuth authorisation (aka, "bearer XXXXXXX"). + * + * @return the OAuth string + */ + public String getOAuth() { + return null; + } + /** * Return the canonical form of the main {@link URL}. * @@ -281,6 +308,7 @@ public abstract class BasicSupport { * @throws IOException * in case of I/O error */ + @SuppressWarnings("unused") public URL getCanonicalUrl(URL source) throws IOException { return source; } @@ -298,7 +326,7 @@ public abstract class BasicSupport { * in case of I/O error */ public Story processMeta(URL url) throws IOException { - return processMeta(url, true, false); + return processMeta(url, true, false, null); } /** @@ -307,30 +335,37 @@ public abstract class BasicSupport { * * @param url * the story resource - * * @param close * close "this" and "in" when done + * @param getDesc + * retrieve the description of the story, or not + * @param pg + * the optional progress reporter * * @return the {@link Story} * * @throws IOException * in case of I/O error */ - protected Story processMeta(URL url, boolean close, boolean getDesc) - throws IOException { + protected Story processMeta(URL url, boolean close, boolean getDesc, + Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } else { + pg.setMinMax(0, 100); + } + login(); + pg.setProgress(10); url = getCanonicalUrl(url); setCurrentReferer(url); - in = openInput(url); - if (in == null) { - return null; - } - + in = openInput(url); // NULL allowed here try { preprocess(url, getInput()); + pg.setProgress(30); Story story = new Story(); MetaData meta = getMeta(url, getInput()); @@ -340,18 +375,23 @@ public abstract class BasicSupport { } story.setMeta(meta); - if (meta != null && meta.getCover() == null) { + pg.setProgress(50); + + if (meta.getCover() == null) { meta.setCover(getDefaultCover(meta.getSubject())); } + pg.setProgress(60); + if (getDesc) { String descChapterName = Instance.getTrans().getString( StringId.DESCRIPTION); story.getMeta().setResume( makeChapter(url, 0, descChapterName, - getDesc(url, getInput()))); + getDesc(url, getInput()), null)); } + pg.setProgress(100); return story; } finally { if (close) { @@ -394,46 +434,83 @@ public abstract class BasicSupport { url = getCanonicalUrl(url); pg.setProgress(1); try { - Story story = processMeta(url, false, true); - pg.setProgress(10); + Progress pgMeta = new Progress(); + pg.addProgress(pgMeta, 10); + Story story = processMeta(url, false, true, pgMeta); + if (!pgMeta.isDone()) { + pgMeta.setProgress(pgMeta.getMax()); // 10% + } + if (story == null) { - pg.setProgress(100); + pg.setProgress(90); return null; } + pg.setName("Retrieving " + story.getMeta().getTitle()); + setCurrentReferer(url); + Progress pgGetChapters = new Progress(); + pg.addProgress(pgGetChapters, 10); story.setChapters(new ArrayList()); + List> chapters = getChapters(url, getInput(), + pgGetChapters); + if (!pgGetChapters.isDone()) { + pgGetChapters.setProgress(pgGetChapters.getMax()); // 20% + } - List> chapters = getChapters(url, getInput()); - pg.setProgress(20); - - int i = 1; if (chapters != null) { - Progress pgChaps = new Progress(0, chapters.size()); + Progress pgChaps = new Progress("Extracting chapters", 0, + chapters.size() * 300); pg.addProgress(pgChaps, 80); long words = 0; + int i = 1; for (Entry chap : chapters) { - setCurrentReferer(chap.getValue()); - InputStream chapIn = Instance.getCache().open( - chap.getValue(), this, true); + pgChaps.setName("Extracting chapter " + i); + InputStream chapIn = null; + if (chap.getValue() != null) { + setCurrentReferer(chap.getValue()); + chapIn = Instance.getCache().open(chap.getValue(), + this, true); + } + pgChaps.setProgress(i * 100); try { + Progress pgGetChapterContent = new Progress(); + Progress pgMakeChapter = new Progress(); + pgChaps.addProgress(pgGetChapterContent, 100); + pgChaps.addProgress(pgMakeChapter, 100); + + String content = getChapterContent(url, chapIn, i, + pgGetChapterContent); + if (!pgGetChapterContent.isDone()) { + pgGetChapterContent.setProgress(pgGetChapterContent + .getMax()); + } + Chapter cc = makeChapter(url, i, chap.getKey(), - getChapterContent(url, chapIn, i)); + content, pgMakeChapter); + if (!pgMakeChapter.isDone()) { + pgMakeChapter.setProgress(pgMakeChapter.getMax()); + } + words += cc.getWords(); story.getChapters().add(cc); if (story.getMeta() != null) { story.getMeta().setWords(words); } } finally { - chapIn.close(); + if (chapIn != null) { + chapIn.close(); + } } - pgChaps.setProgress(i++); + i++; } + + pgChaps.setName("Extracting chapters"); } else { - pg.setProgress(100); + pg.setProgress(80); } return story; @@ -507,6 +584,7 @@ public abstract class BasicSupport { * @throws IOException * on I/O error */ + @SuppressWarnings("unused") protected void preprocess(URL source, InputStream in) throws IOException { } @@ -516,6 +594,7 @@ public abstract class BasicSupport { * @throws IOException * on I/O error */ + @SuppressWarnings("unused") protected void close() throws IOException { } @@ -523,12 +602,16 @@ public abstract class BasicSupport { * Create a {@link Chapter} object from the given information, formatting * the content as it should be. * + * @param source + * the source of the story * @param number * the chapter number * @param name * the chapter name * @param content * the chapter content + * @param pg + * the optional progress reporter * * @return the {@link Chapter} * @@ -536,9 +619,10 @@ public abstract class BasicSupport { * in case of I/O error */ protected Chapter makeChapter(URL source, int number, String name, - String content) throws IOException { + String content, Progress pg) throws IOException { // Chapter name: process it correctly, then remove the possible - // redundant "Chapter x: " in front of it + // redundant "Chapter x: " in front of it, or "-" (as in + // "Chapter 5: - Fun!" after the ": " was automatically added) String chapterName = processPara(name).getContent().trim(); for (String lang : Instance.getConfig().getString(Config.CHAPTER) .split(",")) { @@ -556,7 +640,7 @@ public abstract class BasicSupport { Integer.toString(number).length()).trim(); } - if (chapterName.startsWith(":")) { + while (chapterName.startsWith(":") || chapterName.startsWith("-")) { chapterName = chapterName.substring(1).trim(); } // @@ -564,7 +648,7 @@ public abstract class BasicSupport { Chapter chap = new Chapter(number, chapterName); if (content != null) { - List paras = makeParagraphs(source, content); + List paras = makeParagraphs(source, content, pg); long words = 0; for (Paragraph para : paras) { words += para.getWords(); @@ -584,66 +668,106 @@ public abstract class BasicSupport { * the source URL of the story * @param content * the textual content + * @param pg + * the optional progress reporter * * @return the {@link Paragraph}s * * @throws IOException * in case of I/O error */ - protected List makeParagraphs(URL source, String content) - throws IOException { + protected List makeParagraphs(URL source, String content, + Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } + if (isHtml()) { // Special
processing: content = content.replaceAll("(
]*>)|(
)|(
)", - "\n* * *\n"); + "
* * *
"); } List paras = new ArrayList(); - InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8")); - try { - BufferedReader buff = new BufferedReader(new InputStreamReader(in, - "UTF-8")); - - for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff - .readLine()) { - String lines[]; - if (isHtml()) { - lines = encodedLine.split("(

|

|
|
|\\n)"); - } else { - lines = new String[] { encodedLine }; - } - - for (String aline : lines) { - String line = aline.trim(); - URL image = null; + if (content != null && !content.trim().isEmpty()) { + if (isHtml()) { + String[] tab = content.split("(

|

|
|
)"); + pg.setMinMax(0, tab.length); + int i = 1; + for (String line : tab) { if (line.startsWith("[") && line.endsWith("]")) { - image = getImageUrl(this, source, - line.substring(1, line.length() - 1).trim()); + pg.setName("Extracting image " + i); } + paras.add(makeParagraph(source, line.trim())); + pg.setProgress(i++); + } + pg.setName(null); + } else { + List lines = new ArrayList(); + BufferedReader buff = null; + try { + buff = new BufferedReader( + new InputStreamReader(new ByteArrayInputStream( + content.getBytes("UTF-8")), "UTF-8")); + for (String line = buff.readLine(); line != null; line = buff + .readLine()) { + lines.add(line.trim()); + } + } finally { + if (buff != null) { + buff.close(); + } + } - if (image != null) { - paras.add(new Paragraph(image)); - } else { - paras.add(processPara(line)); + pg.setMinMax(0, lines.size()); + int i = 0; + for (String line : lines) { + if (line.startsWith("[") && line.endsWith("]")) { + pg.setName("Extracting image " + i); } + paras.add(makeParagraph(source, line)); + pg.setProgress(i++); } + pg.setName(null); } - } finally { - in.close(); + + // Check quotes for "bad" format + List newParas = new ArrayList(); + for (Paragraph para : paras) { + newParas.addAll(requotify(para)); + } + paras = newParas; + + // Remove double blanks/brks + fixBlanksBreaks(paras); } - // Check quotes for "bad" format - List newParas = new ArrayList(); - for (Paragraph para : paras) { - newParas.addAll(requotify(para)); + return paras; + } + + /** + * Convert the given line into a single {@link Paragraph}. + * + * @param source + * the source URL of the story + * @param line + * the textual content of the paragraph + * + * @return the {@link Paragraph} + */ + private Paragraph makeParagraph(URL source, String line) { + URL image = null; + if (line.startsWith("[") && line.endsWith("]")) { + image = getImageUrl(this, source, + line.substring(1, line.length() - 1).trim()); } - paras = newParas; - // Remove double blanks/brks - fixBlanksBreaks(paras); + if (image != null) { + return new Paragraph(image); + } - return paras; + return processPara(line); } /** @@ -726,9 +850,9 @@ public abstract class BasicSupport { static String[] getImageExt(boolean emptyAllowed) { if (emptyAllowed) { return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; - } else { - return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; } + + return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; } /** @@ -749,7 +873,7 @@ public abstract class BasicSupport { InputStream in = null; try { in = Instance.getCache().open(url, getSupport(url), true); - return IOUtils.toImage(in); + return ImageUtils.fromStream(in); } catch (IOException e) { } finally { if (in != null) { @@ -781,19 +905,35 @@ public abstract class BasicSupport { if (line != null) { // try for files - String path = null; if (source != null) { - path = new File(source.getFile()).getParent(); try { - String basePath = new File(new File(path), line.trim()) - .getAbsolutePath(); + + String relPath = null; + String absPath = null; + try { + String path = new File(source.getFile()).getParent(); + relPath = new File(new File(path), line.trim()) + .getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (one possibility to take + // into account: absolute path on Windows) + } + try { + absPath = new File(line.trim()).getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (at all) + } + for (String ext : getImageExt(true)) { - if (new File(basePath + ext).exists()) { - url = new File(basePath + ext).toURI().toURL(); + if (absPath != null && new File(absPath + ext).exists()) { + url = new File(absPath + ext).toURI().toURL(); + } else if (relPath != null + && new File(relPath + ext).exists()) { + url = new File(relPath + ext).toURI().toURL(); } } } catch (Exception e) { - // Nothing to do here + // Should not happen since we control the correct arguments } } @@ -801,7 +941,8 @@ public abstract class BasicSupport { // try for URLs try { for (String ext : getImageExt(true)) { - if (Instance.getCache().check(new URL(line + ext))) { + if (Instance.getCache() + .check(new URL(line + ext), true)) { url = new URL(line + ext); break; } @@ -841,6 +982,9 @@ public abstract class BasicSupport { /** * Open the input file that will be used through the support. + *

+ * Can return NULL, in which case you are supposed to work without an + * {@link InputStream}. * * @param source * the source {@link URL} @@ -854,22 +998,6 @@ public abstract class BasicSupport { return Instance.getCache().open(source, this, false); } - /** - * Reset the given {@link InputStream} and return it. - * - * @param in - * the {@link InputStream} to reset - * - * @return the same {@link InputStream} after reset - */ - protected InputStream reset(InputStream in) { - try { - in.reset(); - } catch (IOException e) { - } - return in; - } - /** * Reset then return {@link BasicSupport#in}. * @@ -1031,9 +1159,9 @@ public abstract class BasicSupport { if (prev == car) { builder.append(closeDoubleQuote); continue; - } else { - builder.append(closeQuote); } + + builder.append(closeQuote); } } @@ -1269,7 +1397,12 @@ public abstract class BasicSupport { case INFO_TEXT: return new InfoText().setType(type); case FIMFICTION: - return new Fimfiction().setType(type); + try { + // Can fail if no client key or NO in options + return new FimfictionApi().setType(type); + } catch (IOException e) { + return new Fimfiction().setType(type); + } case FANFICTION: return new Fanfiction().setType(type); case TEXT: @@ -1280,6 +1413,8 @@ public abstract class BasicSupport { return new E621().setType(type); case YIFFSTAR: return new YiffStar().setType(type); + case E_HENTAI: + return new EHentai().setType(type); case CBZ: return new Cbz().setType(type); case HTML: @@ -1289,6 +1424,25 @@ public abstract class BasicSupport { return null; } + /** + * Reset the given {@link InputStream} and return it. + * + * @param in + * the {@link InputStream} to reset + * + * @return the same {@link InputStream} after reset + */ + static protected InputStream reset(InputStream in) { + try { + if (in != null) { + in.reset(); + } + } catch (IOException e) { + } + + return in; + } + /** * Return the first line from the given input which correspond to the given * selectors. @@ -1305,7 +1459,8 @@ public abstract class BasicSupport { * * @return the line */ - static String getLine(InputStream in, String needle, int relativeLine) { + static protected String getLine(InputStream in, String needle, + int relativeLine) { return getLine(in, needle, relativeLine, true); } @@ -1328,15 +1483,11 @@ public abstract class BasicSupport { * * @return the line */ - static String getLine(InputStream in, String needle, int relativeLine, - boolean first) { + static protected String getLine(InputStream in, String needle, + int relativeLine, boolean first) { String rep = null; - try { - in.reset(); - } catch (IOException e) { - Instance.syserr(e); - } + reset(in); List lines = new ArrayList(); @SuppressWarnings("resource") @@ -1370,4 +1521,133 @@ public abstract class BasicSupport { return rep; } + + /** + * Return the text between the key and the endKey (and optional subKey can + * be passed, in this case we will look for the key first, then take the + * text between the subKey and the endKey). + *

+ * Will only match the first line with the given key if more than one are + * possible. Which also means that if the subKey or endKey is not found on + * that line, NULL will be returned. + * + * @param in + * the input + * @param key + * the key to match (also supports "^" at start to say + * "only if it starts with" the key) + * @param subKey + * the sub key or NULL if none + * @param endKey + * the end key or NULL for "up to the end" + * @return the text or NULL if not found + */ + static protected String getKeyLine(InputStream in, String key, + String subKey, String endKey) { + return getKeyText(getLine(in, key, 0), key, subKey, endKey); + } + + /** + * Return the text between the key and the endKey (and optional subKey can + * be passed, in this case we will look for the key first, then take the + * text between the subKey and the endKey). + * + * @param in + * the input + * @param key + * the key to match (also supports "^" at start to say + * "only if it starts with" the key) + * @param subKey + * the sub key or NULL if none + * @param endKey + * the end key or NULL for "up to the end" + * @return the text or NULL if not found + */ + static protected String getKeyText(String in, String key, String subKey, + String endKey) { + String result = null; + + String line = in; + if (line != null && line.contains(key)) { + line = line.substring(line.indexOf(key) + key.length()); + if (subKey == null || subKey.isEmpty() || line.contains(subKey)) { + if (subKey != null) { + line = line.substring(line.indexOf(subKey) + + subKey.length()); + } + if (endKey == null || line.contains(endKey)) { + if (endKey != null) { + line = line.substring(0, line.indexOf(endKey)); + result = line; + } + } + } + } + + return result; + } + + /** + * Return the text between the key and the endKey (optional subKeys can be + * passed, in this case we will look for the subKeys first, then take the + * text between the key and the endKey). + * + * @param in + * the input + * @param key + * the key to match + * @param endKey + * the end key or NULL for "up to the end" + * @param afters + * the sub-keys to find before checking for key/endKey + * + * @return the text or NULL if not found + */ + static protected String getKeyTextAfter(String in, String key, + String endKey, String... afters) { + + if (in != null && !in.isEmpty()) { + int pos = indexOfAfter(in, 0, afters); + if (pos < 0) { + return null; + } + + in = in.substring(pos); + } + + return getKeyText(in, key, null, endKey); + } + + /** + * Return the first index after all the given "afters" have been found in + * the {@link String}, or -1 if it was not possible. + * + * @param in + * the input + * @param startAt + * start at this position in the string + * @param afters + * the sub-keys to find before checking for key/endKey + * + * @return the text or NULL if not found + */ + static protected int indexOfAfter(String in, int startAt, String... afters) { + int pos = -1; + if (in != null && !in.isEmpty()) { + pos = startAt; + if (afters != null) { + for (int i = 0; pos >= 0 && i < afters.length; i++) { + String subKey = afters[i]; + if (!subKey.isEmpty()) { + pos = in.indexOf(subKey, pos); + if (pos >= 0) { + pos += subKey.length(); + } + } + } + } + } + + return pos; + } }