X-Git-Url: http://git.nikiroo.be/?p=fanfix.git;a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=471147ea0548ede456aa66e01a6337c9ae4ef1ef;hp=129182208f29f0ce70d46ffb70053231d0792a30;hb=27dc71793c8d76712f01cc6fdef75195bb22483a;hpb=a4143cd74a90e17a811a4581cbeb213fed1f6304 diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index 1291822..471147e 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -10,6 +10,7 @@ import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -61,6 +62,8 @@ public abstract class BasicSupport { E621, /** Furry website with stories */ YIFFSTAR, + /** Comics and images groups, mostly but not only NSFW */ + E_HENTAI, /** CBZ files */ CBZ, /** HTML files */ @@ -155,13 +158,13 @@ public abstract class BasicSupport { private URL currentReferer; // with only one 'r', as in 'HTTP'... 
// quote chars - private char openQuote = Instance.getTrans().getChar( + private char openQuote = Instance.getTrans().getCharacter( StringId.OPEN_SINGLE_QUOTE); - private char closeQuote = Instance.getTrans().getChar( + private char closeQuote = Instance.getTrans().getCharacter( StringId.CLOSE_SINGLE_QUOTE); - private char openDoubleQuote = Instance.getTrans().getChar( + private char openDoubleQuote = Instance.getTrans().getCharacter( StringId.OPEN_DOUBLE_QUOTE); - private char closeDoubleQuote = Instance.getTrans().getChar( + private char closeDoubleQuote = Instance.getTrans().getCharacter( StringId.CLOSE_DOUBLE_QUOTE); /** @@ -215,6 +218,8 @@ public abstract class BasicSupport { * the source of the story * @param in * the input (the main resource) + * @param pg + * the optional progress reporter * * @return the chapters * @@ -222,7 +227,7 @@ public abstract class BasicSupport { * in case of I/O error */ protected abstract List> getChapters(URL source, - InputStream in) throws IOException; + InputStream in, Progress pg) throws IOException; /** * Return the content of the chapter (possibly HTML encoded, if @@ -234,6 +239,8 @@ public abstract class BasicSupport { * the input (the main resource) * @param number * the chapter number + * @param pg + * the optional progress reporter * * @return the content * @@ -241,7 +248,17 @@ public abstract class BasicSupport { * in case of I/O error */ protected abstract String getChapterContent(URL source, InputStream in, - int number) throws IOException; + int number, Progress pg) throws IOException; + + /** + * Log into the support (can be a no-op depending upon the support). + * + * @throws IOException + * in case of I/O error + */ + public void login() throws IOException { + + } /** * Return the list of cookies (values included) that must be used to @@ -251,8 +268,11 @@ public abstract class BasicSupport { * it. 
* * @return the cookies + * + * @throws IOException + * in case of I/O error */ - public Map getCookies() { + public Map getCookies() throws IOException { return new HashMap(); } @@ -284,7 +304,7 @@ public abstract class BasicSupport { * in case of I/O error */ public Story processMeta(URL url) throws IOException { - return processMeta(url, true, false); + return processMeta(url, true, false, null); } /** @@ -296,14 +316,25 @@ public abstract class BasicSupport { * * @param close * close "this" and "in" when done + * @param pg + * the optional progress reporter * * @return the {@link Story} * * @throws IOException * in case of I/O error */ - protected Story processMeta(URL url, boolean close, boolean getDesc) - throws IOException { + protected Story processMeta(URL url, boolean close, boolean getDesc, + Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } else { + pg.setMinMax(0, 100); + } + + login(); + pg.setProgress(10); + url = getCanonicalUrl(url); setCurrentReferer(url); @@ -315,23 +346,33 @@ public abstract class BasicSupport { try { preprocess(url, getInput()); + pg.setProgress(30); Story story = new Story(); MetaData meta = getMeta(url, getInput()); + if (meta.getCreationDate() == null + || meta.getCreationDate().isEmpty()) { + meta.setCreationDate(StringUtils.fromTime(new Date().getTime())); + } story.setMeta(meta); + pg.setProgress(50); + if (meta != null && meta.getCover() == null) { meta.setCover(getDefaultCover(meta.getSubject())); } + pg.setProgress(60); + if (getDesc) { String descChapterName = Instance.getTrans().getString( StringId.DESCRIPTION); story.getMeta().setResume( makeChapter(url, 0, descChapterName, - getDesc(url, getInput()))); + getDesc(url, getInput()), null)); } + pg.setProgress(100); return story; } finally { if (close) { @@ -374,41 +415,78 @@ public abstract class BasicSupport { url = getCanonicalUrl(url); pg.setProgress(1); try { - Story story = processMeta(url, false, true); - pg.setProgress(10); + 
Progress pgMeta = new Progress(); + pg.addProgress(pgMeta, 10); + Story story = processMeta(url, false, true, pgMeta); + if (!pgMeta.isDone()) { + pgMeta.setProgress(pgMeta.getMax()); // 10% + } + if (story == null) { - pg.setProgress(100); + pg.setProgress(90); return null; } + pg.setName("Retrieving " + story.getMeta().getTitle()); + setCurrentReferer(url); + Progress pgGetChapters = new Progress(); + pg.addProgress(pgGetChapters, 10); story.setChapters(new ArrayList()); + List> chapters = getChapters(url, getInput(), + pgGetChapters); + if (!pgGetChapters.isDone()) { + pgGetChapters.setProgress(pgGetChapters.getMax()); // 20% + } - List> chapters = getChapters(url, getInput()); - pg.setProgress(20); - - int i = 1; if (chapters != null) { - Progress pgChaps = new Progress(0, chapters.size()); + Progress pgChaps = new Progress("Extracting chapters", 0, + chapters.size() * 300); pg.addProgress(pgChaps, 80); + long words = 0; + int i = 1; for (Entry chap : chapters) { + pgChaps.setName("Extracting chapter " + i); setCurrentReferer(chap.getValue()); InputStream chapIn = Instance.getCache().open( chap.getValue(), this, true); + pgChaps.setProgress(i * 100); try { - story.getChapters().add( - makeChapter(url, i, chap.getKey(), - getChapterContent(url, chapIn, i))); + Progress pgGetChapterContent = new Progress(); + Progress pgMakeChapter = new Progress(); + pgChaps.addProgress(pgGetChapterContent, 100); + pgChaps.addProgress(pgMakeChapter, 100); + + String content = getChapterContent(url, chapIn, i, + pgGetChapterContent); + if (!pgGetChapterContent.isDone()) { + pgGetChapterContent.setProgress(pgGetChapterContent + .getMax()); + } + + Chapter cc = makeChapter(url, i, chap.getKey(), + content, pgMakeChapter); + if (!pgMakeChapter.isDone()) { + pgMakeChapter.setProgress(pgMakeChapter.getMax()); + } + + words += cc.getWords(); + story.getChapters().add(cc); + if (story.getMeta() != null) { + story.getMeta().setWords(words); + } } finally { chapIn.close(); } - 
pgChaps.setProgress(i++); + i++; } + + pgChaps.setName("Extracting chapters"); } else { - pg.setProgress(100); + pg.setProgress(80); } return story; @@ -504,6 +582,8 @@ public abstract class BasicSupport { * the chapter name * @param content * the chapter content + * @param pg + * the optional progress reporter * * @return the {@link Chapter} * @@ -511,7 +591,7 @@ public abstract class BasicSupport { * in case of I/O error */ protected Chapter makeChapter(URL source, int number, String name, - String content) throws IOException { + String content, Progress pg) throws IOException { // Chapter name: process it correctly, then remove the possible // redundant "Chapter x: " in front of it String chapterName = processPara(name).getContent().trim(); @@ -539,7 +619,13 @@ public abstract class BasicSupport { Chapter chap = new Chapter(number, chapterName); if (content != null) { - chap.setParagraphs(makeParagraphs(source, content)); + List paras = makeParagraphs(source, content, pg); + long words = 0; + for (Paragraph para : paras) { + words += para.getWords(); + } + chap.setParagraphs(paras); + chap.setWords(words); } return chap; @@ -553,68 +639,108 @@ public abstract class BasicSupport { * the source URL of the story * @param content * the textual content + * @param pg + * the optional progress reporter * * @return the {@link Paragraph}s * * @throws IOException * in case of I/O error */ - protected List makeParagraphs(URL source, String content) - throws IOException { + protected List makeParagraphs(URL source, String content, + Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } + if (isHtml()) { // Special
processing: content = content.replaceAll("(
]*>)|(
)|(
)", - "\n* * *\n"); + "
* * *
"); } List paras = new ArrayList(); - InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8")); - try { - BufferedReader buff = new BufferedReader(new InputStreamReader(in, - "UTF-8")); - - for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff - .readLine()) { - String lines[]; - if (isHtml()) { - lines = encodedLine.split("(

|

|
|
|\\n)"); - } else { - lines = new String[] { encodedLine }; - } - - for (String aline : lines) { - String line = aline.trim(); - URL image = null; + if (content != null && !content.trim().isEmpty()) { + if (isHtml()) { + String[] tab = content.split("(

|

|
|
)"); + pg.setMinMax(0, tab.length); + int i = 1; + for (String line : tab) { if (line.startsWith("[") && line.endsWith("]")) { - image = getImageUrl(this, source, - line.substring(1, line.length() - 1).trim()); + pg.setName("Extracting image " + i); } + paras.add(makeParagraph(source, line.trim())); + pg.setProgress(i++); + } + pg.setName(null); + } else { + List lines = new ArrayList(); + BufferedReader buff = null; + try { + buff = new BufferedReader( + new InputStreamReader(new ByteArrayInputStream( + content.getBytes("UTF-8")), "UTF-8")); + for (String line = buff.readLine(); line != null; line = buff + .readLine()) { + lines.add(line.trim()); + } + } finally { + if (buff != null) { + buff.close(); + } + } - if (image != null) { - paras.add(new Paragraph(image)); - } else { - paras.add(processPara(line)); + pg.setMinMax(0, lines.size()); + int i = 0; + for (String line : lines) { + if (line.startsWith("[") && line.endsWith("]")) { + pg.setName("Extracting image " + i); } + paras.add(makeParagraph(source, line)); + pg.setProgress(i++); } + pg.setName(null); } - } finally { - in.close(); - } - // Check quotes for "bad" format - List newParas = new ArrayList(); - for (Paragraph para : paras) { - newParas.addAll(requotify(para)); - } - paras = newParas; + // Check quotes for "bad" format + List newParas = new ArrayList(); + for (Paragraph para : paras) { + newParas.addAll(requotify(para)); + } + paras = newParas; - // Remove double blanks/brks - fixBlanksBreaks(paras); + // Remove double blanks/brks + fixBlanksBreaks(paras); + } return paras; } + /** + * Convert the given line into a single {@link Paragraph}. 
+ * + * @param source + * the source URL of the story + * @param line + * the textual content of the paragraph + * + * @return the {@link Paragraph} + */ + private Paragraph makeParagraph(URL source, String line) { + URL image = null; + if (line.startsWith("[") && line.endsWith("]")) { + image = getImageUrl(this, source, + line.substring(1, line.length() - 1).trim()); + } + + if (image != null) { + return new Paragraph(image); + } else { + return processPara(line); + } + } + /** * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of * those {@link Paragraph}s. @@ -750,19 +876,35 @@ public abstract class BasicSupport { if (line != null) { // try for files - String path = null; if (source != null) { - path = new File(source.getFile()).getParent(); try { - String basePath = new File(new File(path), line.trim()) - .getAbsolutePath(); + + String relPath = null; + String absPath = null; + try { + String path = new File(source.getFile()).getParent(); + relPath = new File(new File(path), line.trim()) + .getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (one possibility to take + // into account: absolute path on Windows) + } + try { + absPath = new File(line.trim()).getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (at all) + } + for (String ext : getImageExt(true)) { - if (new File(basePath + ext).exists()) { - url = new File(basePath + ext).toURI().toURL(); + if (absPath != null && new File(absPath + ext).exists()) { + url = new File(absPath + ext).toURI().toURL(); + } else if (relPath != null + && new File(relPath + ext).exists()) { + url = new File(relPath + ext).toURI().toURL(); } } } catch (Exception e) { - // Nothing to do here + // Should not happen since we control the correct arguments } } @@ -914,7 +1056,8 @@ public abstract class BasicSupport { if (!singleQ && !doubleQ) { line = openDoubleQuote + line + closeDoubleQuote; - newParas.add(new Paragraph(ParagraphType.QUOTE, line)); + 
newParas.add(new Paragraph(ParagraphType.QUOTE, line, para + .getWords())); } else { char open = singleQ ? openQuote : openDoubleQuote; char close = singleQ ? closeQuote : closeDoubleQuote; @@ -937,7 +1080,13 @@ public abstract class BasicSupport { if (posDot >= 0) { String rest = line.substring(posDot + 1).trim(); line = line.substring(0, posDot + 1).trim(); - newParas.add(new Paragraph(ParagraphType.QUOTE, line)); + long words = 1; + for (char car : line.toCharArray()) { + if (car == ' ') { + words++; + } + } + newParas.add(new Paragraph(ParagraphType.QUOTE, line, words)); if (!rest.isEmpty()) { newParas.addAll(requotify(processPara(rest))); } @@ -971,6 +1120,7 @@ public abstract class BasicSupport { boolean tentativeCloseQuote = false; char prev = '\0'; int dashCount = 0; + long words = 1; StringBuilder builder = new StringBuilder(); for (char car : line.toCharArray()) { @@ -1004,6 +1154,10 @@ public abstract class BasicSupport { case '\t': case '\n': // just in case case '\r': // just in case + if (builder.length() > 0 + && builder.charAt(builder.length() - 1) != ' ') { + words++; + } builder.append(' '); break; @@ -1156,7 +1310,7 @@ public abstract class BasicSupport { type = ParagraphType.QUOTE; } - return new Paragraph(type, line); + return new Paragraph(type, line, words); } /** @@ -1237,6 +1391,8 @@ public abstract class BasicSupport { return new E621().setType(type); case YIFFSTAR: return new YiffStar().setType(type); + case E_HENTAI: + return new EHentai().setType(type); case CBZ: return new Cbz().setType(type); case HTML: @@ -1327,4 +1483,48 @@ public abstract class BasicSupport { return rep; } + + /** + * Return the text between the key and the endKey (and optional subKey can + * be passed, in this case we will look for the key first, then take the + * text between the subKey and the endKey). + *

+	 * Will only match the first line with the given key if more than one are
+	 * possible. Which also means that if the subKey or endKey is not found on
+	 * that line, NULL will be returned.
+	 *
+	 * @param in
+	 *            the input
+	 * @param key
+	 *            the key to match (also supports "^" at start to say
+	 *            "only if it starts with" the key)
+	 * @param subKey
+	 *            the sub key or NULL if none
+	 * @param endKey
+	 *            the end key or NULL for "up to the end"
+	 * @return the text or NULL if not found
+	 */
+	static String getKeyLine(InputStream in, String key, String subKey,
+			String endKey) {
+		String result = null;
+
+		String line = getLine(in, key, 0);
+		if (line != null && line.contains(key)) {
+			line = line.substring(line.indexOf(key) + key.length());
+			if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
+				if (subKey != null) {
+					line = line.substring(line.indexOf(subKey)
+							+ subKey.length());
+				}
+				// A NULL endKey means "up to the end": keep the rest of the line
+				if (endKey == null) {
+					result = line;
+				} else if (line.contains(endKey)) {
+					result = line.substring(0, line.indexOf(endKey));
+				}
+			}
+		}
+		return result;
+	}
 }