X-Git-Url: http://git.nikiroo.be/?p=fanfix.git;a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=b6fd1e277d67c3facbba0738b45f096f49d4734c;hp=74f11156e82c265a0eee1ddbd09b7c417a98ed12;hb=68e370a441d8e6b10bfaa904ecacb29e7d6160d8;hpb=08fe2e33007063e30fe22dc1d290f8afaa18eb1d diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index 74f1115..b6fd1e2 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -1,12 +1,14 @@ package be.nikiroo.fanfix.supported; +import java.awt.image.BufferedImage; +import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -20,8 +22,10 @@ import be.nikiroo.fanfix.bundles.StringId; import be.nikiroo.fanfix.data.Chapter; import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.fanfix.data.Paragraph; -import be.nikiroo.fanfix.data.Story; import be.nikiroo.fanfix.data.Paragraph.ParagraphType; +import be.nikiroo.fanfix.data.Story; +import be.nikiroo.utils.IOUtils; +import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; /** @@ -142,7 +146,6 @@ public abstract class BasicSupport { } } - /** Only used by {@link BasicSupport#getInput()} just so it is always reset. */ private InputStream in; private SupportType type; private URL currentReferer; // with on 'r', as in 'HTTP'... @@ -182,70 +185,7 @@ public abstract class BasicSupport { */ protected abstract boolean isHtml(); - /** - * Return the story title. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the title - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getTitle(URL source, InputStream in) - throws IOException; - - /** - * Return the story author. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the author - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getAuthor(URL source, InputStream in) - throws IOException; - - /** - * Return the story publication date. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the date - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getDate(URL source, InputStream in) - throws IOException; - - /** - * Return the subject of the story (for instance, if it is a fanfiction, - * what is the original work; if it is a technical text, what is the - * technical subject...). - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the subject - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getSubject(URL source, InputStream in) + protected abstract MetaData getMeta(URL source, InputStream in) throws IOException; /** @@ -264,24 +204,6 @@ public abstract class BasicSupport { protected abstract String getDesc(URL source, InputStream in) throws IOException; - /** - * Return the story cover resource if any, or NULL if none. - *

- * The default cover should not be checked for here. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the cover or NULL - * - * @throws IOException - * in case of I/O error - */ - protected abstract URL getCover(URL source, InputStream in) - throws IOException; - /** * Return the list of chapters (name and resource). * @@ -317,16 +239,6 @@ public abstract class BasicSupport { protected abstract String getChapterContent(URL source, InputStream in, int number) throws IOException; - /** - * Check if this {@link BasicSupport} is mainly catered to image files. - * - * @return TRUE if it is - */ - public boolean isImageDocument(URL source, InputStream in) - throws IOException { - return false; - } - /** * Return the list of cookies (values included) that must be used to * correctly fetch the resources. @@ -379,23 +291,15 @@ public abstract class BasicSupport { } try { - preprocess(getInput()); + preprocess(url, getInput()); Story story = new Story(); - story.setMeta(new MetaData()); - story.getMeta().setTitle(ifUnhtml(getTitle(url, getInput()))); - story.getMeta().setAuthor( - fixAuthor(ifUnhtml(getAuthor(url, getInput())))); - story.getMeta().setDate(ifUnhtml(getDate(url, getInput()))); - story.getMeta().setTags(getTags(url, getInput())); - story.getMeta().setSource(getSourceName()); - story.getMeta().setPublisher( - ifUnhtml(getPublisher(url, getInput()))); - story.getMeta().setUuid(getUuid(url, getInput())); - story.getMeta().setLuid(getLuid(url, getInput())); - story.getMeta().setLang(getLang(url, getInput())); - story.getMeta().setSubject(ifUnhtml(getSubject(url, getInput()))); - story.getMeta().setImageDocument(isImageDocument(url, getInput())); + MetaData meta = getMeta(url, getInput()); + story.setMeta(meta); + + if (meta != null && meta.getCover() == null) { + meta.setCover(getDefaultCover(meta.getSubject())); + } if (getDesc) { String descChapterName = Instance.getTrans().getString( @@ -427,51 +331,42 @@ public abstract class BasicSupport { * * @param url * the story resource + * @param pg + * the optional progress reporter * * @return the {@link Story} * * @throws IOException * in case of I/O error */ - public Story process(URL url) throws IOException { + public Story process(URL url, Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } else { + pg.setMinMax(0, 100); + } + setCurrentReferer(url); + pg.setProgress(1); try { Story story = processMeta(url, false, true); + pg.setProgress(10); if (story == null) { + pg.setProgress(100); return null; } story.setChapters(new ArrayList()); - URL cover = getCover(url, getInput()); - if (cover == null) { - String subject = story.getMeta() == null ? null : story - .getMeta().getSubject(); - if (subject != null && !subject.isEmpty() - && Instance.getCoverDir() != null) { - File fileCover = new File(Instance.getCoverDir(), subject); - cover = getImage(fileCover.toURI().toURL(), subject); - } - } - - if (cover != null) { - InputStream coverIn = null; - try { - coverIn = Instance.getCache().open(cover, this, true); - story.getMeta().setCover(StringUtils.toImage(coverIn)); - } catch (IOException e) { - Instance.syserr(new IOException(Instance.getTrans() - .getString(StringId.ERR_BS_NO_COVER, cover), e)); - } finally { - if (coverIn != null) - coverIn.close(); - } - } - List> chapters = getChapters(url, getInput()); + pg.setProgress(20); + int i = 1; if (chapters != null) { + Progress pgChaps = new Progress(0, chapters.size()); + pg.addProgress(pgChaps, 80); + for (Entry chap : chapters) { setCurrentReferer(chap.getValue()); InputStream chapIn = Instance.getCache().open( @@ -483,8 +378,11 @@ public abstract class BasicSupport { } finally { chapIn.close(); } - i++; + + pgChaps.setProgress(i++); } + } else { + pg.setProgress(100); } return story; @@ -548,177 +446,17 @@ public abstract class BasicSupport { } /** - * Return the story publisher (by default, - * {@link BasicSupport#getSourceName()}). - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the publisher - * - * @throws IOException - * in case of I/O error - */ - protected String getPublisher(URL source, InputStream in) - throws IOException { - return getSourceName(); - } - - /** - * Return the story UUID, a unique value representing the story (it is often - * an URL). - *

- * By default, this is the {@link URL} of the resource. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the uuid - * - * @throws IOException - * in case of I/O error - */ - protected String getUuid(URL source, InputStream in) throws IOException { - return source.toString(); - } - - /** - * Return the story Library UID, a unique value representing the story (it - * is often a number) in the local library. - *

- * By default, this is empty. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the id - * - * @throws IOException - * in case of I/O error - */ - protected String getLuid(URL source, InputStream in) throws IOException { - return ""; - } - - /** - * Return the 2-letter language code of this story. - *

- * By default, this is 'EN'. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the language - * - * @throws IOException - * in case of I/O error - */ - protected String getLang(URL source, InputStream in) throws IOException { - return "EN"; - } - - /** - * Return the list of tags for this story. + * Prepare the support if needed before processing. * * @param source * the source of the story * @param in * the input (the main resource) * - * @return the tags - * - * @throws IOException - * in case of I/O error - */ - protected List getTags(URL source, InputStream in) - throws IOException { - return new ArrayList(); - } - - /** - * Return the first line from the given input which correspond to the given - * selectors. - *

- * Do not reset the input, which will be pointing at the line just after the - * result (input will be spent if no result is found). - * - * @param in - * the input - * @param needle - * a string that must be found inside the target line - * @param relativeLine - * the line to return based upon the target line position (-1 = - * the line before, 0 = the target line...) - * - * @return the line - */ - protected String getLine(InputStream in, String needle, int relativeLine) { - return getLine(in, needle, relativeLine, true); - } - - /** - * Return a line from the given input which correspond to the given - * selectors. - *

- * Do not reset the input, which will be pointing at the line just after the - * result (input will be spent if no result is found) when first is TRUE, - * and will always be spent if first is FALSE. - * - * @param in - * the input - * @param needle - * a string that must be found inside the target line - * @param relativeLine - * the line to return based upon the target line position (-1 = - * the line before, 0 = the target line...) - * @param first - * takes the first result (as opposed to the last one, which will - * also always spend the input) - * - * @return the line - */ - protected String getLine(InputStream in, String needle, int relativeLine, - boolean first) { - String rep = null; - - List lines = new ArrayList(); - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - int index = -1; - scan.useDelimiter("\\n"); - while (scan.hasNext()) { - lines.add(scan.next()); - - if (index == -1 && lines.get(lines.size() - 1).contains(needle)) { - index = lines.size() - 1; - } - - if (index >= 0 && index + relativeLine < lines.size()) { - rep = lines.get(index + relativeLine); - if (first) { - break; - } - } - } - - return rep; - } - - /** - * Prepare the support if needed before processing. - * * @throws IOException * on I/O error */ - protected void preprocess(InputStream in) throws IOException { + protected void preprocess(URL source, InputStream in) throws IOException { } /** @@ -748,7 +486,6 @@ public abstract class BasicSupport { */ protected Chapter makeChapter(URL source, int number, String name, String content) throws IOException { - // Chapter name: process it correctly, then remove the possible // redundant "Chapter x: " in front of it String chapterName = processPara(name).getContent().trim(); @@ -775,89 +512,148 @@ public abstract class BasicSupport { Chapter chap = new Chapter(number, chapterName); - if (content == null) { - return chap; + if (content != null) { + chap.setParagraphs(makeParagraphs(source, content)); } + return chap; + + } + + /** + * Convert the given content into {@link Paragraph}s. + * + * @param source + * the source URL of the story + * @param content + * the textual content + * + * @return the {@link Paragraph}s + * + * @throws IOException + * in case of I/O error + */ + protected List makeParagraphs(URL source, String content) + throws IOException { if (isHtml()) { // Special


processing: content = content.replaceAll("(
]*>)|(
)|(
)", "\n* * *\n"); } - InputStream in = new ByteArrayInputStream( - content.getBytes(StandardCharsets.UTF_8)); + List paras = new ArrayList(); + InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8")); try { - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - scan.useDelimiter("(\\n|

)"); // \n for test,

for html - - List paras = new ArrayList(); - while (scan.hasNext()) { - String line = scan.next().trim(); - boolean image = false; - if (line.startsWith("[") && line.endsWith("]")) { - URL url = getImage(source, - line.substring(1, line.length() - 1).trim()); - if (url != null) { - paras.add(new Paragraph(url)); - image = true; - } + BufferedReader buff = new BufferedReader(new InputStreamReader(in, + "UTF-8")); + + for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff + .readLine()) { + String lines[]; + if (isHtml()) { + lines = encodedLine.split("(

|

|
|
|\\n)"); + } else { + lines = new String[] { encodedLine }; } - if (!image) { - paras.add(processPara(line)); + for (String aline : lines) { + String line = aline.trim(); + + URL image = null; + if (line.startsWith("[") && line.endsWith("]")) { + image = getImageUrl(this, source, + line.substring(1, line.length() - 1).trim()); + } + + if (image != null) { + paras.add(new Paragraph(image)); + } else { + paras.add(processPara(line)); + } } } + } finally { + in.close(); + } - // Check quotes for "bad" format - List newParas = new ArrayList(); - for (Paragraph para : paras) { - newParas.addAll(requotify(para)); - } - paras = newParas; - - // Remove double blanks/brks - boolean space = false; - boolean brk = true; - for (int i = 0; i < paras.size(); i++) { - Paragraph para = paras.get(i); - boolean thisSpace = para.getType() == ParagraphType.BLANK; - boolean thisBrk = para.getType() == ParagraphType.BREAK; - - if (space && thisBrk) { - paras.remove(i - 1); - i--; - } else if ((space || brk) && (thisSpace || thisBrk)) { - paras.remove(i); - i--; - } + // Check quotes for "bad" format + List newParas = new ArrayList(); + for (Paragraph para : paras) { + newParas.addAll(requotify(para)); + } + paras = newParas; - space = thisSpace; - brk = thisBrk; - } + // Remove double blanks/brks + fixBlanksBreaks(paras); - // Remove blank/brk at start - if (paras.size() > 0 - && (paras.get(0).getType() == ParagraphType.BLANK || paras - .get(0).getType() == ParagraphType.BREAK)) { - paras.remove(0); - } + return paras; + } - // Remove blank/brk at end - int last = paras.size() - 1; - if (paras.size() > 0 - && (paras.get(last).getType() == ParagraphType.BLANK || paras - .get(last).getType() == ParagraphType.BREAK)) { - paras.remove(last); + /** + * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of + * those {@link Paragraph}s. + *

+ * The resulting list will not contain a starting or trailing blank/break + * nor 2 blanks or breaks following each other. + * + * @param paras + * the list of {@link Paragraph}s to fix + */ + protected void fixBlanksBreaks(List paras) { + boolean space = false; + boolean brk = true; + for (int i = 0; i < paras.size(); i++) { + Paragraph para = paras.get(i); + boolean thisSpace = para.getType() == ParagraphType.BLANK; + boolean thisBrk = para.getType() == ParagraphType.BREAK; + + if (i > 0 && space && thisBrk) { + paras.remove(i - 1); + i--; + } else if ((space || brk) && (thisSpace || thisBrk)) { + paras.remove(i); + i--; } - chap.setParagraphs(paras); + space = thisSpace; + brk = thisBrk; + } + + // Remove blank/brk at start + if (paras.size() > 0 + && (paras.get(0).getType() == ParagraphType.BLANK || paras.get( + 0).getType() == ParagraphType.BREAK)) { + paras.remove(0); + } - return chap; - } finally { - in.close(); + // Remove blank/brk at end + int last = paras.size() - 1; + if (paras.size() > 0 + && (paras.get(last).getType() == ParagraphType.BLANK || paras + .get(last).getType() == ParagraphType.BREAK)) { + paras.remove(last); + } + } + + /** + * Get the default cover related to this subject (see .info files). + * + * @param subject + * the subject + * + * @return the cover if any, or NULL + */ + static BufferedImage getDefaultCover(String subject) { + if (subject != null && !subject.isEmpty() + && Instance.getCoverDir() != null) { + try { + File fileCover = new File(Instance.getCoverDir(), subject); + return getImage(null, fileCover.toURI().toURL(), subject); + } catch (MalformedURLException e) { + } } + + return null; } /** @@ -865,7 +661,7 @@ public abstract class BasicSupport { * * @return the extensions */ - protected String[] getImageExt(boolean emptyAllowed) { + static String[] getImageExt(boolean emptyAllowed) { if (emptyAllowed) { return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; } else { @@ -873,6 +669,27 @@ public abstract class BasicSupport { } } + static BufferedImage getImage(BasicSupport support, URL source, String line) { + URL url = getImageUrl(support, source, line); + if (url != null) { + InputStream in = null; + try { + in = Instance.getCache().open(url, getSupport(url), true); + return IOUtils.toImage(in); + } catch (IOException e) { + } finally { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + } + } + } + } + + return null; + } + /** * Check if the given resource can be a local image or a remote image, then * refresh the cache with it if it is. @@ -885,74 +702,84 @@ public abstract class BasicSupport { * @return the image URL if found, or NULL * */ - protected URL getImage(URL source, String line) { - String path = new File(source.getFile()).getParent(); + static URL getImageUrl(BasicSupport support, URL source, String line) { URL url = null; - // try for files - try { - String urlBase = new File(new File(path), line.trim()).toURI() - .toURL().toString(); - for (String ext : getImageExt(true)) { - if (new File(urlBase + ext).exists()) { - url = new File(urlBase + ext).toURI().toURL(); - } - } - } catch (Exception e) { - // Nothing to do here - } - - if (url == null) { - // try for URLs - try { - for (String ext : getImageExt(true)) { - if (Instance.getCache().check(new URL(line + ext))) { - url = new URL(line + ext); + if (line != null) { + // try for files + String path = null; + if (source != null) { + path = new File(source.getFile()).getParent(); + try { + String basePath = new File(new File(path), line.trim()) + .getAbsolutePath(); + for (String ext : getImageExt(true)) { + if (new File(basePath + ext).exists()) { + url = new File(basePath + ext).toURI().toURL(); + } } + } catch (Exception e) { + // Nothing to do here } + } - // try out of cache - if (url == null) { + if (url == null) { + // try for URLs + try { for (String ext : getImageExt(true)) { - try { + if (Instance.getCache().check(new URL(line + ext))) { url = new URL(line + ext); - Instance.getCache().refresh(url, this, true); break; - } catch (IOException e) { - // no image with this ext - url = null; } } + + // try out of cache + if (url == null) { + for (String ext : getImageExt(true)) { + try { + url = new URL(line + ext); + Instance.getCache().refresh(url, support, true); + break; + } catch (IOException e) { + // no image with this ext + url = null; + } + } + } + } catch (MalformedURLException e) { + // Not an url } - } catch (MalformedURLException e) { - // Not an url } - } - // refresh the cached file - if (url != null) { - try { - Instance.getCache().refresh(url, this, true); - } catch (IOException e) { - // woops, broken image - url = null; + // refresh the cached file + if (url != null) { + try { + Instance.getCache().refresh(url, support, true); + } catch (IOException e) { + // woops, broken image + url = null; + } } } return url; } + protected InputStream reset(InputStream in) { + try { + in.reset(); + } catch (IOException e) { + } + return in; + } + /** * Reset then return {@link BasicSupport#in}. * * @return {@link BasicSupport#in} - * - * @throws IOException - * in case of I/O error */ - protected InputStream getInput() throws IOException { - in.reset(); - return in; + protected InputStream getInput() { + return reset(in); } /** @@ -963,7 +790,7 @@ public abstract class BasicSupport { * * @return the author without prefixes */ - private String fixAuthor(String author) { + protected String fixAuthor(String author) { if (author != null) { for (String suffix : new String[] { " ", ":" }) { for (String byString : Instance.getConfig() @@ -994,30 +821,60 @@ public abstract class BasicSupport { * * @return the correctly (or so we hope) quotified paragraphs */ - private List requotify(Paragraph para) { + protected List requotify(Paragraph para) { List newParas = new ArrayList(); - if (para.getType() == ParagraphType.QUOTE) { + if (para.getType() == ParagraphType.QUOTE + && para.getContent().length() > 2) { String line = para.getContent(); boolean singleQ = line.startsWith("" + openQuote); boolean doubleQ = line.startsWith("" + openDoubleQuote); + // Do not try when more than one quote at a time + // (some stories are not easily readable if we do) + if (singleQ + && line.indexOf(closeQuote, 1) < line + .lastIndexOf(closeQuote)) { + newParas.add(para); + return newParas; + } + if (doubleQ + && line.indexOf(closeDoubleQuote, 1) < line + .lastIndexOf(closeDoubleQuote)) { + newParas.add(para); + return newParas; + } + // + if (!singleQ && !doubleQ) { line = openDoubleQuote + line + closeDoubleQuote; newParas.add(new Paragraph(ParagraphType.QUOTE, line)); } else { + char open = singleQ ? openQuote : openDoubleQuote; char close = singleQ ? closeQuote : closeDoubleQuote; - int posClose = line.indexOf(close); - int posDot = line.indexOf("."); - while (posDot >= 0 && posDot < posClose) { - posDot = line.indexOf(".", posDot + 1); + + int posDot = -1; + boolean inQuote = false; + int i = 0; + for (char car : line.toCharArray()) { + if (car == open) { + inQuote = true; + } else if (car == close) { + inQuote = false; + } else if (car == '.' && !inQuote) { + posDot = i; + break; + } + i++; } if (posDot >= 0) { String rest = line.substring(posDot + 1).trim(); line = line.substring(0, posDot + 1).trim(); newParas.add(new Paragraph(ParagraphType.QUOTE, line)); - newParas.addAll(requotify(processPara(rest))); + if (!rest.isEmpty()) { + newParas.addAll(requotify(processPara(rest))); + } } else { newParas.add(para); } @@ -1289,4 +1146,86 @@ public abstract class BasicSupport { return null; } + + /** + * Return the first line from the given input which correspond to the given + * selectors. + * + * @param in + * the input + * @param needle + * a string that must be found inside the target line (also + * supports "^" at start to say "only if it starts with" the + * needle) + * @param relativeLine + * the line to return based upon the target line position (-1 = + * the line before, 0 = the target line...) + * + * @return the line + */ + static String getLine(InputStream in, String needle, int relativeLine) { + return getLine(in, needle, relativeLine, true); + } + + /** + * Return a line from the given input which correspond to the given + * selectors. + * + * @param in + * the input + * @param needle + * a string that must be found inside the target line (also + * supports "^" at start to say "only if it starts with" the + * needle) + * @param relativeLine + * the line to return based upon the target line position (-1 = + * the line before, 0 = the target line...) + * @param first + * takes the first result (as opposed to the last one, which will + * also always spend the input) + * + * @return the line + */ + static String getLine(InputStream in, String needle, int relativeLine, + boolean first) { + String rep = null; + + try { + in.reset(); + } catch (IOException e) { + Instance.syserr(e); + } + + List lines = new ArrayList(); + @SuppressWarnings("resource") + Scanner scan = new Scanner(in, "UTF-8"); + int index = -1; + scan.useDelimiter("\\n"); + while (scan.hasNext()) { + lines.add(scan.next()); + + if (index == -1) { + if (needle.startsWith("^")) { + if (lines.get(lines.size() - 1).startsWith( + needle.substring(1))) { + index = lines.size() - 1; + } + + } else { + if (lines.get(lines.size() - 1).contains(needle)) { + index = lines.size() - 1; + } + } + } + + if (index >= 0 && index + relativeLine < lines.size()) { + rep = lines.get(index + relativeLine); + if (first) { + break; + } + } + } + + return rep; + } }