X-Git-Url: http://git.nikiroo.be/?p=fanfix.git;a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=169cc5f02d7a0a85cc4328227bd646c408d5a8e1;hp=61a4500759bccb26366d23ade04f07053c169dcb;hb=373da363323d3a9263aa6ebd392ca3272b23b412;hpb=333f0e7b5e333e8f6222881ce35398f403fc4121 diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index 61a4500..169cc5f 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -1,10 +1,12 @@ package be.nikiroo.fanfix.supported; import java.awt.image.BufferedImage; +import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; @@ -14,8 +16,6 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Scanner; -import javax.imageio.ImageIO; - import be.nikiroo.fanfix.Instance; import be.nikiroo.fanfix.bundles.Config; import be.nikiroo.fanfix.bundles.StringId; @@ -24,6 +24,8 @@ import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.fanfix.data.Paragraph; import be.nikiroo.fanfix.data.Paragraph.ParagraphType; import be.nikiroo.fanfix.data.Story; +import be.nikiroo.utils.IOUtils; +import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; /** @@ -58,7 +60,9 @@ public abstract class BasicSupport { /** Furry website with comics support */ E621, /** CBZ files */ - CBZ; + CBZ, + /** HTML files */ + HTML; /** * A description of this support type (more information than the @@ -283,7 +287,7 @@ public abstract class BasicSupport { */ protected Story processMeta(URL url, boolean close, boolean getDesc) throws IOException { - in = Instance.getCache().open(url, this, false); + in = openInput(url); if (in == null) { return null; } @@ -329,26 +333,42 @@ public abstract class BasicSupport { * * @param url * the story resource + * @param pg + * the optional progress reporter * * @return the {@link Story} * * @throws IOException * in case of I/O error */ - public Story process(URL url) throws IOException { + public Story process(URL url, Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } else { + pg.setMinMax(0, 100); + } + setCurrentReferer(url); + pg.setProgress(1); try { Story story = processMeta(url, false, true); + pg.setProgress(10); if (story == null) { + pg.setProgress(100); return null; } story.setChapters(new ArrayList()); List> chapters = getChapters(url, getInput()); + pg.setProgress(20); + int i = 1; if (chapters != null) { + Progress pgChaps = new Progress(0, chapters.size()); + pg.addProgress(pgChaps, 80); + for (Entry chap : chapters) { setCurrentReferer(chap.getValue()); InputStream chapIn = Instance.getCache().open( @@ -361,8 +381,10 @@ public abstract class BasicSupport { chapIn.close(); } - i++; + pgChaps.setProgress(i++); } + } else { + pg.setProgress(100); } return story; @@ -466,7 +488,6 @@ public abstract class BasicSupport { */ protected Chapter makeChapter(URL source, int number, String name, String content) throws IOException { - // Chapter name: process it correctly, then remove the possible // redundant "Chapter x: " in front of it String chapterName = processPara(name).getContent().trim(); @@ -493,90 +514,137 @@ public abstract class BasicSupport { Chapter chap = new Chapter(number, chapterName); - if (content == null) { - return chap; + if (content != null) { + chap.setParagraphs(makeParagraphs(source, content)); } + return chap; + + } + + /** + * Convert the given content into {@link Paragraph}s. + * + * @param source + * the source URL of the story + * @param content + * the textual content + * + * @return the {@link Paragraph}s + * + * @throws IOException + * in case of I/O error + */ + protected List makeParagraphs(URL source, String content) + throws IOException { if (isHtml()) { // Special
processing: content = content.replaceAll("(
]*>)|(
)|(
)", "\n* * *\n"); } + List paras = new ArrayList(); InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8")); try { - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - scan.useDelimiter("(\\n|

)"); // \n for test,

for html - - List paras = new ArrayList(); - while (scan.hasNext()) { - String line = scan.next().trim(); - boolean image = false; - if (line.startsWith("[") && line.endsWith("]")) { - URL url = getImageUrl(this, source, - line.substring(1, line.length() - 1).trim()); - if (url != null) { - paras.add(new Paragraph(url)); - image = true; - } + BufferedReader buff = new BufferedReader(new InputStreamReader(in, + "UTF-8")); + + for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff + .readLine()) { + String lines[]; + if (isHtml()) { + lines = encodedLine.split("(

|

|
|
|\\n)"); + } else { + lines = new String[] { encodedLine }; } - if (!image) { - paras.add(processPara(line)); + for (String aline : lines) { + String line = aline.trim(); + + URL image = null; + if (line.startsWith("[") && line.endsWith("]")) { + image = getImageUrl(this, source, + line.substring(1, line.length() - 1).trim()); + } + + if (image != null) { + paras.add(new Paragraph(image)); + } else { + paras.add(processPara(line)); + } } } + } finally { + in.close(); + } - // Check quotes for "bad" format - List newParas = new ArrayList(); - for (Paragraph para : paras) { - newParas.addAll(requotify(para)); - } - paras = newParas; - - // Remove double blanks/brks - boolean space = false; - boolean brk = true; - for (int i = 0; i < paras.size(); i++) { - Paragraph para = paras.get(i); - boolean thisSpace = para.getType() == ParagraphType.BLANK; - boolean thisBrk = para.getType() == ParagraphType.BREAK; - - if (space && thisBrk) { - paras.remove(i - 1); - i--; - } else if ((space || brk) && (thisSpace || thisBrk)) { - paras.remove(i); - i--; - } + // Check quotes for "bad" format + List newParas = new ArrayList(); + for (Paragraph para : paras) { + newParas.addAll(requotify(para)); + } + paras = newParas; - space = thisSpace; - brk = thisBrk; - } + // Remove double blanks/brks + fixBlanksBreaks(paras); - // Remove blank/brk at start - if (paras.size() > 0 - && (paras.get(0).getType() == ParagraphType.BLANK || paras - .get(0).getType() == ParagraphType.BREAK)) { - paras.remove(0); - } + return paras; + } - // Remove blank/brk at end - int last = paras.size() - 1; - if (paras.size() > 0 - && (paras.get(last).getType() == ParagraphType.BLANK || paras - .get(last).getType() == ParagraphType.BREAK)) { - paras.remove(last); + /** + * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of + * those {@link Paragraph}s. + *

+ * The resulting list will not contain a starting or trailing blank/break + * nor 2 blanks or breaks following each other. + * + * @param paras + * the list of {@link Paragraph}s to fix + */ + protected void fixBlanksBreaks(List paras) { + boolean space = false; + boolean brk = true; + for (int i = 0; i < paras.size(); i++) { + Paragraph para = paras.get(i); + boolean thisSpace = para.getType() == ParagraphType.BLANK; + boolean thisBrk = para.getType() == ParagraphType.BREAK; + + if (i > 0 && space && thisBrk) { + paras.remove(i - 1); + i--; + } else if ((space || brk) && (thisSpace || thisBrk)) { + paras.remove(i); + i--; } - chap.setParagraphs(paras); + space = thisSpace; + brk = thisBrk; + } - return chap; - } finally { - in.close(); + // Remove blank/brk at start + if (paras.size() > 0 + && (paras.get(0).getType() == ParagraphType.BLANK || paras.get( + 0).getType() == ParagraphType.BREAK)) { + paras.remove(0); + } + + // Remove blank/brk at end + int last = paras.size() - 1; + if (paras.size() > 0 + && (paras.get(last).getType() == ParagraphType.BLANK || paras + .get(last).getType() == ParagraphType.BREAK)) { + paras.remove(last); } } + /** + * Get the default cover related to this subject (see .info files). + * + * @param subject + * the subject + * + * @return the cover if any, or NULL + */ static BufferedImage getDefaultCover(String subject) { if (subject != null && !subject.isEmpty() && Instance.getCoverDir() != null) { @@ -609,7 +677,7 @@ public abstract class BasicSupport { InputStream in = null; try { in = Instance.getCache().open(url, getSupport(url), true); - return ImageIO.read(in); + return IOUtils.toImage(in); } catch (IOException e) { } finally { if (in != null) { @@ -699,6 +767,21 @@ public abstract class BasicSupport { return url; } + /** + * Open the input file that will be used through the support. + * + * @param source + * the source {@link URL} + * + * @return the {@link InputStream} + * + * @throws IOException + * in case of I/O error + */ + protected InputStream openInput(URL source) throws IOException { + return Instance.getCache().open(source, this, false); + } + protected InputStream reset(InputStream in) { try { in.reset(); @@ -755,7 +838,7 @@ public abstract class BasicSupport { * * @return the correctly (or so we hope) quotified paragraphs */ - private List requotify(Paragraph para) { + protected List requotify(Paragraph para) { List newParas = new ArrayList(); if (para.getType() == ParagraphType.QUOTE @@ -764,6 +847,22 @@ public abstract class BasicSupport { boolean singleQ = line.startsWith("" + openQuote); boolean doubleQ = line.startsWith("" + openDoubleQuote); + // Do not try when more than one quote at a time + // (some stories are not easily readable if we do) + if (singleQ + && line.indexOf(closeQuote, 1) < line + .lastIndexOf(closeQuote)) { + newParas.add(para); + return newParas; + } + if (doubleQ + && line.indexOf(closeDoubleQuote, 1) < line + .lastIndexOf(closeDoubleQuote)) { + newParas.add(para); + return newParas; + } + // + if (!singleQ && !doubleQ) { line = openDoubleQuote + line + closeDoubleQuote; newParas.add(new Paragraph(ParagraphType.QUOTE, line)); @@ -1023,8 +1122,8 @@ public abstract class BasicSupport { } } - for (SupportType type : new SupportType[] { SupportType.TEXT, - SupportType.INFO_TEXT }) { + for (SupportType type : new SupportType[] { SupportType.INFO_TEXT, + SupportType.TEXT }) { BasicSupport support = getSupport(type); if (support != null && support.supports(url)) { return support; @@ -1060,6 +1159,8 @@ public abstract class BasicSupport { return new E621().setType(type); case CBZ: return new Cbz().setType(type); + case HTML: + return new Html().setType(type); } return null;