X-Git-Url: http://git.nikiroo.be/?p=fanfix.git;a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=2b4715ab0beec0af04f5d52a70f88f55a61193fd;hp=93155961eaea1ff36957553e102d420181a93b7c;hb=2284842831ea46e89b97dd22b6e294caad361f30;hpb=d98a29006897a1ae31ff3a039afe9643a48e0704 diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index 9315596..2b4715a 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -1,10 +1,12 @@ package be.nikiroo.fanfix.supported; import java.awt.image.BufferedImage; +import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; @@ -23,6 +25,7 @@ import be.nikiroo.fanfix.data.Paragraph; import be.nikiroo.fanfix.data.Paragraph.ParagraphType; import be.nikiroo.fanfix.data.Story; import be.nikiroo.utils.IOUtils; +import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; /** @@ -57,7 +60,9 @@ public abstract class BasicSupport { /** Furry website with comics support */ E621, /** CBZ files */ - CBZ; + CBZ, + /** HTML files */ + HTML; /** * A description of this support type (more information than the @@ -145,7 +150,7 @@ public abstract class BasicSupport { private InputStream in; private SupportType type; - private URL currentReferer; // with on 'r', as in 'HTTP'... + private URL currentReferer; // with only one 'r', as in 'HTTP'... // quote chars private char openQuote = Instance.getTrans().getChar( @@ -282,7 +287,7 @@ public abstract class BasicSupport { */ protected Story processMeta(URL url, boolean close, boolean getDesc) throws IOException { - in = Instance.getCache().open(url, this, false); + in = openInput(url); if (in == null) { return null; } @@ -328,26 +333,42 @@ public abstract class BasicSupport { * * @param url * the story resource + * @param pg + * the optional progress reporter * * @return the {@link Story} * * @throws IOException * in case of I/O error */ - public Story process(URL url) throws IOException { + public Story process(URL url, Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } else { + pg.setMinMax(0, 100); + } + setCurrentReferer(url); + pg.setProgress(1); try { Story story = processMeta(url, false, true); + pg.setProgress(10); if (story == null) { + pg.setProgress(100); return null; } story.setChapters(new ArrayList()); List> chapters = getChapters(url, getInput()); + pg.setProgress(20); + int i = 1; if (chapters != null) { + Progress pgChaps = new Progress(0, chapters.size()); + pg.addProgress(pgChaps, 80); + for (Entry chap : chapters) { setCurrentReferer(chap.getValue()); InputStream chapIn = Instance.getCache().open( @@ -360,8 +381,10 @@ public abstract class BasicSupport { chapIn.close(); } - i++; + pgChaps.setProgress(i++); } + } else { + pg.setProgress(100); } return story; @@ -491,90 +514,137 @@ public abstract class BasicSupport { Chapter chap = new Chapter(number, chapterName); - if (content == null) { - return chap; + if (content != null) { + chap.setParagraphs(makeParagraphs(source, content)); } + return chap; + + } + + /** + * Convert the given content into {@link Paragraph}s. + * + * @param source + * the source URL of the story + * @param content + * the textual content + * + * @return the {@link Paragraph}s + * + * @throws IOException + * in case of I/O error + */ + protected List makeParagraphs(URL source, String content) + throws IOException { if (isHtml()) { // Special
processing: content = content.replaceAll("(
]*>)|(
)|(
)", "\n* * *\n"); } + List paras = new ArrayList(); InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8")); try { - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - scan.useDelimiter("(\\n|

)"); // \n for test,

for html - - List paras = new ArrayList(); - while (scan.hasNext()) { - String line = scan.next().trim(); - boolean image = false; - if (line.startsWith("[") && line.endsWith("]")) { - URL url = getImageUrl(this, source, - line.substring(1, line.length() - 1).trim()); - if (url != null) { - paras.add(new Paragraph(url)); - image = true; - } + BufferedReader buff = new BufferedReader(new InputStreamReader(in, + "UTF-8")); + + for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff + .readLine()) { + String lines[]; + if (isHtml()) { + lines = encodedLine.split("(

|

|
|
|\\n)"); + } else { + lines = new String[] { encodedLine }; } - if (!image) { - paras.add(processPara(line)); + for (String aline : lines) { + String line = aline.trim(); + + URL image = null; + if (line.startsWith("[") && line.endsWith("]")) { + image = getImageUrl(this, source, + line.substring(1, line.length() - 1).trim()); + } + + if (image != null) { + paras.add(new Paragraph(image)); + } else { + paras.add(processPara(line)); + } } } + } finally { + in.close(); + } - // Check quotes for "bad" format - List newParas = new ArrayList(); - for (Paragraph para : paras) { - newParas.addAll(requotify(para)); - } - paras = newParas; - - // Remove double blanks/brks - boolean space = false; - boolean brk = true; - for (int i = 0; i < paras.size(); i++) { - Paragraph para = paras.get(i); - boolean thisSpace = para.getType() == ParagraphType.BLANK; - boolean thisBrk = para.getType() == ParagraphType.BREAK; - - if (space && thisBrk) { - paras.remove(i - 1); - i--; - } else if ((space || brk) && (thisSpace || thisBrk)) { - paras.remove(i); - i--; - } + // Check quotes for "bad" format + List newParas = new ArrayList(); + for (Paragraph para : paras) { + newParas.addAll(requotify(para)); + } + paras = newParas; - space = thisSpace; - brk = thisBrk; - } + // Remove double blanks/brks + fixBlanksBreaks(paras); - // Remove blank/brk at start - if (paras.size() > 0 - && (paras.get(0).getType() == ParagraphType.BLANK || paras - .get(0).getType() == ParagraphType.BREAK)) { - paras.remove(0); - } + return paras; + } - // Remove blank/brk at end - int last = paras.size() - 1; - if (paras.size() > 0 - && (paras.get(last).getType() == ParagraphType.BLANK || paras - .get(last).getType() == ParagraphType.BREAK)) { - paras.remove(last); + /** + * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of + * those {@link Paragraph}s. + *

+ * The resulting list will not contain a starting or trailing blank/break + * nor 2 blanks or breaks following each other. + * + * @param paras + * the list of {@link Paragraph}s to fix + */ + protected void fixBlanksBreaks(List paras) { + boolean space = false; + boolean brk = true; + for (int i = 0; i < paras.size(); i++) { + Paragraph para = paras.get(i); + boolean thisSpace = para.getType() == ParagraphType.BLANK; + boolean thisBrk = para.getType() == ParagraphType.BREAK; + + if (i > 0 && space && thisBrk) { + paras.remove(i - 1); + i--; + } else if ((space || brk) && (thisSpace || thisBrk)) { + paras.remove(i); + i--; } - chap.setParagraphs(paras); + space = thisSpace; + brk = thisBrk; + } - return chap; - } finally { - in.close(); + // Remove blank/brk at start + if (paras.size() > 0 + && (paras.get(0).getType() == ParagraphType.BLANK || paras.get( + 0).getType() == ParagraphType.BREAK)) { + paras.remove(0); + } + + // Remove blank/brk at end + int last = paras.size() - 1; + if (paras.size() > 0 + && (paras.get(last).getType() == ParagraphType.BLANK || paras + .get(last).getType() == ParagraphType.BREAK)) { + paras.remove(last); } } + /** + * Get the default cover related to this subject (see .info files). + * + * @param subject + * the subject + * + * @return the cover if any, or NULL + */ static BufferedImage getDefaultCover(String subject) { if (subject != null && !subject.isEmpty() && Instance.getCoverDir() != null) { @@ -697,6 +767,21 @@ public abstract class BasicSupport { return url; } + /** + * Open the input file that will be used through the support. + * + * @param source + * the source {@link URL} + * + * @return the {@link InputStream} + * + * @throws IOException + * in case of I/O error + */ + protected InputStream openInput(URL source) throws IOException { + return Instance.getCache().open(source, this, false); + } + protected InputStream reset(InputStream in) { try { in.reset(); @@ -753,7 +838,7 @@ public abstract class BasicSupport { * * @return the correctly (or so we hope) quotified paragraphs */ - private List requotify(Paragraph para) { + protected List requotify(Paragraph para) { List newParas = new ArrayList(); if (para.getType() == ParagraphType.QUOTE @@ -828,7 +913,7 @@ public abstract class BasicSupport { * * @return the processed {@link Paragraph} */ - private Paragraph processPara(String line) { + protected Paragraph processPara(String line) { line = ifUnhtml(line).trim(); boolean space = true; @@ -851,11 +936,16 @@ public abstract class BasicSupport { if (tentativeCloseQuote) { tentativeCloseQuote = false; - if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z') - || (car >= '0' && car <= '9')) { + if (Character.isLetterOrDigit(car)) { builder.append("'"); } else { - builder.append(closeQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.append(closeDoubleQuote); + continue; + } else { + builder.append(closeQuote); + } } } @@ -871,9 +961,21 @@ public abstract class BasicSupport { case '\'': if (space || (brk && quote)) { quote = true; - builder.append(openQuote); - } else if (prev == ' ') { - builder.append(openQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } + } else if (prev == ' ' || prev == car) { + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } } else { // it is a quote ("I'm off") or a 'quote' ("This // 'good' restaurant"...) @@ -926,7 +1028,13 @@ public abstract class BasicSupport { quote = true; builder.append(openQuote); } else { - builder.append(openQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } } space = false; brk = false; @@ -939,7 +1047,13 @@ public abstract class BasicSupport { case '」': space = false; brk = false; - builder.append(closeQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(closeDoubleQuote); + } else { + builder.append(closeQuote); + } break; case '«': @@ -1037,8 +1151,8 @@ public abstract class BasicSupport { } } - for (SupportType type : new SupportType[] { SupportType.TEXT, - SupportType.INFO_TEXT }) { + for (SupportType type : new SupportType[] { SupportType.INFO_TEXT, + SupportType.TEXT }) { BasicSupport support = getSupport(type); if (support != null && support.supports(url)) { return support; @@ -1074,6 +1188,8 @@ public abstract class BasicSupport { return new E621().setType(type); case CBZ: return new Cbz().setType(type); + case HTML: + return new Html().setType(type); } return null;