X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupportPara.java;h=1dbedc9cc326b0bdcb18e11753c8d78ccc611e22;hb=98b95fb81566ca8b04c8d891a02c8019d8bed63d;hp=ef4d7d778f192ed10335366886768517399d9f3a;hpb=d3ab274f9152618a8bc21a0a20caed1a8520b224;p=fanfix.git diff --git a/src/be/nikiroo/fanfix/supported/BasicSupportPara.java b/src/be/nikiroo/fanfix/supported/BasicSupportPara.java deleted file mode 100644 index ef4d7d7..0000000 --- a/src/be/nikiroo/fanfix/supported/BasicSupportPara.java +++ /dev/null @@ -1,584 +0,0 @@ -package be.nikiroo.fanfix.supported; - -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; - -import be.nikiroo.fanfix.Instance; -import be.nikiroo.fanfix.bundles.Config; -import be.nikiroo.fanfix.bundles.StringId; -import be.nikiroo.fanfix.data.Chapter; -import be.nikiroo.fanfix.data.Paragraph; -import be.nikiroo.fanfix.data.Paragraph.ParagraphType; -import be.nikiroo.utils.Image; -import be.nikiroo.utils.Progress; -import be.nikiroo.utils.StringUtils; - -/** - * Helper class for {@link BasicSupport}, mostly dedicated to {@link Paragraph} - * and text formating for the {@link BasicSupport} class. - * - * @author niki - */ -public class BasicSupportPara { - // quote chars - private static char openQuote = Instance.getTrans().getCharacter( - StringId.OPEN_SINGLE_QUOTE); - private static char closeQuote = Instance.getTrans().getCharacter( - StringId.CLOSE_SINGLE_QUOTE); - private static char openDoubleQuote = Instance.getTrans().getCharacter( - StringId.OPEN_DOUBLE_QUOTE); - private static char closeDoubleQuote = Instance.getTrans().getCharacter( - StringId.CLOSE_DOUBLE_QUOTE); - - // used by this class: - BasicSupportHelper bsHelper; - BasicSupportImages bsImages; - - public BasicSupportPara(BasicSupportHelper bsHelper, BasicSupportImages bsImages) { - this.bsHelper = bsHelper; - this.bsImages = bsImages; - } - - /** - * Create a {@link Chapter} object from the given information, formatting - * the content as it should be. - * - * @param support - * the linked {@link BasicSupport} - * @param source - * the source of the story (for image lookup in the same path if - * the source is a file, can be NULL) - * @param number - * the chapter number - * @param name - * the chapter name - * @param content - * the chapter content - * @param pg - * the optional progress reporter - * @param html - * TRUE if the input content is in HTML mode - * - * @return the {@link Chapter} - * - * @throws IOException - * in case of I/O error - */ - public Chapter makeChapter(BasicSupport support, URL source, - int number, String name, String content, boolean html, Progress pg) - throws IOException { - // Chapter name: process it correctly, then remove the possible - // redundant "Chapter x: " in front of it, or "-" (as in - // "Chapter 5: - Fun!" after the ": " was automatically added) - String chapterName = processPara(name, false) - .getContent().trim(); - for (String lang : Instance.getConfig().getList(Config.CONF_CHAPTER)) { - String chapterWord = Instance.getConfig().getStringX( - Config.CONF_CHAPTER, lang); - if (chapterName.startsWith(chapterWord)) { - chapterName = chapterName.substring(chapterWord.length()) - .trim(); - break; - } - } - - if (chapterName.startsWith(Integer.toString(number))) { - chapterName = chapterName.substring( - Integer.toString(number).length()).trim(); - } - - while (chapterName.startsWith(":") || chapterName.startsWith("-")) { - chapterName = chapterName.substring(1).trim(); - } - // - - Chapter chap = new Chapter(number, chapterName); - - if (content != null) { - List paras = makeParagraphs(support, source, content, - html, pg); - long words = 0; - for (Paragraph para : paras) { - words += para.getWords(); - } - chap.setParagraphs(paras); - chap.setWords(words); - } - - return chap; - } - - /** - * Check quotes for bad format (i.e., quotes with normal paragraphs inside) - * and requotify them (i.e., separate them into QUOTE paragraphs and other - * paragraphs (quotes or not)). - * - * @param para - * the paragraph to requotify (not necessarily a quote) - * @param html - * TRUE if the input content is in HTML mode - * - * @return the correctly (or so we hope) quotified paragraphs - */ - protected List requotify(Paragraph para, boolean html) { - List newParas = new ArrayList(); - - if (para.getType() == ParagraphType.QUOTE - && para.getContent().length() > 2) { - String line = para.getContent(); - boolean singleQ = line.startsWith("" + openQuote); - boolean doubleQ = line.startsWith("" + openDoubleQuote); - - // Do not try when more than one quote at a time - // (some stories are not easily readable if we do) - if (singleQ - && line.indexOf(closeQuote, 1) < line - .lastIndexOf(closeQuote)) { - newParas.add(para); - return newParas; - } - if (doubleQ - && line.indexOf(closeDoubleQuote, 1) < line - .lastIndexOf(closeDoubleQuote)) { - newParas.add(para); - return newParas; - } - // - - if (!singleQ && !doubleQ) { - line = openDoubleQuote + line + closeDoubleQuote; - newParas.add(new Paragraph(ParagraphType.QUOTE, line, para - .getWords())); - } else { - char open = singleQ ? openQuote : openDoubleQuote; - char close = singleQ ? closeQuote : closeDoubleQuote; - - int posDot = -1; - boolean inQuote = false; - int i = 0; - for (char car : line.toCharArray()) { - if (car == open) { - inQuote = true; - } else if (car == close) { - inQuote = false; - } else if (car == '.' && !inQuote) { - posDot = i; - break; - } - i++; - } - - if (posDot >= 0) { - String rest = line.substring(posDot + 1).trim(); - line = line.substring(0, posDot + 1).trim(); - long words = 1; - for (char car : line.toCharArray()) { - if (car == ' ') { - words++; - } - } - newParas.add(new Paragraph(ParagraphType.QUOTE, line, words)); - if (!rest.isEmpty()) { - newParas.addAll(requotify(processPara(rest, html), html)); - } - } else { - newParas.add(para); - } - } - } else { - newParas.add(para); - } - - return newParas; - } - - /** - * Process a {@link Paragraph} from a raw line of text. - *

- * Will also fix quotes and HTML encoding if needed. - * - * @param line - * the raw line - * @param html - * TRUE if the input content is in HTML mode - * - * @return the processed {@link Paragraph} - */ - protected Paragraph processPara(String line, boolean html) { - if (html) { - line = StringUtils.unhtml(line).trim(); - } - boolean space = true; - boolean brk = true; - boolean quote = false; - boolean tentativeCloseQuote = false; - char prev = '\0'; - int dashCount = 0; - long words = 1; - - StringBuilder builder = new StringBuilder(); - for (char car : line.toCharArray()) { - if (car != '-') { - if (dashCount > 0) { - // dash, ndash and mdash: - – — - // currently: always use mdash - builder.append(dashCount == 1 ? '-' : '—'); - } - dashCount = 0; - } - - if (tentativeCloseQuote) { - tentativeCloseQuote = false; - if (Character.isLetterOrDigit(car)) { - builder.append("'"); - } else { - // handle double-single quotes as double quotes - if (prev == car) { - builder.append(closeDoubleQuote); - continue; - } - - builder.append(closeQuote); - } - } - - switch (car) { - case ' ': // note: unbreakable space - case ' ': - case '\t': - case '\n': // just in case - case '\r': // just in case - if (builder.length() > 0 - && builder.charAt(builder.length() - 1) != ' ') { - words++; - } - builder.append(' '); - break; - - case '\'': - if (space || (brk && quote)) { - quote = true; - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(openDoubleQuote); - } else { - builder.append(openQuote); - } - } else if (prev == ' ' || prev == car) { - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(openDoubleQuote); - } else { - builder.append(openQuote); - } - } else { - // it is a quote ("I'm off") or a 'quote' ("This - // 'good' restaurant"...) - tentativeCloseQuote = true; - } - break; - - case '"': - if (space || (brk && quote)) { - quote = true; - builder.append(openDoubleQuote); - } else if (prev == ' ') { - builder.append(openDoubleQuote); - } else { - builder.append(closeDoubleQuote); - } - break; - - case '-': - if (space) { - quote = true; - } else { - dashCount++; - } - space = false; - break; - - case '*': - case '~': - case '/': - case '\\': - case '<': - case '>': - case '=': - case '+': - case '_': - case '–': - case '—': - space = false; - builder.append(car); - break; - - case '‘': - case '`': - case '‹': - case '﹁': - case '〈': - case '「': - if (space || (brk && quote)) { - quote = true; - builder.append(openQuote); - } else { - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(openDoubleQuote); - } else { - builder.append(openQuote); - } - } - space = false; - brk = false; - break; - - case '’': - case '›': - case '﹂': - case '〉': - case '」': - space = false; - brk = false; - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(closeDoubleQuote); - } else { - builder.append(closeQuote); - } - break; - - case '«': - case '“': - case '﹃': - case '《': - case '『': - if (space || (brk && quote)) { - quote = true; - builder.append(openDoubleQuote); - } else { - builder.append(openDoubleQuote); - } - space = false; - brk = false; - break; - - case '»': - case '”': - case '﹄': - case '》': - case '』': - space = false; - brk = false; - builder.append(closeDoubleQuote); - break; - - default: - space = false; - brk = false; - builder.append(car); - break; - } - - prev = car; - } - - if (tentativeCloseQuote) { - tentativeCloseQuote = false; - builder.append(closeQuote); - } - - line = builder.toString().trim(); - - ParagraphType type = ParagraphType.NORMAL; - if (space) { - type = ParagraphType.BLANK; - } else if (brk) { - type = ParagraphType.BREAK; - } else if (quote) { - type = ParagraphType.QUOTE; - } - - return new Paragraph(type, line, words); - } - - /** - * Convert the given content into {@link Paragraph}s. - * - * @param support - * the linked {@link BasicSupport} (can be NULL), used to - * download optional image content in [] - * @param source - * the source URL of the story (for image lookup in the same path - * if the source is a file, can be NULL) - * @param content - * the textual content - * @param html - * TRUE if the input content is in HTML mode - * @param pg - * the optional progress reporter - * - * @return the {@link Paragraph}s - * - * @throws IOException - * in case of I/O error - */ - protected List makeParagraphs(BasicSupport support, - URL source, String content, boolean html, Progress pg) - throws IOException { - if (pg == null) { - pg = new Progress(); - } - - if (html) { - // Special


processing: - content = content.replaceAll("(
]*>)|(
)|(
)", - "
* * *
"); - } - - List paras = new ArrayList(); - - if (content != null && !content.trim().isEmpty()) { - if (html) { - String[] tab = content.split("(

|

|
|
)"); - pg.setMinMax(0, tab.length); - int i = 1; - for (String line : tab) { - if (line.startsWith("[") && line.endsWith("]")) { - pg.setName("Extracting image " + i); - } - paras.add(makeParagraph(support, source, line.trim(), html)); - pg.setProgress(i++); - } - } else { - List lines = new ArrayList(); - BufferedReader buff = null; - try { - buff = new BufferedReader( - new InputStreamReader(new ByteArrayInputStream( - content.getBytes("UTF-8")), "UTF-8")); - for (String line = buff.readLine(); line != null; line = buff - .readLine()) { - lines.add(line.trim()); - } - } finally { - if (buff != null) { - buff.close(); - } - } - - pg.setMinMax(0, lines.size()); - int i = 0; - for (String line : lines) { - if (line.startsWith("[") && line.endsWith("]")) { - pg.setName("Extracting image " + i); - } - paras.add(makeParagraph(support, source, line, html)); - pg.setProgress(i++); - } - } - - pg.done(); - pg.setName(null); - - // Check quotes for "bad" format - List newParas = new ArrayList(); - for (Paragraph para : paras) { - newParas.addAll(requotify(para, html)); - } - paras = newParas; - - // Remove double blanks/brks - fixBlanksBreaks(paras); - } - - return paras; - } - - /** - * Convert the given line into a single {@link Paragraph}. - * - * @param support - * the linked {@link BasicSupport} (can be NULL), used to - * download optional image content in [] - * @param source - * the source URL of the story (for image lookup in the same path - * if the source is a file, can be NULL) - * @param line - * the textual content of the paragraph - * @param html - * TRUE if the input content is in HTML mode - * - * @return the {@link Paragraph} - */ - protected Paragraph makeParagraph(BasicSupport support, URL source, - String line, boolean html) { - Image image = null; - if (line.startsWith("[") && line.endsWith("]")) { - image = bsHelper.getImage(support, source, line - .substring(1, line.length() - 1).trim()); - } - - if (image != null) { - return new Paragraph(image); - } - - return processPara(line, html); - } - - /** - * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of - * those {@link Paragraph}s. - *

- * The resulting list will not contain a starting or trailing blank/break - * nor 2 blanks or breaks following each other. - * - * @param paras - * the list of {@link Paragraph}s to fix - */ - protected void fixBlanksBreaks(List paras) { - boolean space = false; - boolean brk = true; - for (int i = 0; i < paras.size(); i++) { - Paragraph para = paras.get(i); - boolean thisSpace = para.getType() == ParagraphType.BLANK; - boolean thisBrk = para.getType() == ParagraphType.BREAK; - - if (i > 0 && space && thisBrk) { - paras.remove(i - 1); - i--; - } else if ((space || brk) && (thisSpace || thisBrk)) { - paras.remove(i); - i--; - } - - space = thisSpace; - brk = thisBrk; - } - - // Remove blank/brk at start - if (paras.size() > 0 - && (paras.get(0).getType() == ParagraphType.BLANK || paras.get( - 0).getType() == ParagraphType.BREAK)) { - paras.remove(0); - } - - // Remove blank/brk at end - int last = paras.size() - 1; - if (paras.size() > 0 - && (paras.get(last).getType() == ParagraphType.BLANK || paras - .get(last).getType() == ParagraphType.BREAK)) { - paras.remove(last); - } - } -}