X-Git-Url: http://git.nikiroo.be/?p=fanfix.git;a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport_Deprecated.java;fp=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport_Deprecated.java;h=0000000000000000000000000000000000000000;hp=1faac03eba405657f6861f53660f2150357a4387;hb=0fc81e6465aa9c1f1dfc19b532082220d609768a;hpb=505be508ae7d3fb48122be548b310a238cfb91eb diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java b/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java deleted file mode 100644 index 1faac03..0000000 --- a/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java +++ /dev/null @@ -1,1322 +0,0 @@ -package be.nikiroo.fanfix.supported; - -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Map.Entry; -import java.util.Scanner; - -import be.nikiroo.fanfix.Instance; -import be.nikiroo.fanfix.bundles.Config; -import be.nikiroo.fanfix.bundles.StringId; -import be.nikiroo.fanfix.data.Chapter; -import be.nikiroo.fanfix.data.MetaData; -import be.nikiroo.fanfix.data.Paragraph; -import be.nikiroo.fanfix.data.Paragraph.ParagraphType; -import be.nikiroo.fanfix.data.Story; -import be.nikiroo.utils.Image; -import be.nikiroo.utils.Progress; -import be.nikiroo.utils.StringUtils; - -/** - * DEPRECATED: use the new Jsoup 'Node' system. - *

- * This class is the base class used by the other support classes. It can be - * used outside of this package, and have static method that you can use to get - * access to the correct support class. - *

- * It will be used with 'resources' (usually web pages or files). - * - * @author niki - */ -@Deprecated -public abstract class BasicSupport_Deprecated extends BasicSupport { - private InputStream in; - - // quote chars - private char openQuote = Instance.getTrans().getCharacter( - StringId.OPEN_SINGLE_QUOTE); - private char closeQuote = Instance.getTrans().getCharacter( - StringId.CLOSE_SINGLE_QUOTE); - private char openDoubleQuote = Instance.getTrans().getCharacter( - StringId.OPEN_DOUBLE_QUOTE); - private char closeDoubleQuote = Instance.getTrans().getCharacter( - StringId.CLOSE_DOUBLE_QUOTE); - - // New methods not used in Deprecated mode - @Override - protected String getDesc() throws IOException { - throw new RuntimeException("should not be used by legacy code"); - } - - @Override - protected MetaData getMeta() throws IOException { - throw new RuntimeException("should not be used by legacy code"); - } - - @Override - protected List> getChapters(Progress pg) - throws IOException { - throw new RuntimeException("should not be used by legacy code"); - } - - @Override - protected String getChapterContent(URL chapUrl, int number, Progress pg) - throws IOException { - throw new RuntimeException("should not be used by legacy code"); - } - - @Override - public Story process(Progress pg) throws IOException { - return process(getSource(), pg); - } - - // - - /** - * Return the {@link MetaData} of this story. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the associated {@link MetaData}, never NULL - * - * @throws IOException - * in case of I/O error - */ - protected abstract MetaData getMeta(URL source, InputStream in) - throws IOException; - - /** - * Return the story description. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the description - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getDesc(URL source, InputStream in) - throws IOException; - - /** - * Return the list of chapters (name and resource). - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * @param pg - * the optional progress reporter - * - * @return the chapters - * - * @throws IOException - * in case of I/O error - */ - protected abstract List> getChapters(URL source, - InputStream in, Progress pg) throws IOException; - - /** - * Return the content of the chapter (possibly HTML encoded, if - * {@link BasicSupport_Deprecated#isHtml()} is TRUE). - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * @param number - * the chapter number - * @param pg - * the optional progress reporter - * - * @return the content - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getChapterContent(URL source, InputStream in, - int number, Progress pg) throws IOException; - - /** - * Process the given story resource into a partially filled {@link Story} - * object containing the name and metadata, except for the description. - * - * @param url - * the story resource - * - * @return the {@link Story} - * - * @throws IOException - * in case of I/O error - */ - public Story processMeta(URL url) throws IOException { - return processMeta(url, true, false, null); - } - - /** - * Process the given story resource into a partially filled {@link Story} - * object containing the name and metadata. - * - * @param url - * the story resource - * @param close - * close "this" and "in" when done - * @param getDesc - * retrieve the description of the story, or not - * @param pg - * the optional progress reporter - * - * @return the {@link Story}, never NULL - * - * @throws IOException - * in case of I/O error - */ - protected Story processMeta(URL url, boolean close, boolean getDesc, - Progress pg) throws IOException { - if (pg == null) { - pg = new Progress(); - } else { - pg.setMinMax(0, 100); - } - - login(); - pg.setProgress(10); - - url = getCanonicalUrl(url); - - setCurrentReferer(url); - - in = openInput(url); // NULL allowed here - try { - preprocess(url, getInput()); - pg.setProgress(30); - - Story story = new Story(); - MetaData meta = getMeta(url, getInput()); - if (meta.getCreationDate() == null - || meta.getCreationDate().isEmpty()) { - meta.setCreationDate(StringUtils.fromTime(new Date().getTime())); - } - story.setMeta(meta); - - pg.setProgress(50); - - if (meta.getCover() == null) { - meta.setCover(getDefaultCover(meta.getSubject())); - } - - pg.setProgress(60); - - if (getDesc) { - String descChapterName = Instance.getTrans().getString( - StringId.DESCRIPTION); - story.getMeta().setResume( - makeChapter(url, 0, descChapterName, - getDesc(url, getInput()), null)); - } - - pg.setProgress(100); - return story; - } finally { - if (close) { - close(); - - if (in != null) { - in.close(); - } - } - } - } - - /** - * Process the given story resource into a fully filled {@link Story} - * object. - * - * @param url - * the story resource - * @param pg - * the optional progress reporter - * - * @return the {@link Story}, never NULL - * - * @throws IOException - * in case of I/O error - */ - protected Story process(URL url, Progress pg) throws IOException { - if (pg == null) { - pg = new Progress(); - } else { - pg.setMinMax(0, 100); - } - - url = getCanonicalUrl(url); - pg.setProgress(1); - try { - Progress pgMeta = new Progress(); - pg.addProgress(pgMeta, 10); - Story story = processMeta(url, false, true, pgMeta); - if (!pgMeta.isDone()) { - pgMeta.setProgress(pgMeta.getMax()); // 10% - } - - pg.setName("Retrieving " + story.getMeta().getTitle()); - - setCurrentReferer(url); - - Progress pgGetChapters = new Progress(); - pg.addProgress(pgGetChapters, 10); - story.setChapters(new ArrayList()); - List> chapters = getChapters(url, getInput(), - pgGetChapters); - if (!pgGetChapters.isDone()) { - pgGetChapters.setProgress(pgGetChapters.getMax()); // 20% - } - - if (chapters != null) { - Progress pgChaps = new Progress("Extracting chapters", 0, - chapters.size() * 300); - pg.addProgress(pgChaps, 80); - - long words = 0; - int i = 1; - for (Entry chap : chapters) { - pgChaps.setName("Extracting chapter " + i); - InputStream chapIn = null; - if (chap.getValue() != null) { - setCurrentReferer(chap.getValue()); - chapIn = Instance.getCache().open(chap.getValue(), - this, false); - } - pgChaps.setProgress(i * 100); - try { - Progress pgGetChapterContent = new Progress(); - Progress pgMakeChapter = new Progress(); - pgChaps.addProgress(pgGetChapterContent, 100); - pgChaps.addProgress(pgMakeChapter, 100); - - String content = getChapterContent(url, chapIn, i, - pgGetChapterContent); - if (!pgGetChapterContent.isDone()) { - pgGetChapterContent.setProgress(pgGetChapterContent - .getMax()); - } - - Chapter cc = makeChapter(url, i, chap.getKey(), - content, pgMakeChapter); - if (!pgMakeChapter.isDone()) { - pgMakeChapter.setProgress(pgMakeChapter.getMax()); - } - - words += cc.getWords(); - story.getChapters().add(cc); - story.getMeta().setWords(words); - } finally { - if (chapIn != null) { - chapIn.close(); - } - } - - i++; - } - - pgChaps.setName("Extracting chapters"); - } else { - pg.setProgress(80); - } - - return story; - - } finally { - close(); - - if (in != null) { - in.close(); - } - } - } - - /** - * Prepare the support if needed before processing. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @throws IOException - * on I/O error - */ - @SuppressWarnings("unused") - protected void preprocess(URL source, InputStream in) throws IOException { - } - - /** - * Create a {@link Chapter} object from the given information, formatting - * the content as it should be. - * - * @param source - * the source of the story - * @param number - * the chapter number - * @param name - * the chapter name - * @param content - * the chapter content - * @param pg - * the optional progress reporter - * - * @return the {@link Chapter} - * - * @throws IOException - * in case of I/O error - */ - protected Chapter makeChapter(URL source, int number, String name, - String content, Progress pg) throws IOException { - // Chapter name: process it correctly, then remove the possible - // redundant "Chapter x: " in front of it, or "-" (as in - // "Chapter 5: - Fun!" after the ": " was automatically added) - String chapterName = processPara(name).getContent().trim(); - for (String lang : Instance.getConfig().getList(Config.CONF_CHAPTER)) { - String chapterWord = Instance.getConfig().getStringX( - Config.CONF_CHAPTER, lang); - if (chapterName.startsWith(chapterWord)) { - chapterName = chapterName.substring(chapterWord.length()) - .trim(); - break; - } - } - - if (chapterName.startsWith(Integer.toString(number))) { - chapterName = chapterName.substring( - Integer.toString(number).length()).trim(); - } - - while (chapterName.startsWith(":") || chapterName.startsWith("-")) { - chapterName = chapterName.substring(1).trim(); - } - // - - Chapter chap = new Chapter(number, chapterName); - - if (content != null) { - List paras = makeParagraphs(source, content, pg); - long words = 0; - for (Paragraph para : paras) { - words += para.getWords(); - } - chap.setParagraphs(paras); - chap.setWords(words); - } - - return chap; - - } - - /** - * Convert the given content into {@link Paragraph}s. - * - * @param source - * the source URL of the story - * @param content - * the textual content - * @param pg - * the optional progress reporter - * - * @return the {@link Paragraph}s - * - * @throws IOException - * in case of I/O error - */ - protected List makeParagraphs(URL source, String content, - Progress pg) throws IOException { - if (pg == null) { - pg = new Progress(); - } - - if (isHtml()) { - // Special


processing: - content = content.replaceAll("(
]*>)|(
)|(
)", - "
* * *
"); - } - - List paras = new ArrayList(); - - if (content != null && !content.trim().isEmpty()) { - if (isHtml()) { - String[] tab = content.split("(

|

|
|
)"); - pg.setMinMax(0, tab.length); - int i = 1; - for (String line : tab) { - if (line.startsWith("[") && line.endsWith("]")) { - pg.setName("Extracting image " + i); - } - paras.add(makeParagraph(source, line.trim())); - pg.setProgress(i++); - } - pg.setName(null); - } else { - List lines = new ArrayList(); - BufferedReader buff = null; - try { - buff = new BufferedReader( - new InputStreamReader(new ByteArrayInputStream( - content.getBytes("UTF-8")), "UTF-8")); - for (String line = buff.readLine(); line != null; line = buff - .readLine()) { - lines.add(line.trim()); - } - } finally { - if (buff != null) { - buff.close(); - } - } - - pg.setMinMax(0, lines.size()); - int i = 0; - for (String line : lines) { - if (line.startsWith("[") && line.endsWith("]")) { - pg.setName("Extracting image " + i); - } - paras.add(makeParagraph(source, line)); - pg.setProgress(i++); - } - pg.setName(null); - } - - // Check quotes for "bad" format - List newParas = new ArrayList(); - for (Paragraph para : paras) { - newParas.addAll(requotify(para)); - } - paras = newParas; - - // Remove double blanks/brks - fixBlanksBreaks(paras); - } - - return paras; - } - - /** - * Convert the given line into a single {@link Paragraph}. - * - * @param source - * the source URL of the story - * @param line - * the textual content of the paragraph - * - * @return the {@link Paragraph} - */ - private Paragraph makeParagraph(URL source, String line) { - Image image = null; - if (line.startsWith("[") && line.endsWith("]")) { - image = getImage(this, source, line.substring(1, line.length() - 1) - .trim()); - } - - if (image != null) { - return new Paragraph(image); - } - - return processPara(line); - } - - /** - * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of - * those {@link Paragraph}s. - *

- * The resulting list will not contain a starting or trailing blank/break - * nor 2 blanks or breaks following each other. - * - * @param paras - * the list of {@link Paragraph}s to fix - */ - protected void fixBlanksBreaks(List paras) { - boolean space = false; - boolean brk = true; - for (int i = 0; i < paras.size(); i++) { - Paragraph para = paras.get(i); - boolean thisSpace = para.getType() == ParagraphType.BLANK; - boolean thisBrk = para.getType() == ParagraphType.BREAK; - - if (i > 0 && space && thisBrk) { - paras.remove(i - 1); - i--; - } else if ((space || brk) && (thisSpace || thisBrk)) { - paras.remove(i); - i--; - } - - space = thisSpace; - brk = thisBrk; - } - - // Remove blank/brk at start - if (paras.size() > 0 - && (paras.get(0).getType() == ParagraphType.BLANK || paras.get( - 0).getType() == ParagraphType.BREAK)) { - paras.remove(0); - } - - // Remove blank/brk at end - int last = paras.size() - 1; - if (paras.size() > 0 - && (paras.get(last).getType() == ParagraphType.BLANK || paras - .get(last).getType() == ParagraphType.BREAK)) { - paras.remove(last); - } - } - - /** - * Get the default cover related to this subject (see .info files). - * - * @param subject - * the subject - * - * @return the cover if any, or NULL - */ - static Image getDefaultCover(String subject) { - if (subject != null && !subject.isEmpty() - && Instance.getCoverDir() != null) { - try { - File fileCover = new File(Instance.getCoverDir(), subject); - return getImage(null, fileCover.toURI().toURL(), subject); - } catch (MalformedURLException e) { - } - } - - return null; - } - - /** - * Return the list of supported image extensions. - * - * @param emptyAllowed - * TRUE to allow an empty extension on first place, which can be - * used when you may already have an extension in your input but - * are not sure about it - * - * @return the extensions - */ - static String[] getImageExt(boolean emptyAllowed) { - if (emptyAllowed) { - return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; - } - - return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; - } - - /** - * Check if the given resource can be a local image or a remote image, then - * refresh the cache with it if it is. - * - * @param source - * the story source - * @param line - * the resource to check - * - * @return the image if found, or NULL - * - */ - static Image getImage(BasicSupport_Deprecated support, URL source, - String line) { - URL url = getImageUrl(support, source, line); - if (url != null) { - if ("file".equals(url.getProtocol())) { - if (new File(url.getPath()).isDirectory()) { - return null; - } - } - InputStream in = null; - try { - in = Instance.getCache().open(url, getSupport(url), true); - return new Image(in); - } catch (IOException e) { - } finally { - if (in != null) { - try { - in.close(); - } catch (IOException e) { - } - } - } - } - - return null; - } - - /** - * Check if the given resource can be a local image or a remote image, then - * refresh the cache with it if it is. - * - * @param source - * the story source - * @param line - * the resource to check - * - * @return the image URL if found, or NULL - * - */ - static URL getImageUrl(BasicSupport_Deprecated support, URL source, - String line) { - URL url = null; - - if (line != null) { - // try for files - if (source != null) { - try { - String relPath = null; - String absPath = null; - try { - String path = new File(source.getFile()).getParent(); - relPath = new File(new File(path), line.trim()) - .getAbsolutePath(); - } catch (Exception e) { - // Cannot be converted to path (one possibility to take - // into account: absolute path on Windows) - } - try { - absPath = new File(line.trim()).getAbsolutePath(); - } catch (Exception e) { - // Cannot be converted to path (at all) - } - - for (String ext : getImageExt(true)) { - File absFile = new File(absPath + ext); - File relFile = new File(relPath + ext); - if (absPath != null && absFile.exists() - && absFile.isFile()) { - url = absFile.toURI().toURL(); - } else if (relPath != null && relFile.exists() - && relFile.isFile()) { - url = relFile.toURI().toURL(); - } - } - } catch (Exception e) { - // Should not happen since we control the correct arguments - } - } - - if (url == null) { - // try for URLs - try { - for (String ext : getImageExt(true)) { - if (Instance.getCache() - .check(new URL(line + ext), true)) { - url = new URL(line + ext); - break; - } - } - - // try out of cache - if (url == null) { - for (String ext : getImageExt(true)) { - try { - url = new URL(line + ext); - Instance.getCache().refresh(url, support, true); - break; - } catch (IOException e) { - // no image with this ext - url = null; - } - } - } - } catch (MalformedURLException e) { - // Not an url - } - } - - // refresh the cached file - if (url != null) { - try { - Instance.getCache().refresh(url, support, true); - } catch (IOException e) { - // woops, broken image - url = null; - } - } - } - - return url; - } - - /** - * Open the input file that will be used through the support. - *

- * Can return NULL, in which case you are supposed to work without an - * {@link InputStream}. - * - * @param source - * the source {@link URL} - * - * @return the {@link InputStream} - * - * @throws IOException - * in case of I/O error - */ - protected InputStream openInput(URL source) throws IOException { - return Instance.getCache().open(source, this, false); - } - - /** - * Reset then return {@link BasicSupport_Deprecated#in}. - * - * @return {@link BasicSupport_Deprecated#in} - */ - protected InputStream getInput() { - return reset(in); - } - - /** - * Check quotes for bad format (i.e., quotes with normal paragraphs inside) - * and requotify them (i.e., separate them into QUOTE paragraphs and other - * paragraphs (quotes or not)). - * - * @param para - * the paragraph to requotify (not necessarily a quote) - * - * @return the correctly (or so we hope) quotified paragraphs - */ - protected List requotify(Paragraph para) { - List newParas = new ArrayList(); - - if (para.getType() == ParagraphType.QUOTE - && para.getContent().length() > 2) { - String line = para.getContent(); - boolean singleQ = line.startsWith("" + openQuote); - boolean doubleQ = line.startsWith("" + openDoubleQuote); - - // Do not try when more than one quote at a time - // (some stories are not easily readable if we do) - if (singleQ - && line.indexOf(closeQuote, 1) < line - .lastIndexOf(closeQuote)) { - newParas.add(para); - return newParas; - } - if (doubleQ - && line.indexOf(closeDoubleQuote, 1) < line - .lastIndexOf(closeDoubleQuote)) { - newParas.add(para); - return newParas; - } - // - - if (!singleQ && !doubleQ) { - line = openDoubleQuote + line + closeDoubleQuote; - newParas.add(new Paragraph(ParagraphType.QUOTE, line, para - .getWords())); - } else { - char open = singleQ ? openQuote : openDoubleQuote; - char close = singleQ ? closeQuote : closeDoubleQuote; - - int posDot = -1; - boolean inQuote = false; - int i = 0; - for (char car : line.toCharArray()) { - if (car == open) { - inQuote = true; - } else if (car == close) { - inQuote = false; - } else if (car == '.' && !inQuote) { - posDot = i; - break; - } - i++; - } - - if (posDot >= 0) { - String rest = line.substring(posDot + 1).trim(); - line = line.substring(0, posDot + 1).trim(); - long words = 1; - for (char car : line.toCharArray()) { - if (car == ' ') { - words++; - } - } - newParas.add(new Paragraph(ParagraphType.QUOTE, line, words)); - if (!rest.isEmpty()) { - newParas.addAll(requotify(processPara(rest))); - } - } else { - newParas.add(para); - } - } - } else { - newParas.add(para); - } - - return newParas; - } - - /** - * Process a {@link Paragraph} from a raw line of text. - *

- * Will also fix quotes and HTML encoding if needed. - * - * @param line - * the raw line - * - * @return the processed {@link Paragraph} - */ - protected Paragraph processPara(String line) { - line = ifUnhtml(line).trim(); - - boolean space = true; - boolean brk = true; - boolean quote = false; - boolean tentativeCloseQuote = false; - char prev = '\0'; - int dashCount = 0; - long words = 1; - - StringBuilder builder = new StringBuilder(); - for (char car : line.toCharArray()) { - if (car != '-') { - if (dashCount > 0) { - // dash, ndash and mdash: - – — - // currently: always use mdash - builder.append(dashCount == 1 ? '-' : '—'); - } - dashCount = 0; - } - - if (tentativeCloseQuote) { - tentativeCloseQuote = false; - if (Character.isLetterOrDigit(car)) { - builder.append("'"); - } else { - // handle double-single quotes as double quotes - if (prev == car) { - builder.append(closeDoubleQuote); - continue; - } - - builder.append(closeQuote); - } - } - - switch (car) { - case ' ': // note: unbreakable space - case ' ': - case '\t': - case '\n': // just in case - case '\r': // just in case - if (builder.length() > 0 - && builder.charAt(builder.length() - 1) != ' ') { - words++; - } - builder.append(' '); - break; - - case '\'': - if (space || (brk && quote)) { - quote = true; - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(openDoubleQuote); - } else { - builder.append(openQuote); - } - } else if (prev == ' ' || prev == car) { - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(openDoubleQuote); - } else { - builder.append(openQuote); - } - } else { - // it is a quote ("I'm off") or a 'quote' ("This - // 'good' restaurant"...) - tentativeCloseQuote = true; - } - break; - - case '"': - if (space || (brk && quote)) { - quote = true; - builder.append(openDoubleQuote); - } else if (prev == ' ') { - builder.append(openDoubleQuote); - } else { - builder.append(closeDoubleQuote); - } - break; - - case '-': - if (space) { - quote = true; - } else { - dashCount++; - } - space = false; - break; - - case '*': - case '~': - case '/': - case '\\': - case '<': - case '>': - case '=': - case '+': - case '_': - case '–': - case '—': - space = false; - builder.append(car); - break; - - case '‘': - case '`': - case '‹': - case '﹁': - case '〈': - case '「': - if (space || (brk && quote)) { - quote = true; - builder.append(openQuote); - } else { - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(openDoubleQuote); - } else { - builder.append(openQuote); - } - } - space = false; - brk = false; - break; - - case '’': - case '›': - case '﹂': - case '〉': - case '」': - space = false; - brk = false; - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(closeDoubleQuote); - } else { - builder.append(closeQuote); - } - break; - - case '«': - case '“': - case '﹃': - case '《': - case '『': - if (space || (brk && quote)) { - quote = true; - builder.append(openDoubleQuote); - } else { - builder.append(openDoubleQuote); - } - space = false; - brk = false; - break; - - case '»': - case '”': - case '﹄': - case '》': - case '』': - space = false; - brk = false; - builder.append(closeDoubleQuote); - break; - - default: - space = false; - brk = false; - builder.append(car); - break; - } - - prev = car; - } - - if (tentativeCloseQuote) { - tentativeCloseQuote = false; - builder.append(closeQuote); - } - - line = builder.toString().trim(); - - ParagraphType type = ParagraphType.NORMAL; - if (space) { - type = ParagraphType.BLANK; - } else if (brk) { - type = ParagraphType.BREAK; - } else if (quote) { - type = ParagraphType.QUOTE; - } - - return new Paragraph(type, line, words); - } - - /** - * Remove the HTML from the input if - * {@link BasicSupport_Deprecated#isHtml()} is true. - * - * @param input - * the input - * - * @return the no html version if needed - */ - private String ifUnhtml(String input) { - if (isHtml() && input != null) { - return StringUtils.unhtml(input); - } - - return input; - } - - /** - * Reset the given {@link InputStream} and return it. - * - * @param in - * the {@link InputStream} to reset - * - * @return the same {@link InputStream} after reset - */ - static protected InputStream reset(InputStream in) { - try { - if (in != null) { - in.reset(); - } - } catch (IOException e) { - } - - return in; - } - - /** - * Return the first line from the given input which correspond to the given - * selectors. - * - * @param in - * the input - * @param needle - * a string that must be found inside the target line (also - * supports "^" at start to say "only if it starts with" the - * needle) - * @param relativeLine - * the line to return based upon the target line position (-1 = - * the line before, 0 = the target line...) - * - * @return the line, or NULL if not found - */ - static protected String getLine(InputStream in, String needle, - int relativeLine) { - return getLine(in, needle, relativeLine, true); - } - - /** - * Return a line from the given input which correspond to the given - * selectors. - * - * @param in - * the input - * @param needle - * a string that must be found inside the target line (also - * supports "^" at start to say "only if it starts with" the - * needle) - * @param relativeLine - * the line to return based upon the target line position (-1 = - * the line before, 0 = the target line...) - * @param first - * takes the first result (as opposed to the last one, which will - * also always spend the input) - * - * @return the line, or NULL if not found - */ - static protected String getLine(InputStream in, String needle, - int relativeLine, boolean first) { - String rep = null; - - reset(in); - - List lines = new ArrayList(); - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - int index = -1; - scan.useDelimiter("\\n"); - while (scan.hasNext()) { - lines.add(scan.next()); - - if (index == -1) { - if (needle.startsWith("^")) { - if (lines.get(lines.size() - 1).startsWith( - needle.substring(1))) { - index = lines.size() - 1; - } - - } else { - if (lines.get(lines.size() - 1).contains(needle)) { - index = lines.size() - 1; - } - } - } - - if (index >= 0 && index + relativeLine < lines.size()) { - rep = lines.get(index + relativeLine); - if (first) { - break; - } - } - } - - return rep; - } - - /** - * Return the text between the key and the endKey (and optional subKey can - * be passed, in this case we will look for the key first, then take the - * text between the subKey and the endKey). - *

- * Will only match the first line with the given key if more than one are - * possible. Which also means that if the subKey or endKey is not found on - * that line, NULL will be returned. - * - * @param in - * the input - * @param key - * the key to match (also supports "^" at start to say - * "only if it starts with" the key) - * @param subKey - * the sub key or NULL if none - * @param endKey - * the end key or NULL for "up to the end" - * @return the text or NULL if not found - */ - static protected String getKeyLine(InputStream in, String key, - String subKey, String endKey) { - return getKeyText(getLine(in, key, 0), key, subKey, endKey); - } - - /** - * Return the text between the key and the endKey (and optional subKey can - * be passed, in this case we will look for the key first, then take the - * text between the subKey and the endKey). - * - * @param in - * the input - * @param key - * the key to match (also supports "^" at start to say - * "only if it starts with" the key) - * @param subKey - * the sub key or NULL if none - * @param endKey - * the end key or NULL for "up to the end" - * @return the text or NULL if not found - */ - static protected String getKeyText(String in, String key, String subKey, - String endKey) { - String result = null; - - String line = in; - if (line != null && line.contains(key)) { - line = line.substring(line.indexOf(key) + key.length()); - if (subKey == null || subKey.isEmpty() || line.contains(subKey)) { - if (subKey != null) { - line = line.substring(line.indexOf(subKey) - + subKey.length()); - } - if (endKey == null || line.contains(endKey)) { - if (endKey != null) { - line = line.substring(0, line.indexOf(endKey)); - result = line; - } - } - } - } - - return result; - } - - /** - * Return the text between the key and the endKey (optional subKeys can be - * passed, in this case we will look for the subKeys first, then take the - * text between the key and the endKey). - * - * @param in - * the input - * @param key - * the key to match - * @param endKey - * the end key or NULL for "up to the end" - * @param afters - * the sub-keys to find before checking for key/endKey - * - * @return the text or NULL if not found - */ - static protected String getKeyTextAfter(String in, String key, - String endKey, String... afters) { - - if (in != null && !in.isEmpty()) { - int pos = indexOfAfter(in, 0, afters); - if (pos < 0) { - return null; - } - - in = in.substring(pos); - } - - return getKeyText(in, key, null, endKey); - } - - /** - * Return the first index after all the given "afters" have been found in - * the {@link String}, or -1 if it was not possible. - * - * @param in - * the input - * @param startAt - * start at this position in the string - * @param afters - * the sub-keys to find before checking for key/endKey - * - * @return the text or NULL if not found - */ - static protected int indexOfAfter(String in, int startAt, String... afters) { - int pos = -1; - if (in != null && !in.isEmpty()) { - pos = startAt; - if (afters != null) { - for (int i = 0; pos >= 0 && i < afters.length; i++) { - String subKey = afters[i]; - if (!subKey.isEmpty()) { - pos = in.indexOf(subKey, pos); - if (pos >= 0) { - pos += subKey.length(); - } - } - } - } - } - - return pos; - } -}