/*
 * Provenance (this file was recovered from a gitweb "blobdiff_plain" patch):
 *
 * X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport_Deprecated.java;fp=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport_Deprecated.java;h=591ba58d1f630727283762de72f9c27a0e4df664;hb=0ffa47548f474c1330d8d723300d9aa7a4894736;hp=0000000000000000000000000000000000000000;hpb=ecfb936ef1c22ff75a55d8fc80e9daf767a55f34;p=fanfix.git
 *
 * diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java
 *            b/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java
 * new file mode 100644
 * index 0000000..591ba58
 */
package be.nikiroo.fanfix.supported;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map.Entry;
import java.util.Scanner;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.bundles.Config;
import be.nikiroo.fanfix.bundles.StringId;
import be.nikiroo.fanfix.data.Chapter;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.fanfix.data.Paragraph;
import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
import be.nikiroo.fanfix.data.Story;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;

/**
 * DEPRECATED: use the new Jsoup 'Node' system.
 * <p>
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and have static method that you can use to get
 * access to the correct support class.
 * <p>
 * It will be used with 'resources' (usually web pages or files).
 * 
 * @author niki
 */
@Deprecated
public abstract class BasicSupport_Deprecated extends BasicSupport {
	private InputStream in;
	private URL currentReferer; // with only one 'r', as in 'HTTP'...

	// quote chars
	private char openQuote = Instance.getTrans().getCharacter(
			StringId.OPEN_SINGLE_QUOTE);
	private char closeQuote = Instance.getTrans().getCharacter(
			StringId.CLOSE_SINGLE_QUOTE);
	private char openDoubleQuote = Instance.getTrans().getCharacter(
			StringId.OPEN_DOUBLE_QUOTE);
	private char closeDoubleQuote = Instance.getTrans().getCharacter(
			StringId.CLOSE_DOUBLE_QUOTE);

	// New methods not used in Deprecated mode
	@Override
	protected String getDesc() throws IOException {
		throw new RuntimeException("should not be used by legacy code");
	}

	@Override
	protected MetaData getMeta() throws IOException {
		throw new RuntimeException("should not be used by legacy code");
	}

	@Override
	protected List<Entry<String, URL>> getChapters(Progress pg)
			throws IOException {
		throw new RuntimeException("should not be used by legacy code");
	}

	@Override
	protected String getChapterContent(URL chapUrl, int number, Progress pg)
			throws IOException {
		throw new RuntimeException("should not be used by legacy code");
	}

	@Override
	public Story process(Progress pg) throws IOException {
		return process(getSource(), pg);
	}

	//

	/**
	 * Return the {@link MetaData} of this story.
	 * 
	 * @param source
	 *            the source of the story
	 * @param in
	 *            the input (the main resource)
	 * 
	 * @return the associated {@link MetaData}, never NULL
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	protected abstract MetaData getMeta(URL source, InputStream in)
			throws IOException;

	/**
	 * Return the story description.
	 * 
	 * @param source
	 *            the source of the story
	 * @param in
	 *            the input (the main resource)
	 * 
	 * @return the description
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	protected abstract String getDesc(URL source, InputStream in)
			throws IOException;

	/**
	 * Return the list of chapters (name and resource).
	 * 
	 * @param source
	 *            the source of the story
	 * @param in
	 *            the input (the main resource)
	 * @param pg
	 *            the optional progress reporter
	 * 
	 * @return the chapters
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	protected abstract List<Entry<String, URL>> getChapters(URL source,
			InputStream in, Progress pg) throws IOException;

	/**
	 * Return the content of the chapter (possibly HTML encoded, if
	 * {@link BasicSupport_Deprecated#isHtml()} is TRUE).
	 * 
	 * @param source
	 *            the source of the story
	 * @param in
	 *            the input (the main resource)
	 * @param number
	 *            the chapter number
	 * @param pg
	 *            the optional progress reporter
	 * 
	 * @return the content
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	protected abstract String getChapterContent(URL source, InputStream in,
			int number, Progress pg) throws IOException;

	/**
	 * Process the given story resource into a partially filled {@link Story}
	 * object containing the name and metadata, except for the description.
	 * 
	 * @param url
	 *            the story resource
	 * 
	 * @return the {@link Story}
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	public Story processMeta(URL url) throws IOException {
		return processMeta(url, true, false, null);
	}

	/**
	 * Process the given story resource into a partially filled {@link Story}
	 * object containing the name and metadata.
	 * 
	 * @param url
	 *            the story resource
	 * @param close
	 *            close "this" and "in" when done
	 * @param getDesc
	 *            retrieve the description of the story, or not
	 * @param pg
	 *            the optional progress reporter
	 * 
	 * @return the {@link Story}, never NULL
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	protected Story processMeta(URL url, boolean close, boolean getDesc,
			Progress pg) throws IOException {
		if (pg == null) {
			pg = new Progress();
		} else {
			pg.setMinMax(0, 100);
		}

		login();
		pg.setProgress(10);

		url = getCanonicalUrl(url);

		setCurrentReferer(url);

		in = openInput(url); // NULL allowed here
		try {
			preprocess(url, getInput());
			pg.setProgress(30);

			Story story = new Story();
			MetaData meta = getMeta(url, getInput());
			// Default the creation date to "now" when the support gave none
			if (meta.getCreationDate() == null
					|| meta.getCreationDate().isEmpty()) {
				meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
			}
			story.setMeta(meta);

			pg.setProgress(50);

			if (meta.getCover() == null) {
				meta.setCover(getDefaultCover(meta.getSubject()));
			}

			pg.setProgress(60);

			if (getDesc) {
				String descChapterName = Instance.getTrans().getString(
						StringId.DESCRIPTION);
				story.getMeta().setResume(
						makeChapter(url, 0, descChapterName,
								getDesc(url, getInput()), null));
			}

			pg.setProgress(100);
			return story;
		} finally {
			if (close) {
				closeSupportAndInput();
			}
		}
	}

	/**
	 * Process the given story resource into a fully filled {@link Story}
	 * object.
	 * 
	 * @param url
	 *            the story resource
	 * @param pg
	 *            the optional progress reporter
	 * 
	 * @return the {@link Story}, never NULL
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	protected Story process(URL url, Progress pg) throws IOException {
		if (pg == null) {
			pg = new Progress();
		} else {
			pg.setMinMax(0, 100);
		}

		url = getCanonicalUrl(url);
		pg.setProgress(1);
		try {
			Progress pgMeta = new Progress();
			pg.addProgress(pgMeta, 10);
			// Do not close here: the main input is still needed for the
			// chapters, it is closed in the finally block below
			Story story = processMeta(url, false, true, pgMeta);
			if (!pgMeta.isDone()) {
				pgMeta.setProgress(pgMeta.getMax()); // 10%
			}

			pg.setName("Retrieving " + story.getMeta().getTitle());

			setCurrentReferer(url);

			Progress pgGetChapters = new Progress();
			pg.addProgress(pgGetChapters, 10);
			story.setChapters(new ArrayList<Chapter>());
			List<Entry<String, URL>> chapters = getChapters(url, getInput(),
					pgGetChapters);
			if (!pgGetChapters.isDone()) {
				pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
			}

			if (chapters != null) {
				Progress pgChaps = new Progress("Extracting chapters", 0,
						chapters.size() * 300);
				pg.addProgress(pgChaps, 80);

				long words = 0;
				int i = 1;
				for (Entry<String, URL> chap : chapters) {
					pgChaps.setName("Extracting chapter " + i);
					InputStream chapIn = null;
					if (chap.getValue() != null) {
						setCurrentReferer(chap.getValue());
						chapIn = Instance.getCache().open(chap.getValue(),
								this, false);
					}
					pgChaps.setProgress(i * 100);
					try {
						Progress pgGetChapterContent = new Progress();
						Progress pgMakeChapter = new Progress();
						pgChaps.addProgress(pgGetChapterContent, 100);
						pgChaps.addProgress(pgMakeChapter, 100);

						String content = getChapterContent(url, chapIn, i,
								pgGetChapterContent);
						if (!pgGetChapterContent.isDone()) {
							pgGetChapterContent.setProgress(pgGetChapterContent
									.getMax());
						}

						Chapter cc = makeChapter(url, i, chap.getKey(),
								content, pgMakeChapter);
						if (!pgMakeChapter.isDone()) {
							pgMakeChapter.setProgress(pgMakeChapter.getMax());
						}

						words += cc.getWords();
						story.getChapters().add(cc);
						story.getMeta().setWords(words);
					} finally {
						if (chapIn != null) {
							chapIn.close();
						}
					}

					i++;
				}

				pgChaps.setName("Extracting chapters");
			} else {
				pg.setProgress(80);
			}

			return story;

		} finally {
			closeSupportAndInput();
		}
	}

	/**
	 * Close this support, then the main input (if any).
	 * <p>
	 * The main input is closed even if closing the support fails, so the
	 * stream cannot leak.
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	private void closeSupportAndInput() throws IOException {
		try {
			close();
		} finally {
			if (in != null) {
				in.close();
			}
		}
	}

	/**
	 * Prepare the support if needed before processing.
	 * <p>
	 * The default implementation does nothing.
	 * 
	 * @param source
	 *            the source of the story
	 * @param in
	 *            the input (the main resource)
	 * 
	 * @throws IOException
	 *             on I/O error
	 */
	@SuppressWarnings("unused")
	protected void preprocess(URL source, InputStream in) throws IOException {
	}

	/**
	 * Create a {@link Chapter} object from the given information, formatting
	 * the content as it should be.
	 * 
	 * @param source
	 *            the source of the story
	 * @param number
	 *            the chapter number
	 * @param name
	 *            the chapter name
	 * @param content
	 *            the chapter content (can be NULL, in which case the
	 *            {@link Chapter} has no paragraphs set)
	 * @param pg
	 *            the optional progress reporter
	 * 
	 * @return the {@link Chapter}
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	protected Chapter makeChapter(URL source, int number, String name,
			String content, Progress pg) throws IOException {
		// Chapter name: process it correctly, then remove the possible
		// redundant "Chapter x: " in front of it, or "-" (as in
		// "Chapter 5: - Fun!" after the ": " was automatically added)
		String chapterName = processPara(name).getContent().trim();
		for (String lang : Instance.getConfig().getString(Config.CHAPTER)
				.split(",")) {
			String chapterWord = Instance.getConfig().getStringX(
					Config.CHAPTER, lang);
			if (chapterName.startsWith(chapterWord)) {
				chapterName = chapterName.substring(chapterWord.length())
						.trim();
				break;
			}
		}

		if (chapterName.startsWith(Integer.toString(number))) {
			chapterName = chapterName.substring(
					Integer.toString(number).length()).trim();
		}

		while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
			chapterName = chapterName.substring(1).trim();
		}
		//

		Chapter chap = new Chapter(number, chapterName);

		if (content != null) {
			List<Paragraph> paras = makeParagraphs(source, content, pg);
			long words = 0;
			for (Paragraph para : paras) {
				words += para.getWords();
			}
			chap.setParagraphs(paras);
			chap.setWords(words);
		}

		return chap;

	}

	/**
	 * Convert the given content into {@link Paragraph}s.
	 * 
	 * @param source
	 *            the source URL of the story
	 * @param content
	 *            the textual content (NULL or blank content yields an empty
	 *            list)
	 * @param pg
	 *            the optional progress reporter
	 * 
	 * @return the {@link Paragraph}s, never NULL
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	protected List<Paragraph> makeParagraphs(URL source, String content,
			Progress pg) throws IOException {
		if (pg == null) {
			pg = new Progress();
		}

		// The NULL check must come first: the replacement below used to
		// throw a NullPointerException on NULL content in HTML mode
		if (content != null && isHtml()) {
			// Special <HR> processing:
			content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
					"<br/>* * *<br/>");
		}

		List<Paragraph> paras = new ArrayList<Paragraph>();

		if (content != null && !content.trim().isEmpty()) {
			if (isHtml()) {
				String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
				pg.setMinMax(0, tab.length);
				int i = 1;
				for (String line : tab) {
					if (line.startsWith("[") && line.endsWith("]")) {
						pg.setName("Extracting image " + i);
					}
					paras.add(makeParagraph(source, line.trim()));
					pg.setProgress(i++);
				}
				pg.setName(null);
			} else {
				List<String> lines = new ArrayList<String>();
				BufferedReader buff = null;
				try {
					buff = new BufferedReader(
							new InputStreamReader(new ByteArrayInputStream(
									content.getBytes("UTF-8")), "UTF-8"));
					for (String line = buff.readLine(); line != null; line = buff
							.readLine()) {
						lines.add(line.trim());
					}
				} finally {
					if (buff != null) {
						buff.close();
					}
				}

				pg.setMinMax(0, lines.size());
				// 1-based like the HTML branch, so the progress reaches its
				// maximum on the last line
				int i = 1;
				for (String line : lines) {
					if (line.startsWith("[") && line.endsWith("]")) {
						pg.setName("Extracting image " + i);
					}
					paras.add(makeParagraph(source, line));
					pg.setProgress(i++);
				}
				pg.setName(null);
			}

			// Check quotes for "bad" format
			List<Paragraph> newParas = new ArrayList<Paragraph>();
			for (Paragraph para : paras) {
				newParas.addAll(requotify(para));
			}
			paras = newParas;

			// Remove double blanks/brks
			fixBlanksBreaks(paras);
		}

		return paras;
	}

	/**
	 * Convert the given line into a single {@link Paragraph}.
	 * <p>
	 * A line of the form <tt>[resource]</tt> is first tried as an image; if
	 * no image can be found for it, it is processed as normal text.
	 * 
	 * @param source
	 *            the source URL of the story
	 * @param line
	 *            the textual content of the paragraph
	 * 
	 * @return the {@link Paragraph}
	 */
	private Paragraph makeParagraph(URL source, String line) {
		Image image = null;
		if (line.startsWith("[") && line.endsWith("]")) {
			image = getImage(this, source, line.substring(1, line.length() - 1)
					.trim());
		}

		if (image != null) {
			return new Paragraph(image);
		}

		return processPara(line);
	}

	/**
	 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
	 * those {@link Paragraph}s.
	 * <p>
	 * The resulting list will not contain a starting or trailing blank/break
	 * nor 2 blanks or breaks following each other.
	 * 
	 * @param paras
	 *            the list of {@link Paragraph}s to fix (modified in place)
	 */
	protected void fixBlanksBreaks(List<Paragraph> paras) {
		boolean space = false;
		// Starts as TRUE so that leading blanks/breaks are dropped
		boolean brk = true;
		for (int i = 0; i < paras.size(); i++) {
			Paragraph para = paras.get(i);
			boolean thisSpace = para.getType() == ParagraphType.BLANK;
			boolean thisBrk = para.getType() == ParagraphType.BREAK;

			if (i > 0 && space && thisBrk) {
				// blank followed by a break: keep the break only
				paras.remove(i - 1);
				i--;
			} else if ((space || brk) && (thisSpace || thisBrk)) {
				paras.remove(i);
				i--;
			}

			space = thisSpace;
			brk = thisBrk;
		}

		// Remove blank/brk at start
		if (paras.size() > 0
				&& (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
						0).getType() == ParagraphType.BREAK)) {
			paras.remove(0);
		}

		// Remove blank/brk at end
		int last = paras.size() - 1;
		if (paras.size() > 0
				&& (paras.get(last).getType() == ParagraphType.BLANK || paras
						.get(last).getType() == ParagraphType.BREAK)) {
			paras.remove(last);
		}
	}

	/**
	 * Get the default cover related to this subject (see <tt>.info</tt>
	 * files).
	 * 
	 * @param subject
	 *            the subject
	 * 
	 * @return the cover if any, or NULL
	 */
	static Image getDefaultCover(String subject) {
		if (subject != null && !subject.isEmpty()
				&& Instance.getCoverDir() != null) {
			try {
				File fileCover = new File(Instance.getCoverDir(), subject);
				return getImage(null, fileCover.toURI().toURL(), subject);
			} catch (MalformedURLException e) {
				// Best effort: no usable URL means no default cover
			}
		}

		return null;
	}

	/**
	 * Return the list of supported image extensions.
	 * 
	 * @param emptyAllowed
	 *            TRUE to allow an empty extension on first place, which can be
	 *            used when you may already have an extension in your input but
	 *            are not sure about it
	 * 
	 * @return the extensions
	 */
	static String[] getImageExt(boolean emptyAllowed) {
		if (emptyAllowed) {
			return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
		}

		return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
	}

	/**
	 * Check if the given resource can be a local image or a remote image, then
	 * refresh the cache with it if it is.
	 * 
	 * @param support
	 *            the support to use to refresh the cache (can be NULL)
	 * @param source
	 *            the story source
	 * @param line
	 *            the resource to check
	 * 
	 * @return the image if found, or NULL
	 * 
	 */
	static Image getImage(BasicSupport_Deprecated support, URL source,
			String line) {
		URL url = getImageUrl(support, source, line);
		if (url != null) {
			if ("file".equals(url.getProtocol())) {
				if (new File(url.getPath()).isDirectory()) {
					return null;
				}
			}
			InputStream in = null;
			try {
				in = Instance.getCache().open(url, getSupport(url), true);
				return new Image(in);
			} catch (IOException e) {
				// Best effort: an unreadable image is reported as "no image"
			} finally {
				if (in != null) {
					try {
						in.close();
					} catch (IOException e) {
						// Nothing useful can be done if close fails
					}
				}
			}
		}

		return null;
	}

	/**
	 * Check if the given resource can be a local image or a remote image, then
	 * refresh the cache with it if it is.
	 * 
	 * @param support
	 *            the support to use to refresh the cache (can be NULL)
	 * @param source
	 *            the story source
	 * @param line
	 *            the resource to check
	 * 
	 * @return the image URL if found, or NULL
	 * 
	 */
	static URL getImageUrl(BasicSupport_Deprecated support, URL source,
			String line) {
		URL url = null;

		if (line != null) {
			// try for files
			if (source != null) {
				try {

					String relPath = null;
					String absPath = null;
					try {
						String path = new File(source.getFile()).getParent();
						relPath = new File(new File(path), line.trim())
								.getAbsolutePath();
					} catch (Exception e) {
						// Cannot be converted to path (one possibility to take
						// into account: absolute path on Windows)
					}
					try {
						absPath = new File(line.trim()).getAbsolutePath();
					} catch (Exception e) {
						// Cannot be converted to path (at all)
					}

					// NOTE(review): there is no break here, so when several
					// extensions match the last one wins, unlike the URL
					// lookups below -- confirm this is intended
					for (String ext : getImageExt(true)) {
						// Only build the File objects for known paths (a NULL
						// path used to be probed as "null" + ext)
						File absFile = absPath == null ? null : new File(
								absPath + ext);
						File relFile = relPath == null ? null : new File(
								relPath + ext);
						if (absFile != null && absFile.exists()
								&& absFile.isFile()) {
							url = absFile.toURI().toURL();
						} else if (relFile != null && relFile.exists()
								&& relFile.isFile()) {
							url = relFile.toURI().toURL();
						}
					}
				} catch (Exception e) {
					// Should not happen since we control the correct arguments
				}
			}

			if (url == null) {
				// try for URLs
				try {
					for (String ext : getImageExt(true)) {
						if (Instance.getCache()
								.check(new URL(line + ext), true)) {
							url = new URL(line + ext);
							break;
						}
					}

					// try out of cache
					if (url == null) {
						for (String ext : getImageExt(true)) {
							try {
								url = new URL(line + ext);
								Instance.getCache().refresh(url, support, true);
								break;
							} catch (IOException e) {
								// no image with this ext
								url = null;
							}
						}
					}
				} catch (MalformedURLException e) {
					// Not an url
				}
			}

			// refresh the cached file
			if (url != null) {
				try {
					Instance.getCache().refresh(url, support, true);
				} catch (IOException e) {
					// woops, broken image
					url = null;
				}
			}
		}

		return url;
	}

	/**
	 * Open the input file that will be used through the support.
	 * <p>
	 * Can return NULL, in which case you are supposed to work without an
	 * {@link InputStream}.
	 * 
	 * @param source
	 *            the source {@link URL}
	 * 
	 * @return the {@link InputStream}
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	protected InputStream openInput(URL source) throws IOException {
		return Instance.getCache().open(source, this, false);
	}

	/**
	 * Reset then return {@link BasicSupport_Deprecated#in}.
	 * 
	 * @return {@link BasicSupport_Deprecated#in}
	 */
	protected InputStream getInput() {
		return reset(in);
	}

	/**
	 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
	 * and requotify them (i.e., separate them into QUOTE paragraphs and other
	 * paragraphs (quotes or not)).
	 * 
	 * @param para
	 *            the paragraph to requotify (not necessarily a quote)
	 * 
	 * @return the correctly (or so we hope) quotified paragraphs
	 */
	protected List<Paragraph> requotify(Paragraph para) {
		List<Paragraph> newParas = new ArrayList<Paragraph>();

		if (para.getType() == ParagraphType.QUOTE
				&& para.getContent().length() > 2) {
			String line = para.getContent();
			boolean singleQ = line.startsWith("" + openQuote);
			boolean doubleQ = line.startsWith("" + openDoubleQuote);

			// Do not try when more than one quote at a time
			// (some stories are not easily readable if we do)
			if (singleQ
					&& line.indexOf(closeQuote, 1) < line
							.lastIndexOf(closeQuote)) {
				newParas.add(para);
				return newParas;
			}
			if (doubleQ
					&& line.indexOf(closeDoubleQuote, 1) < line
							.lastIndexOf(closeDoubleQuote)) {
				newParas.add(para);
				return newParas;
			}
			//

			if (!singleQ && !doubleQ) {
				// A quote without quote characters: wrap it in double quotes
				line = openDoubleQuote + line + closeDoubleQuote;
				newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
						.getWords()));
			} else {
				char open = singleQ ? openQuote : openDoubleQuote;
				char close = singleQ ? closeQuote : closeDoubleQuote;

				// Look for the first '.' that is outside of the quote
				int posDot = -1;
				boolean inQuote = false;
				int i = 0;
				for (char car : line.toCharArray()) {
					if (car == open) {
						inQuote = true;
					} else if (car == close) {
						inQuote = false;
					} else if (car == '.' && !inQuote) {
						posDot = i;
						break;
					}
					i++;
				}

				if (posDot >= 0) {
					// Split after that '.' and requotify the remaining text
					String rest = line.substring(posDot + 1).trim();
					line = line.substring(0, posDot + 1).trim();
					long words = 1;
					for (char car : line.toCharArray()) {
						if (car == ' ') {
							words++;
						}
					}
					newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
					if (!rest.isEmpty()) {
						newParas.addAll(requotify(processPara(rest)));
					}
				} else {
					newParas.add(para);
				}
			}
		} else {
			newParas.add(para);
		}

		return newParas;
	}

	/**
	 * Process a {@link Paragraph} from a raw line of text.
	 * <p>
	 * Will also fix quotes and HTML encoding if needed.
	 * 
	 * @param line
	 *            the raw line (NULL is processed as an empty line)
	 * 
	 * @return the processed {@link Paragraph}
	 */
	protected Paragraph processPara(String line) {
		line = ifUnhtml(line);
		// A NULL line used to throw a NullPointerException here
		line = (line == null) ? "" : line.trim();

		// space: only whitespace (or quotes) seen so far
		// brk: nothing but whitespace/separator characters seen so far
		boolean space = true;
		boolean brk = true;
		boolean quote = false;
		boolean tentativeCloseQuote = false;
		char prev = '\0';
		int dashCount = 0;
		long words = 1;

		StringBuilder builder = new StringBuilder();
		for (char car : line.toCharArray()) {
			if (car != '-') {
				if (dashCount > 0) {
					// dash, ndash and mdash: - – —
					// currently: always use mdash
					builder.append(dashCount == 1 ? '-' : '—');
				}
				dashCount = 0;
			}

			if (tentativeCloseQuote) {
				// The previous char was a "'" that could be an apostrophe or a
				// closing quote: what follows it decides
				tentativeCloseQuote = false;
				if (Character.isLetterOrDigit(car)) {
					builder.append("'");
				} else {
					// handle double-single quotes as double quotes
					if (prev == car) {
						builder.append(closeDoubleQuote);
						continue;
					}

					builder.append(closeQuote);
				}
			}

			switch (car) {
			case '\u00A0': // note: unbreakable space
			case ' ':
			case '\t':
			case '\n': // just in case
			case '\r': // just in case
				if (builder.length() > 0
						&& builder.charAt(builder.length() - 1) != ' ') {
					words++;
				}
				builder.append(' ');
				break;

			case '\'':
				if (space || (brk && quote)) {
					quote = true;
					// handle double-single quotes as double quotes
					if (prev == car) {
						builder.deleteCharAt(builder.length() - 1);
						builder.append(openDoubleQuote);
					} else {
						builder.append(openQuote);
					}
				} else if (prev == ' ' || prev == car) {
					// handle double-single quotes as double quotes
					if (prev == car) {
						builder.deleteCharAt(builder.length() - 1);
						builder.append(openDoubleQuote);
					} else {
						builder.append(openQuote);
					}
				} else {
					// it is a quote ("I'm off") or a 'quote' ("This
					// 'good' restaurant"...)
					tentativeCloseQuote = true;
				}
				break;

			case '"':
				if (space || (brk && quote)) {
					quote = true;
					builder.append(openDoubleQuote);
				} else if (prev == ' ') {
					builder.append(openDoubleQuote);
				} else {
					builder.append(closeDoubleQuote);
				}
				break;

			case '-':
				if (space) {
					quote = true;
				} else {
					dashCount++;
				}
				space = false;
				break;

			case '*':
			case '~':
			case '/':
			case '\\':
			case '<':
			case '>':
			case '=':
			case '+':
			case '_':
			case '–':
			case '—':
				space = false;
				builder.append(car);
				break;

			case '‘':
			case '`':
			case '‹':
			case '﹁':
			case '〈':
			case '「':
				if (space || (brk && quote)) {
					quote = true;
					builder.append(openQuote);
				} else {
					// handle double-single quotes as double quotes
					if (prev == car) {
						builder.deleteCharAt(builder.length() - 1);
						builder.append(openDoubleQuote);
					} else {
						builder.append(openQuote);
					}
				}
				space = false;
				brk = false;
				break;

			case '’':
			case '›':
			case '﹂':
			case '〉':
			case '」':
				space = false;
				brk = false;
				// handle double-single quotes as double quotes
				if (prev == car) {
					builder.deleteCharAt(builder.length() - 1);
					builder.append(closeDoubleQuote);
				} else {
					builder.append(closeQuote);
				}
				break;

			case '«':
			case '“':
			case '﹃':
			case '《':
			case '『':
				if (space || (brk && quote)) {
					quote = true;
					builder.append(openDoubleQuote);
				} else {
					builder.append(openDoubleQuote);
				}
				space = false;
				brk = false;
				break;

			case '»':
			case '”':
			case '﹄':
			case '》':
			case '』':
				space = false;
				brk = false;
				builder.append(closeDoubleQuote);
				break;

			default:
				space = false;
				brk = false;
				builder.append(car);
				break;
			}

			prev = car;
		}

		if (tentativeCloseQuote) {
			tentativeCloseQuote = false;
			builder.append(closeQuote);
		}

		line = builder.toString().trim();

		ParagraphType type = ParagraphType.NORMAL;
		if (space) {
			type = ParagraphType.BLANK;
		} else if (brk) {
			type = ParagraphType.BREAK;
		} else if (quote) {
			type = ParagraphType.QUOTE;
		}

		return new Paragraph(type, line, words);
	}

	/**
	 * Remove the HTML from the input if
	 * {@link BasicSupport_Deprecated#isHtml()} is true.
	 * 
	 * @param input
	 *            the input (can be NULL, in which case it is returned as is)
	 * 
	 * @return the no html version if needed
	 */
	private String ifUnhtml(String input) {
		if (isHtml() && input != null) {
			return StringUtils.unhtml(input);
		}

		return input;
	}

	/**
	 * Reset the given {@link InputStream} and return it.
	 * <p>
	 * A failure to reset is ignored.
	 * 
	 * @param in
	 *            the {@link InputStream} to reset (can be NULL)
	 * 
	 * @return the same {@link InputStream} after reset
	 */
	static protected InputStream reset(InputStream in) {
		try {
			if (in != null) {
				in.reset();
			}
		} catch (IOException e) {
			// Best effort: the stream is returned as is
		}

		return in;
	}

	/**
	 * Return the first line from the given input which correspond to the given
	 * selectors.
	 * 
	 * @param in
	 *            the input
	 * @param needle
	 *            a string that must be found inside the target line (also
	 *            supports "^" at start to say "only if it starts with" the
	 *            needle)
	 * @param relativeLine
	 *            the line to return based upon the target line position (-1 =
	 *            the line before, 0 = the target line...)
	 * 
	 * @return the line, or NULL if not found
	 */
	static protected String getLine(InputStream in, String needle,
			int relativeLine) {
		return getLine(in, needle, relativeLine, true);
	}

	/**
	 * Return a line from the given input which correspond to the given
	 * selectors.
	 * 
	 * @param in
	 *            the input
	 * @param needle
	 *            a string that must be found inside the target line (also
	 *            supports "^" at start to say "only if it starts with" the
	 *            needle)
	 * @param relativeLine
	 *            the line to return based upon the target line position (-1 =
	 *            the line before, 0 = the target line...)
	 * @param first
	 *            takes the first result (as opposed to the last one, which will
	 *            also always spend the input)
	 * 
	 * @return the line, or NULL if not found (including when the requested
	 *         relative line is before the start of the input)
	 */
	static protected String getLine(InputStream in, String needle,
			int relativeLine, boolean first) {
		String rep = null;

		if (in == null || needle == null) {
			return null;
		}

		reset(in);

		List<String> lines = new ArrayList<String>();
		@SuppressWarnings("resource")
		Scanner scan = new Scanner(in, "UTF-8");
		// NOTE(review): only the first matching line is ever recorded in
		// "index", so "first == false" does not select a later match; it only
		// reads the input to the end -- confirm against the callers before
		// changing this
		int index = -1;
		scan.useDelimiter("\\n");
		while (scan.hasNext()) {
			lines.add(scan.next());

			if (index == -1) {
				if (needle.startsWith("^")) {
					if (lines.get(lines.size() - 1).startsWith(
							needle.substring(1))) {
						index = lines.size() - 1;
					}

				} else {
					if (lines.get(lines.size() - 1).contains(needle)) {
						index = lines.size() - 1;
					}
				}
			}

			// A negative target (line before the first one) used to throw an
			// IndexOutOfBoundsException
			int target = index + relativeLine;
			if (index >= 0 && target >= 0 && target < lines.size()) {
				rep = lines.get(target);
				if (first) {
					break;
				}
			}
		}

		return rep;
	}

	/**
	 * Return the text between the key and the endKey (and optional subKey can
	 * be passed, in this case we will look for the key first, then take the
	 * text between the subKey and the endKey).
	 * <p>
	 * Will only match the first line with the given key if more than one are
	 * possible. Which also means that if the subKey or endKey is not found on
	 * that line, NULL will be returned.
	 * 
	 * @param in
	 *            the input
	 * @param key
	 *            the key to match (also supports "^" at start to say
	 *            "only if it starts with" the key)
	 * @param subKey
	 *            the sub key or NULL if none
	 * @param endKey
	 *            the end key or NULL for "up to the end"
	 * @return the text or NULL if not found
	 */
	static protected String getKeyLine(InputStream in, String key,
			String subKey, String endKey) {
		return getKeyText(getLine(in, key, 0), key, subKey, endKey);
	}

	/**
	 * Return the text between the key and the endKey (and optional subKey can
	 * be passed, in this case we will look for the key first, then take the
	 * text between the subKey and the endKey).
	 * 
	 * @param in
	 *            the input
	 * @param key
	 *            the key to match (also supports "^" at start to say
	 *            "only if it starts with" the key)
	 * @param subKey
	 *            the sub key or NULL if none
	 * @param endKey
	 *            the end key or NULL for "up to the end"
	 * @return the text or NULL if not found
	 */
	static protected String getKeyText(String in, String key, String subKey,
			String endKey) {
		if (in == null || key == null) {
			return null;
		}

		String line = in;

		// Position right after the key: the literal key is tried first, then
		// the documented "^key" form ("starts with key")
		int start = -1;
		if (line.contains(key)) {
			start = line.indexOf(key) + key.length();
		} else if (key.startsWith("^") && line.startsWith(key.substring(1))) {
			start = key.length() - 1;
		}

		if (start < 0) {
			return null;
		}
		line = line.substring(start);

		if (subKey != null && !subKey.isEmpty()) {
			int posSub = line.indexOf(subKey);
			if (posSub < 0) {
				return null;
			}
			line = line.substring(posSub + subKey.length());
		}

		// A NULL endKey means "up to the end" (it used to yield NULL)
		if (endKey != null) {
			int posEnd = line.indexOf(endKey);
			if (posEnd < 0) {
				return null;
			}
			line = line.substring(0, posEnd);
		}

		return line;
	}

	/**
	 * Return the text between the key and the endKey (optional subKeys can be
	 * passed, in this case we will look for the subKeys first, then take the
	 * text between the key and the endKey).
	 * 
	 * @param in
	 *            the input
	 * @param key
	 *            the key to match
	 * @param endKey
	 *            the end key or NULL for "up to the end"
	 * @param afters
	 *            the sub-keys to find before checking for key/endKey
	 * 
	 * @return the text or NULL if not found
	 */
	static protected String getKeyTextAfter(String in, String key,
			String endKey, String... afters) {

		if (in != null && !in.isEmpty()) {
			int pos = indexOfAfter(in, 0, afters);
			if (pos < 0) {
				return null;
			}

			in = in.substring(pos);
		}

		return getKeyText(in, key, null, endKey);
	}

	/**
	 * Return the first index after all the given "afters" have been found in
	 * the {@link String}, or -1 if it was not possible.
	 * 
	 * @param in
	 *            the input
	 * @param startAt
	 *            start at this position in the string
	 * @param afters
	 *            the sub-keys to find, in order (NULL or empty sub-keys are
	 *            skipped)
	 * 
	 * @return the index right after the last sub-key, or -1 if the input is
	 *         NULL or empty or if a sub-key was not found
	 */
	static protected int indexOfAfter(String in, int startAt, String... afters) {
		int pos = -1;
		if (in != null && !in.isEmpty()) {
			pos = startAt;
			if (afters != null) {
				for (int i = 0; pos >= 0 && i < afters.length; i++) {
					String subKey = afters[i];
					if (subKey != null && !subKey.isEmpty()) {
						pos = in.indexOf(subKey, pos);
						if (pos >= 0) {
							pos += subKey.length();
						}
					}
				}
			}
		}

		return pos;
	}
}