X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport_Deprecated.java;fp=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport_Deprecated.java;h=591ba58d1f630727283762de72f9c27a0e4df664;hb=0ffa47548f474c1330d8d723300d9aa7a4894736;hp=0000000000000000000000000000000000000000;hpb=ecfb936ef1c22ff75a55d8fc80e9daf767a55f34;p=fanfix.git

diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java b/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java
new file mode 100644
index 0000000..591ba58
--- /dev/null
+++ b/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java
@@ -0,0 +1,1325 @@
+package be.nikiroo.fanfix.supported;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Scanner;
+
+import be.nikiroo.fanfix.Instance;
+import be.nikiroo.fanfix.bundles.Config;
+import be.nikiroo.fanfix.bundles.StringId;
+import be.nikiroo.fanfix.data.Chapter;
+import be.nikiroo.fanfix.data.MetaData;
+import be.nikiroo.fanfix.data.Paragraph;
+import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
+import be.nikiroo.fanfix.data.Story;
+import be.nikiroo.utils.Image;
+import be.nikiroo.utils.Progress;
+import be.nikiroo.utils.StringUtils;
+
+/**
+ * DEPRECATED: use the new Jsoup 'Node' system.
+ * <p>
+ * This class is the base class used by the other support classes. It can be
+ * used outside of this package, and has static methods that you can use to
+ * get access to the correct support class.
+ * <p>
+ * It will be used with 'resources' (usually web pages or files).
+ * 
+ * @author niki
+ */
+@Deprecated
+public abstract class BasicSupport_Deprecated extends BasicSupport {
+	private InputStream in; // main resource of the story, can be NULL
+	private URL currentReferer; // with only one 'r', as in 'HTTP'...
+
+	// quote chars (read from the translation bundle when the support is created)
+	private char openQuote = Instance.getTrans().getCharacter(
+			StringId.OPEN_SINGLE_QUOTE);
+	private char closeQuote = Instance.getTrans().getCharacter(
+			StringId.CLOSE_SINGLE_QUOTE);
+	private char openDoubleQuote = Instance.getTrans().getCharacter(
+			StringId.OPEN_DOUBLE_QUOTE);
+	private char closeDoubleQuote = Instance.getTrans().getCharacter(
+			StringId.CLOSE_DOUBLE_QUOTE);
+
+	// New methods not used in Deprecated mode
+	@Override
+	protected String getDesc() throws IOException { // legacy: getDesc(URL, InputStream)
+		throw new UnsupportedOperationException("should not be used by legacy code");
+	}
+
+	@Override
+	protected MetaData getMeta() throws IOException { // legacy: getMeta(URL, InputStream)
+		throw new UnsupportedOperationException("should not be used by legacy code");
+	}
+
+	@Override
+	protected List<Entry<String, URL>> getChapters(Progress pg)
+			throws IOException { // legacy: getChapters(URL, InputStream, Progress)
+		throw new UnsupportedOperationException("should not be used by legacy code");
+	}
+
+	@Override
+	protected String getChapterContent(URL chapUrl, int number, Progress pg)
+			throws IOException { // legacy: the variant taking an InputStream
+		throw new UnsupportedOperationException("should not be used by legacy code");
+	}
+
+	@Override
+	public Story process(Progress pg) throws IOException {
+		return process(getSource(), pg); // delegate to the legacy URL-based variant
+	}
+
+	//
+
+	/**
+	 * Return the {@link MetaData} of this story (called from processMeta).
+	 * 
+	 * @param source
+	 *            the source of the story, as returned by getCanonicalUrl
+	 * @param in
+	 *            the input (the main resource, reset), can be NULL
+	 * 
+	 * @return the associated {@link MetaData}, never NULL
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	protected abstract MetaData getMeta(URL source, InputStream in)
+			throws IOException;
+
+	/**
+	 * Return the story description (used to build the resume chapter).
+	 * 
+	 * @param source
+	 *            the source of the story
+	 * @param in
+	 *            the input (the main resource, reset), can be NULL
+	 * 
+	 * @return the description
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	protected abstract String getDesc(URL source, InputStream in)
+			throws IOException;
+
+	/**
+	 * Return the list of chapters (name and resource).
+	 * 
+	 * @param source
+	 *            the source of the story
+	 * @param in
+	 *            the input (the main resource, reset), can be NULL
+	 * @param pg
+	 *            the optional progress reporter
+	 * 
+	 * @return the chapters (NULL allowed; a chapter resource can be NULL too)
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	protected abstract List<Entry<String, URL>> getChapters(URL source,
+			InputStream in, Progress pg) throws IOException;
+
+	/**
+	 * Return the content of the chapter (possibly HTML encoded, if
+	 * {@link BasicSupport_Deprecated#isHtml()} is TRUE).
+	 * 
+	 * @param source
+	 *            the source of the story
+	 * @param in
+	 *            the input (the chapter resource), NULL if the chapter has none
+	 * @param number
+	 *            the chapter number (starting at 1)
+	 * @param pg
+	 *            the optional progress reporter
+	 * 
+	 * @return the content (NULL allowed, the chapter then has no paragraphs)
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	protected abstract String getChapterContent(URL source, InputStream in,
+			int number, Progress pg) throws IOException;
+
+	/**
+	 * Process the given story resource into a partially filled {@link Story}
+	 * object containing the name and metadata, except for the description.
+	 * 
+	 * @param url
+	 *            the story resource
+	 * 
+	 * @return the {@link Story}
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	public Story processMeta(URL url) throws IOException {
+		return processMeta(url, true, false, null); // close, no desc, no progress
+	}
+
+	/**
+	 * Process the given story resource into a partially filled {@link Story}
+	 * object containing the name and metadata.
+	 * 
+	 * @param url
+	 *            the story resource
+	 * @param close
+	 *            close "this" and "in" when done
+	 * @param getDesc
+	 *            retrieve the description of the story, or not
+	 * @param pg
+	 *            the optional progress reporter
+	 * 
+	 * @return the {@link Story}, never NULL
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	protected Story processMeta(URL url, boolean close, boolean getDesc,
+			Progress pg) throws IOException {
+		if (pg == null) {
+			pg = new Progress();
+		} else {
+			pg.setMinMax(0, 100);
+		}
+
+		login();
+		pg.setProgress(10);
+
+		url = getCanonicalUrl(url);
+
+		setCurrentReferer(url);
+
+		in = openInput(url); // NULL allowed here
+		try {
+			preprocess(url, getInput());
+			pg.setProgress(30);
+
+			Story story = new Story();
+			MetaData meta = getMeta(url, getInput());
+			if (meta.getCreationDate() == null
+					|| meta.getCreationDate().isEmpty()) {
+				meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
+			}
+			story.setMeta(meta);
+
+			pg.setProgress(50);
+
+			if (meta.getCover() == null) {
+				meta.setCover(getDefaultCover(meta.getSubject()));
+			}
+
+			pg.setProgress(60);
+
+			if (getDesc) {
+				String descName = Instance.getTrans().getString(StringId.DESCRIPTION);
+				String desc = getDesc(url, getInput());
+				story.getMeta().setResume(makeChapter(url, 0, descName, desc, null));
+			}
+
+			pg.setProgress(100);
+			return story;
+		} finally {
+			if (close) {
+				try {
+					close();
+				} finally { // release the stream even if close() throws
+					if (in != null) {
+						in.close();
+					}
+				}
+			}
+		}
+	}
+
+	/**
+	 * Process the given story resource into a fully filled {@link Story}
+	 * object.
+	 * 
+	 * @param url
+	 *            the story resource
+	 * @param pg
+	 *            the optional progress reporter
+	 * 
+	 * @return the {@link Story}, never NULL
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	protected Story process(URL url, Progress pg) throws IOException {
+		if (pg == null) {
+			pg = new Progress();
+		} else {
+			pg.setMinMax(0, 100);
+		}
+
+		url = getCanonicalUrl(url);
+		pg.setProgress(1);
+		try {
+			Progress pgMeta = new Progress();
+			pg.addProgress(pgMeta, 10);
+			Story story = processMeta(url, false, true, pgMeta);
+			if (!pgMeta.isDone()) {
+				pgMeta.setProgress(pgMeta.getMax()); // 10%
+			}
+
+			pg.setName("Retrieving " + story.getMeta().getTitle());
+
+			setCurrentReferer(url);
+
+			Progress pgGetChapters = new Progress();
+			pg.addProgress(pgGetChapters, 10);
+			story.setChapters(new ArrayList<Chapter>());
+			List<Entry<String, URL>> chapters = getChapters(url, getInput(),
+					pgGetChapters);
+			if (!pgGetChapters.isDone()) {
+				pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
+			}
+
+			if (chapters != null) {
+				Progress pgChaps = new Progress("Extracting chapters", 0,
+						chapters.size() * 300);
+				pg.addProgress(pgChaps, 80);
+
+				long words = 0;
+				int i = 1;
+				for (Entry<String, URL> chap : chapters) {
+					pgChaps.setName("Extracting chapter " + i);
+					InputStream chapIn = null;
+					if (chap.getValue() != null) {
+						setCurrentReferer(chap.getValue());
+						chapIn = Instance.getCache().open(chap.getValue(),
+								this, false);
+					}
+					pgChaps.setProgress(i * 100);
+					try {
+						Progress pgGetChapterContent = new Progress();
+						Progress pgMakeChapter = new Progress();
+						pgChaps.addProgress(pgGetChapterContent, 100);
+						pgChaps.addProgress(pgMakeChapter, 100);
+
+						String content = getChapterContent(url, chapIn, i,
+								pgGetChapterContent);
+						if (!pgGetChapterContent.isDone()) {
+							pgGetChapterContent.setProgress(pgGetChapterContent.getMax());
+						}
+
+						Chapter cc = makeChapter(url, i, chap.getKey(),
+								content, pgMakeChapter);
+						if (!pgMakeChapter.isDone()) {
+							pgMakeChapter.setProgress(pgMakeChapter.getMax());
+						}
+
+						words += cc.getWords();
+						story.getChapters().add(cc);
+						story.getMeta().setWords(words);
+					} finally {
+						if (chapIn != null) {
+							chapIn.close();
+						}
+					}
+
+					i++;
+				}
+
+				pgChaps.setName("Extracting chapters");
+			} else {
+				pg.setProgress(80);
+			}
+
+			return story;
+		} finally {
+			try {
+				close();
+			} finally { // release the main stream even if close() throws
+				if (in != null) {
+					in.close();
+				}
+			}
+		}
+	}
+
+	/**
+	 * Prepare the support if needed before processing.
+	 * 
+	 * @param source
+	 *            the source of the story
+	 * @param in
+	 *            the input (the main resource)
+	 * 
+	 * @throws IOException
+	 *             on I/O error
+	 */
+	@SuppressWarnings("unused")
+	protected void preprocess(URL source, InputStream in) throws IOException {
+	} // empty here; called by processMeta() right after the input is opened
+
+	/**
+	 * Create a {@link Chapter} object from the given information, formatting
+	 * the content as it should be.
+	 * 
+	 * @param source
+	 *            the source of the story
+	 * @param number
+	 *            the chapter number
+	 * @param name
+	 *            the chapter name
+	 * @param content
+	 *            the chapter content
+	 * @param pg
+	 *            the optional progress reporter
+	 * 
+	 * @return the {@link Chapter}
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	protected Chapter makeChapter(URL source, int number, String name,
+			String content, Progress pg) throws IOException {
+		// Chapter name: process it correctly, then remove the possible
+		// redundant "Chapter x: " in front of it, or "-" (as in
+		// "Chapter 5: - Fun!" after the ": " was automatically added)
+		String chapterName = processPara(name).getContent().trim();
+		for (String lang : Instance.getConfig().getString(Config.CHAPTER)
+				.split(",")) {
+			String chapterWord = Instance.getConfig().getStringX(
+					Config.CHAPTER, lang);
+			if (chapterName.startsWith(chapterWord)) {
+				chapterName = chapterName.substring(chapterWord.length())
+						.trim();
+				break;
+			}
+		}
+
+		// Only drop the number when it is a whole number: chapter 1 named
+		// "10 things" must not become "0 things"
+		String num = Integer.toString(number);
+		if (chapterName.startsWith(num) && (chapterName.length() == num.length()
+				|| !Character.isDigit(chapterName.charAt(num.length())))) {
+			chapterName = chapterName.substring(num.length()).trim();
+		}
+
+		while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
+			chapterName = chapterName.substring(1).trim();
+		}
+
+		Chapter chap = new Chapter(number, chapterName);
+		if (content != null) {
+			List<Paragraph> paras = makeParagraphs(source, content, pg);
+			long words = 0;
+			for (Paragraph para : paras) {
+				words += para.getWords();
+			}
+			chap.setParagraphs(paras);
+			chap.setWords(words);
+		}
+
+		return chap;
+	}
+
+	/**
+	 * Convert the given content into {@link Paragraph}s.
+	 * 
+	 * @param source
+	 *            the source URL of the story
+	 * @param content
+	 *            the textual content
+	 * @param pg
+	 *            the optional progress reporter
+	 * 
+	 * @return the {@link Paragraph}s
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	protected List<Paragraph> makeParagraphs(URL source, String content,
+			Progress pg) throws IOException {
+		if (pg == null) {
+			pg = new Progress();
+		}
+
+		List<Paragraph> paras = new ArrayList<Paragraph>();
+		if (content == null || content.trim().isEmpty()) {
+			// Nothing to convert; this check comes first so that the <HR>
+			// processing below never sees a NULL content
+			return paras;
+		}
+
+		// Step 1: cut the content into raw lines
+		List<String> lines = new ArrayList<String>();
+		if (isHtml()) {
+			// Special <HR> processing:
+			content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
+					"<br/>* * *<br/>");
+
+			// paragraph and line-break tags all start a new line
+			String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
+			for (String line : tab) {
+				lines.add(line);
+			}
+		} else {
+			// plain text: one entry per physical line
+			BufferedReader buff = null;
+			try {
+				buff = new BufferedReader(
+						new InputStreamReader(new ByteArrayInputStream(
+								content.getBytes("UTF-8")), "UTF-8"));
+				for (String line = buff.readLine(); line != null; line = buff
+						.readLine()) {
+					lines.add(line.trim());
+				}
+			} finally {
+				if (buff != null) {
+					buff.close();
+				}
+			}
+		}
+
+		// Step 2: one paragraph per line; the progress is 1-based in both
+		// modes so that it reaches its maximum on the last line
+		pg.setMinMax(0, lines.size());
+		int i = 1;
+		for (String line : lines) {
+			// "[resource]" lines are image references, see makeParagraph()
+			if (line.startsWith("[") && line.endsWith("]")) {
+				pg.setName("Extracting image " + i);
+			}
+			paras.add(makeParagraph(source, line.trim()));
+			pg.setProgress(i++);
+		}
+		pg.setName(null);
+
+		// Step 3: check quotes for "bad" format
+		List<Paragraph> newParas = new ArrayList<Paragraph>();
+		for (Paragraph para : paras) {
+			newParas.addAll(requotify(para));
+		}
+		paras = newParas;
+
+		// Step 4: remove double blanks/brks
+		fixBlanksBreaks(paras);
+
+		return paras;
+	}
+
+	/**
+	 * Convert the given line into a single {@link Paragraph}.
+	 * 
+	 * @param source
+	 *            the source URL of the story
+	 * @param line
+	 *            the textual content of the paragraph
+	 * 
+	 * @return the {@link Paragraph}
+	 */
+	private Paragraph makeParagraph(URL source, String line) {
+		// "[resource]" lines are image references; anything else is text
+		boolean bracketed = line.startsWith("[") && line.endsWith("]");
+		if (bracketed) {
+			String resource = line.substring(1, line.length() - 1).trim();
+			Image image = getImage(this, source, resource);
+			if (image != null) {
+				return new Paragraph(image);
+			}
+		}
+
+		return processPara(line);
+	}
+
+	/**
+	 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
+	 * those {@link Paragraph}s.
+	 * <p>
+	 * The resulting list will not contain a starting or trailing blank/break
+	 * nor 2 blanks or breaks following each other.
+	 * 
+	 * @param paras
+	 *            the list of {@link Paragraph}s to fix
+	 */
+	protected void fixBlanksBreaks(List<Paragraph> paras) {
+		boolean space = false;
+		boolean brk = true; // as if a break came first: leading blanks/breaks go
+		for (int i = 0; i < paras.size(); i++) {
+			Paragraph para = paras.get(i);
+			boolean thisSpace = para.getType() == ParagraphType.BLANK;
+			boolean thisBrk = para.getType() == ParagraphType.BREAK;
+
+			if (i > 0 && space && thisBrk) { // break wins: drop the entry before it
+				paras.remove(i - 1);
+				i--;
+			} else if ((space || brk) && (thisSpace || thisBrk)) { // 2 in a row
+				paras.remove(i); // drop the current one
+				i--;
+			}
+
+			space = thisSpace; // note: also updated when 'para' was just removed
+			brk = thisBrk;
+		}
+
+		// Remove blank/brk at start
+		if (paras.size() > 0
+				&& (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
+						0).getType() == ParagraphType.BREAK)) {
+			paras.remove(0);
+		}
+
+		// Remove blank/brk at end
+		int last = paras.size() - 1;
+		if (paras.size() > 0
+				&& (paras.get(last).getType() == ParagraphType.BLANK || paras
+						.get(last).getType() == ParagraphType.BREAK)) {
+			paras.remove(last);
+		}
+	}
+
+	/**
+	 * Get the default cover related to this subject (see <tt>.info</tt> files).
+	 * 
+	 * @param subject
+	 *            the subject
+	 * 
+	 * @return the cover if any, or NULL
+	 */
+	static Image getDefaultCover(String subject) {
+		if (subject == null || subject.isEmpty()
+				|| Instance.getCoverDir() == null) {
+			return null;
+		}
+		try {
+			File fileCover = new File(Instance.getCoverDir(), subject);
+			return getImage(null, fileCover.toURI().toURL(), subject);
+		} catch (MalformedURLException e) {
+			return null; // best effort: no usable cover location
+		}
+	}
+
+	/**
+	 * Return the list of supported image extensions.
+	 * 
+	 * @param emptyAllowed
+	 *            TRUE to allow an empty extension on first place, which can be
+	 *            used when you may already have an extension in your input but
+	 *            are not sure about it
+	 * 
+	 * @return the extensions
+	 */
+	static String[] getImageExt(boolean emptyAllowed) {
+		String[] all = { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
+		int skip = emptyAllowed ? 0 : 1; // the empty extension always comes first
+		String[] exts = new String[all.length - skip];
+		System.arraycopy(all, skip, exts, 0, exts.length);
+		return exts;
+	}
+
+	/**
+	 * Load the image designated by the given resource (local file or remote
+	 * URL, see getImageUrl()) through the cache.
+	 * 
+	 * @param support
+	 *            the support forwarded to getImageUrl() (can be NULL)
+	 * @param source
+	 *            the story source
+	 * @param line
+	 *            the resource to check
+	 * @return the image if it was found and could be read, or NULL
+	 */
+	static Image getImage(BasicSupport_Deprecated support, URL source,
+			String line) {
+		URL url = getImageUrl(support, source, line);
+		if (url == null) {
+			return null;
+		}
+
+		boolean local = "file".equals(url.getProtocol());
+		if (local && new File(url.getPath()).isDirectory()) {
+			return null; // a directory is never an image
+		}
+		InputStream in = null;
+		try {
+			in = Instance.getCache().open(url, getSupport(url), true);
+			return new Image(in);
+		} catch (IOException e) {
+			return null; // unreadable image: behave as "not found"
+		} finally {
+			if (in != null) {
+				try {
+					in.close();
+				} catch (IOException e) {
+				}
+			}
+		}
+	}
+
+	/**
+	 * Find the URL of the image designated by the given resource (a local file,
+	 * absolute or relative to the source, else a remote URL) and refresh it.
+	 * 
+	 * @param support
+	 *            the support passed to the cache when refreshing (can be NULL)
+	 * @param source
+	 *            the story source
+	 * @param line
+	 *            the resource to check
+	 * @return the image URL if found (and refreshed in the cache), or NULL
+	 */
+	static URL getImageUrl(BasicSupport_Deprecated support, URL source,
+			String line) {
+		URL url = null;
+
+		if (line != null) {
+			// try for files
+			if (source != null) {
+				try {
+					String relPath = null;
+					String absPath = null;
+					try {
+						String path = new File(source.getFile()).getParent();
+						relPath = new File(new File(path), line.trim())
+								.getAbsolutePath();
+					} catch (Exception e) {
+						// Cannot be converted to path (one possibility to take
+						// into account: absolute path on Windows)
+					}
+					try {
+						absPath = new File(line.trim()).getAbsolutePath();
+					} catch (Exception e) {
+						// Cannot be converted to path (at all)
+					}
+
+					for (String ext : getImageExt(true)) {
+						File absFile = absPath == null ? null : new File(absPath + ext);
+						File relFile = relPath == null ? null : new File(relPath + ext);
+						if (absFile != null && absFile.exists() && absFile.isFile()) {
+							url = absFile.toURI().toURL();
+						} else if (relFile != null && relFile.exists() && relFile.isFile()) {
+							url = relFile.toURI().toURL();
+						}
+						if (url != null) {
+							break; // first extension wins, as for remote URLs
+						}
+					}
+				} catch (Exception e) {
+					// Should not happen since we control the correct arguments
+				}
+			}
+
+			if (url == null) {
+				// try for URLs
+				try {
+					for (String ext : getImageExt(true)) {
+						if (Instance.getCache()
+								.check(new URL(line + ext), true)) {
+							url = new URL(line + ext);
+							break;
+						}
+					}
+
+					// try out of cache
+					if (url == null) {
+						for (String ext : getImageExt(true)) {
+							try {
+								url = new URL(line + ext);
+								Instance.getCache().refresh(url, support, true);
+								break;
+							} catch (IOException e) {
+								// no image with this ext
+								url = null;
+							}
+						}
+					}
+				} catch (MalformedURLException e) {
+					// Not an url
+				}
+			}
+
+			// refresh the cached file
+			if (url != null) {
+				try {
+					Instance.getCache().refresh(url, support, true);
+				} catch (IOException e) {
+					// woops, broken image
+					url = null;
+				}
+			}
+		}
+
+		return url;
+	}
+
+	/**
+	 * Open the input file that will be used through the support.
+	 * <p>
+	 * Can return NULL, in which case you are supposed to work without an
+	 * {@link InputStream}.
+	 * 
+	 * @param source
+	 *            the source {@link URL}
+	 * 
+	 * @return the {@link InputStream}
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	protected InputStream openInput(URL source) throws IOException {
+		return Instance.getCache().open(source, this, false); // via the shared cache
+	}
+
+	/**
+	 * Reset then return {@link BasicSupport_Deprecated#in}.
+	 * 
+	 * @return {@link BasicSupport_Deprecated#in}
+	 */
+	protected InputStream getInput() {
+		return reset(in); // 'in' can be NULL, reset() then returns NULL
+	}
+
+	/**
+	 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
+	 * and requotify them (i.e., separate them into QUOTE paragraphs and other
+	 * paragraphs (quotes or not)).
+	 * 
+	 * @param para
+	 *            the paragraph to requotify (not necessarily a quote)
+	 * 
+	 * @return the correctly (or so we hope) quotified paragraphs
+	 */
+	protected List<Paragraph> requotify(Paragraph para) {
+		List<Paragraph> newParas = new ArrayList<Paragraph>();
+
+		if (para.getType() == ParagraphType.QUOTE
+				&& para.getContent().length() > 2) {
+			String line = para.getContent();
+			boolean singleQ = line.startsWith("" + openQuote);
+			boolean doubleQ = line.startsWith("" + openDoubleQuote);
+
+			// Do not try when more than one quote at a time
+			// (some stories are not easily readable if we do)
+			if (singleQ
+					&& line.indexOf(closeQuote, 1) < line
+							.lastIndexOf(closeQuote)) {
+				newParas.add(para);
+				return newParas;
+			}
+			if (doubleQ
+					&& line.indexOf(closeDoubleQuote, 1) < line
+							.lastIndexOf(closeDoubleQuote)) {
+				newParas.add(para);
+				return newParas;
+			}
+			// (i.e., the line holds several closing quotes of its own kind)
+
+			if (!singleQ && !doubleQ) { // no opening quote: wrap the whole line
+				line = openDoubleQuote + line + closeDoubleQuote;
+				newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
+						.getWords()));
+			} else {
+				char open = singleQ ? openQuote : openDoubleQuote;
+				char close = singleQ ? closeQuote : closeDoubleQuote;
+
+				int posDot = -1; // position of the first '.' found outside quotes
+				boolean inQuote = false;
+				int i = 0;
+				for (char car : line.toCharArray()) {
+					if (car == open) {
+						inQuote = true;
+					} else if (car == close) {
+						inQuote = false;
+					} else if (car == '.' && !inQuote) {
+						posDot = i;
+						break;
+					}
+					i++;
+				}
+
+				if (posDot >= 0) { // split: quote up to the dot, then the rest
+					String rest = line.substring(posDot + 1).trim();
+					line = line.substring(0, posDot + 1).trim();
+					long words = 1; // word count = number of spaces + 1
+					for (char car : line.toCharArray()) {
+						if (car == ' ') {
+							words++;
+						}
+					}
+					newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
+					if (!rest.isEmpty()) {
+						newParas.addAll(requotify(processPara(rest))); // recurse
+					}
+				} else {
+					newParas.add(para);
+				}
+			}
+		} else {
+			newParas.add(para);
+		}
+
+		return newParas;
+	}
+
+	/**
+	 * Process a {@link Paragraph} from a raw line of text.
+	 * <p>
+	 * Will also fix quotes and HTML encoding if needed.
+	 * 
+	 * @param line
+	 *            the raw line
+	 * 
+	 * @return the processed {@link Paragraph}
+	 */
+	protected Paragraph processPara(String line) {
+		line = ifUnhtml(line).trim();
+
+		boolean space = true;
+		boolean brk = true;
+		boolean quote = false;
+		boolean tentativeCloseQuote = false;
+		char prev = '\0';
+		int dashCount = 0;
+		long words = 1;
+
+		StringBuilder builder = new StringBuilder();
+		for (char car : line.toCharArray()) {
+			if (car != '-') {
+				if (dashCount > 0) {
+					// dash, ndash (U+2013) and mdash (U+2014):
+					// one dash stays a dash, several become one mdash
+					builder.append(dashCount == 1 ? '-' : '\u2014');
+				}
+				dashCount = 0;
+			}
+
+			if (tentativeCloseQuote) {
+				tentativeCloseQuote = false;
+				if (Character.isLetterOrDigit(car)) {
+					builder.append("'");
+				} else {
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.append(closeDoubleQuote);
+						continue;
+					}
+
+					builder.append(closeQuote);
+				}
+			}
+
+			switch (car) {
+			case '\u00A0': // note: unbreakable space
+			case ' ':
+			case '\t':
+			case '\n': // just in case
+			case '\r': // just in case
+				if (builder.length() > 0
+						&& builder.charAt(builder.length() - 1) != ' ') {
+					words++;
+				}
+				builder.append(' ');
+				break;
+
+			case '\'':
+				if (space || (brk && quote)) {
+					quote = true;
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.deleteCharAt(builder.length() - 1);
+						builder.append(openDoubleQuote);
+					} else {
+						builder.append(openQuote);
+					}
+				} else if (prev == ' ' || prev == car) {
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.deleteCharAt(builder.length() - 1);
+						builder.append(openDoubleQuote);
+					} else {
+						builder.append(openQuote);
+					}
+				} else {
+					// it is a quote ("I'm off") or a 'quote' ("This
+					// 'good' restaurant"...)
+					tentativeCloseQuote = true;
+				}
+				break;
+
+			case '"':
+				if (space || (brk && quote)) {
+					quote = true;
+					builder.append(openDoubleQuote);
+				} else if (prev == ' ') {
+					builder.append(openDoubleQuote);
+				} else {
+					builder.append(closeDoubleQuote);
+				}
+				break;
+
+			case '-':
+				if (space) {
+					quote = true;
+				} else {
+					dashCount++;
+				}
+				space = false;
+				break;
+
+			case '*':
+			case '~':
+			case '/':
+			case '\\':
+			case '<':
+			case '>':
+			case '=':
+			case '+':
+			case '_':
+			case '\u2013': // ndash
+			case '\u2014': // mdash
+				space = false;
+				builder.append(car);
+				break;
+
+			case '\u2018': // left single quotation mark
+			case '`':
+			case '\u2039': // single left-pointing angle quotation mark
+			case '\uFE41': // vertical left corner bracket
+			case '\u3008': // left angle bracket
+			case '\u300C': // left corner bracket
+				if (space || (brk && quote)) {
+					quote = true;
+					builder.append(openQuote);
+				} else {
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.deleteCharAt(builder.length() - 1);
+						builder.append(openDoubleQuote);
+					} else {
+						builder.append(openQuote);
+					}
+				}
+				space = false;
+				brk = false;
+				break;
+
+			case '\u2019': // right single quotation mark
+			case '\u203A': // single right-pointing angle quotation mark
+			case '\uFE42': // vertical right corner bracket
+			case '\u3009': // right angle bracket
+			case '\u300D': // right corner bracket
+				space = false;
+				brk = false;
+				// handle double-single quotes as double quotes
+				if (prev == car) {
+					builder.deleteCharAt(builder.length() - 1);
+					builder.append(closeDoubleQuote);
+				} else {
+					builder.append(closeQuote);
+				}
+				break;
+
+			case '\u00AB': // left-pointing double angle quotation mark
+			case '\u201C': // left double quotation mark
+			case '\uFE43': // vertical left white corner bracket
+			case '\u300A': // left double angle bracket
+			case '\u300E': // left white corner bracket
+				if (space || (brk && quote)) {
+					quote = true;
+				}
+				builder.append(openDoubleQuote);
+				space = false;
+				brk = false;
+				break;
+
+			case '\u00BB': // right-pointing double angle quotation mark
+			case '\u201D': // right double quotation mark
+			case '\uFE44': // vertical right white corner bracket
+			case '\u300B': // right double angle bracket
+			case '\u300F': // right white corner bracket
+				space = false;
+				brk = false;
+				builder.append(closeDoubleQuote);
+				break;
+
+			default:
+				space = false;
+				brk = false;
+				builder.append(car);
+				break;
+			}
+
+			prev = car;
+		}
+
+		if (dashCount > 0) { // the line ends on dashes: do not lose them
+			builder.append(dashCount == 1 ? '-' : '\u2014');
+		}
+		if (tentativeCloseQuote) { // the line ends on a single quote
+			builder.append(closeQuote);
+		}
+
+		line = builder.toString().trim();
+
+		ParagraphType type = ParagraphType.NORMAL;
+		if (space) {
+			type = ParagraphType.BLANK;
+		} else if (brk) {
+			type = ParagraphType.BREAK;
+		} else if (quote) {
+			type = ParagraphType.QUOTE;
+		}
+
+		return new Paragraph(type, line, words);
+	}
+
+	/**
+	 * Remove the HTML from the input <b>if</b>
+	 * {@link BasicSupport_Deprecated#isHtml()} is true.
+	 * 
+	 * @param input
+	 *            the input
+	 * 
+	 * @return the no html version if needed
+	 */
+	private String ifUnhtml(String input) {
+		// plain-text supports and NULL inputs go through untouched
+		if (!isHtml() || input == null) {
+			return input;
+		}
+		return StringUtils.unhtml(input);
+	}
+
+	/**
+	 * Reset the given {@link InputStream} and return it.
+	 * 
+	 * @param in
+	 *            the {@link InputStream} to reset
+	 * 
+	 * @return the same {@link InputStream} after reset
+	 */
+	static protected InputStream reset(InputStream in) {
+		if (in == null) {
+			return null;
+		}
+		try {
+			in.reset();
+		} catch (IOException e) { // best effort: the stream is returned as it is
+		}
+		return in;
+	}
+
+	/**
+	 * Return the first line from the given input which correspond to the given
+	 * selectors.
+	 * 
+	 * @param in
+	 *            the input
+	 * @param needle
+	 *            a string that must be found inside the target line (also
+	 *            supports "^" at start to say "only if it starts with" the
+	 *            needle)
+	 * @param relativeLine
+	 *            the line to return based upon the target line position (-1 =
+	 *            the line before, 0 = the target line...)
+	 * 
+	 * @return the line
+	 */
+	static protected String getLine(InputStream in, String needle,
+			int relativeLine) {
+		return getLine(in, needle, relativeLine, true); // true: stop at first match
+	}
+
+	/**
+	 * Return a line from the given input which correspond to the given
+	 * selectors.
+	 * 
+	 * @param in
+	 *            the input
+	 * @param needle
+	 *            a string that must be found inside the target line (also
+	 *            supports "^" at start to say "only if it starts with" the
+	 *            needle)
+	 * @param relativeLine
+	 *            the line to return based upon the target line position (-1 =
+	 *            the line before, 0 = the target line...)
+	 * @param first
+	 *            takes the first result (as opposed to the last one, which will
+	 *            also always spend the input)
+	 * 
+	 * @return the line
+	 */
+	static protected String getLine(InputStream in, String needle,
+			int relativeLine, boolean first) {
+		String rep = null;
+
+		reset(in);
+
+		List<String> lines = new ArrayList<String>();
+		// not closed on purpose: closing the Scanner would also close 'in'
+		@SuppressWarnings("resource")
+		Scanner scan = new Scanner(in, "UTF-8");
+		int index = -1;
+		scan.useDelimiter("\\n");
+		boolean anchored = needle != null && needle.startsWith("^");
+		String text = anchored ? needle.substring(1) : needle;
+		while (scan.hasNext()) {
+			String line = scan.next();
+			lines.add(line);
+
+			// NOTE(review): the match is latched on the first hit, so
+			// first=false only keeps scanning (spending the input)
+			if (index == -1
+					&& (anchored ? line.startsWith(text) : line.contains(text))) {
+				index = lines.size() - 1;
+			}
+
+			// a negative relativeLine can point before the first line: in
+			// that case there is simply no line to return
+			int target = index + relativeLine;
+			if (index >= 0 && target >= 0 && target < lines.size()) {
+				rep = lines.get(target);
+				if (first) {
+					break;
+				}
+			}
+		}
+
+		return rep;
+	}
+
+	/**
+	 * Return the text between the key and the endKey (and optional subKey can
+	 * be passed, in this case we will look for the key first, then take the
+	 * text between the subKey and the endKey).
+	 * <p>
+	 * Will only match the first line with the given key if more than one are
+	 * possible. Which also means that if the subKey or endKey is not found on
+	 * that line, NULL will be returned.
+	 * 
+	 * @param in
+	 *            the input
+	 * @param key
+	 *            the key to match (also supports "^" at start to say
+	 *            "only if it starts with" the key)
+	 * @param subKey
+	 *            the sub key or NULL if none
+	 * @param endKey
+	 *            the end key or NULL for "up to the end"
+	 * @return the text or NULL if not found
+	 */
+	static protected String getKeyLine(InputStream in, String key,
+			String subKey, String endKey) {
+		return getKeyText(getLine(in, key, 0), key, subKey, endKey); // 0: the key line
+	}
+
+	/**
+	 * Return the text between the key and the endKey (and optional subKey can
+	 * be passed, in this case we will look for the key first, then take the
+	 * text between the subKey and the endKey).
+	 * 
+	 * @param in
+	 *            the input
+	 * @param key
+	 *            the key to match (also supports "^" at start to say
+	 *            "only if it starts with" the key)
+	 * @param subKey
+	 *            the sub key or NULL if none
+	 * @param endKey
+	 *            the end key or NULL for "up to the end"
+	 * @return the text or NULL if not found
+	 */
+	static protected String getKeyText(String in, String key, String subKey,
+			String endKey) {
+		if (in == null || key == null) {
+			return null;
+		}
+		// Literal match first; a leading "^" then means "starts with"
+		int pos = in.indexOf(key);
+		int keyLen = key.length();
+		if (pos < 0 && key.startsWith("^") && in.startsWith(key.substring(1))) {
+			pos = 0;
+			keyLen--;
+		}
+		String line = pos < 0 ? null : in.substring(pos + keyLen);
+		if (line != null && subKey != null && !subKey.isEmpty()) {
+			pos = line.indexOf(subKey);
+			line = pos < 0 ? null : line.substring(pos + subKey.length());
+		}
+		if (line != null && endKey != null) { // NULL endKey: up to the end
+			pos = line.indexOf(endKey);
+			line = pos < 0 ? null : line.substring(0, pos);
+		}
+		return line;
+	}
+
+	/**
+	 * Return the text between the key and the endKey (optional subKeys can be
+	 * passed, in this case we will look for the subKeys first, then take the
+	 * text between the key and the endKey).
+	 * 
+	 * @param in
+	 *            the input
+	 * @param key
+	 *            the key to match
+	 * @param endKey
+	 *            the end key or NULL for "up to the end"
+	 * @param afters
+	 *            the sub-keys to find before checking for key/endKey
+	 * 
+	 * @return the text or NULL if not found
+	 */
+	static protected String getKeyTextAfter(String in, String key,
+			String endKey, String... afters) {
+		String haystack = in;
+		boolean searchable = in != null && !in.isEmpty();
+		if (searchable) {
+			int start = indexOfAfter(in, 0, afters);
+			if (start < 0) {
+				return null;
+			}
+			haystack = in.substring(start);
+		}
+		// no sub key here: the "afters" have already been skipped
+		return getKeyText(haystack, key, null, endKey);
+	}
+
+	/**
+	 * Return the first index after all the given "afters" have been found (in
+	 * order) in the {@link String}, or -1 if it was not possible.
+	 * 
+	 * @param in
+	 *            the input
+	 * @param startAt
+	 *            start at this position in the string
+	 * @param afters
+	 *            the sub-keys to skip, in order (NULL or empty ones are ignored)
+	 * 
+	 * @return the index just after the last sub-key, or -1 if not found
+	 */
+	static protected int indexOfAfter(String in, int startAt, String... afters) {
+		if (in == null || in.isEmpty()) {
+			return -1;
+		}
+
+		int pos = startAt;
+		for (int i = 0; afters != null && pos >= 0 && i < afters.length; i++) {
+			String subKey = afters[i];
+			if (subKey == null || subKey.isEmpty()) {
+				continue; // nothing to skip for this one
+			}
+			pos = in.indexOf(subKey, pos);
+			if (pos >= 0) {
+				pos += subKey.length();
+			}
+		}
+
+		return pos;
+	}
+}