X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=0ee12fec463cef8b1c6dd31093ad2401c9ef4587;hb=e8eeea0a321493d270c35f594a8bf392cc95f4df;hp=74f11156e82c265a0eee1ddbd09b7c417a98ed12;hpb=08fe2e33007063e30fe22dc1d290f8afaa18eb1d;p=nikiroo-utils.git

diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java
index 74f1115..0ee12fe 100644
--- a/src/be/nikiroo/fanfix/supported/BasicSupport.java
+++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java
@@ -1,13 +1,16 @@
 package be.nikiroo.fanfix.supported;
 
+import java.awt.image.BufferedImage;
+import java.io.BufferedReader;
 import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -20,8 +23,10 @@ import be.nikiroo.fanfix.bundles.StringId;
 import be.nikiroo.fanfix.data.Chapter;
 import be.nikiroo.fanfix.data.MetaData;
 import be.nikiroo.fanfix.data.Paragraph;
-import be.nikiroo.fanfix.data.Story;
 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
+import be.nikiroo.fanfix.data.Story;
+import be.nikiroo.utils.IOUtils;
+import be.nikiroo.utils.Progress;
 import be.nikiroo.utils.StringUtils;
 
 /**
@@ -55,8 +60,12 @@ public abstract class BasicSupport {
 		MANGAFOX,
 		/** Furry website with comics support */
 		E621,
+		/** Furry website with stories */
+		YIFFSTAR,
 		/** CBZ files */
-		CBZ;
+		CBZ,
+		/** HTML files */
+		HTML;
 
 		/**
 		 * A description of this support type (more information than the
@@ -142,19 +151,18 @@ public abstract class BasicSupport {
 		}
 	}
 
-	/** Only used by {@link BasicSupport#getInput()} just so it is always reset. */
 	private InputStream in;
 	private SupportType type;
-	private URL currentReferer; // with on 'r', as in 'HTTP'...
+	private URL currentReferer; // with only one 'r', as in 'HTTP'...
 
 	// quote chars
-	private char openQuote = Instance.getTrans().getChar(
+	private char openQuote = Instance.getTrans().getCharacter(
 			StringId.OPEN_SINGLE_QUOTE);
-	private char closeQuote = Instance.getTrans().getChar(
+	private char closeQuote = Instance.getTrans().getCharacter(
 			StringId.CLOSE_SINGLE_QUOTE);
-	private char openDoubleQuote = Instance.getTrans().getChar(
+	private char openDoubleQuote = Instance.getTrans().getCharacter(
 			StringId.OPEN_DOUBLE_QUOTE);
-	private char closeDoubleQuote = Instance.getTrans().getChar(
+	private char closeDoubleQuote = Instance.getTrans().getCharacter(
 			StringId.CLOSE_DOUBLE_QUOTE);
 
 	/**
@@ -182,70 +190,7 @@ public abstract class BasicSupport {
 	 */
 	protected abstract boolean isHtml();
 
-	/**
-	 * Return the story title.
-	 * 
-	 * @param source
-	 *            the source of the story
-	 * @param in
-	 *            the input (the main resource)
-	 * 
-	 * @return the title
-	 * 
-	 * @throws IOException
-	 *             in case of I/O error
-	 */
-	protected abstract String getTitle(URL source, InputStream in)
-			throws IOException;
-
-	/**
-	 * Return the story author.
-	 * 
-	 * @param source
-	 *            the source of the story
-	 * @param in
-	 *            the input (the main resource)
-	 * 
-	 * @return the author
-	 * 
-	 * @throws IOException
-	 *             in case of I/O error
-	 */
-	protected abstract String getAuthor(URL source, InputStream in)
-			throws IOException;
-
-	/**
-	 * Return the story publication date.
-	 * 
-	 * @param source
-	 *            the source of the story
-	 * @param in
-	 *            the input (the main resource)
-	 * 
-	 * @return the date
-	 * 
-	 * @throws IOException
-	 *             in case of I/O error
-	 */
-	protected abstract String getDate(URL source, InputStream in)
-			throws IOException;
-
-	/**
-	 * Return the subject of the story (for instance, if it is a fanfiction,
-	 * what is the original work; if it is a technical text, what is the
-	 * technical subject...).
-	 * 
-	 * @param source
-	 *            the source of the story
-	 * @param in
-	 *            the input (the main resource)
-	 * 
-	 * @return the subject
-	 * 
-	 * @throws IOException
-	 *             in case of I/O error
-	 */
-	protected abstract String getSubject(URL source, InputStream in)
+	protected abstract MetaData getMeta(URL source, InputStream in)
 			throws IOException;
 
 	/**
@@ -264,24 +209,6 @@ public abstract class BasicSupport {
 	protected abstract String getDesc(URL source, InputStream in)
 			throws IOException;
 
-	/**
-	 * Return the story cover resource if any, or NULL if none.
-	 * <p>
-	 * The default cover should not be checked for here.
-	 * 
-	 * @param source
-	 *            the source of the story
-	 * @param in
-	 *            the input (the main resource)
-	 * 
-	 * @return the cover or NULL
-	 * 
-	 * @throws IOException
-	 *             in case of I/O error
-	 */
-	protected abstract URL getCover(URL source, InputStream in)
-			throws IOException;
-
 	/**
 	 * Return the list of chapters (name and resource).
 	 * 
@@ -289,6 +216,8 @@ public abstract class BasicSupport {
 	 *            the source of the story
 	 * @param in
 	 *            the input (the main resource)
+	 * @param pg
+	 *            the optional progress reporter
 	 * 
 	 * @return the chapters
 	 * 
@@ -296,7 +225,7 @@ public abstract class BasicSupport {
 	 *             in case of I/O error
 	 */
 	protected abstract List<Entry<String, URL>> getChapters(URL source,
-			InputStream in) throws IOException;
+			InputStream in, Progress pg) throws IOException;
 
 	/**
 	 * Return the content of the chapter (possibly HTML encoded, if
@@ -308,6 +237,8 @@ public abstract class BasicSupport {
 	 *            the input (the main resource)
 	 * @param number
 	 *            the chapter number
+	 * @param pg
+	 *            the optional progress reporter
 	 * 
 	 * @return the content
 	 * 
@@ -315,16 +246,16 @@ public abstract class BasicSupport {
 	 *             in case of I/O error
 	 */
 	protected abstract String getChapterContent(URL source, InputStream in,
-			int number) throws IOException;
+			int number, Progress pg) throws IOException;
 
 	/**
-	 * Check if this {@link BasicSupport} is mainly catered to image files.
+	 * Log into the support (can be a no-op depending upon the support).
 	 * 
-	 * @return TRUE if it is
+	 * @throws IOException
+	 *             in case of I/O error
 	 */
-	public boolean isImageDocument(URL source, InputStream in)
-			throws IOException {
-		return false;
+	public void login() throws IOException {
+
 	}
 
 	/**
@@ -335,11 +266,29 @@ public abstract class BasicSupport {
 	 * it.
 	 * 
 	 * @return the cookies
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
 	 */
-	public Map<String, String> getCookies() {
+	public Map<String, String> getCookies() throws IOException {
 		return new HashMap<String, String>();
 	}
 
+	/**
+	 * Return the canonical form of the main {@link URL}.
+	 * 
+	 * @param source
+	 *            the source {@link URL}
+	 * 
+	 * @return the canonical form of this {@link URL}
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	public URL getCanonicalUrl(URL source) throws IOException {
+		return source;
+	}
+
 	/**
 	 * Process the given story resource into a partially filled {@link Story}
 	 * object containing the name and metadata, except for the description.
@@ -353,7 +302,7 @@ public abstract class BasicSupport {
 	 *             in case of I/O error
 	 */
 	public Story processMeta(URL url) throws IOException {
-		return processMeta(url, true, false);
+		return processMeta(url, true, false, null);
 	}
 
 	/**
@@ -365,46 +314,63 @@ public abstract class BasicSupport {
 	 * 
 	 * @param close
 	 *            close "this" and "in" when done
+	 * @param pg
+	 *            the optional progress reporter
 	 * 
 	 * @return the {@link Story}
 	 * 
 	 * @throws IOException
 	 *             in case of I/O error
 	 */
-	protected Story processMeta(URL url, boolean close, boolean getDesc)
-			throws IOException {
-		in = Instance.getCache().open(url, this, false);
+	protected Story processMeta(URL url, boolean close, boolean getDesc,
+			Progress pg) throws IOException {
+		if (pg == null) {
+			pg = new Progress();
+		} else {
+			pg.setMinMax(0, 100);
+		}
+
+		login();
+		pg.setProgress(10);
+
+		url = getCanonicalUrl(url);
+
+		setCurrentReferer(url);
+
+		in = openInput(url);
 		if (in == null) {
 			return null;
 		}
 
 		try {
-			preprocess(getInput());
+			preprocess(url, getInput());
+			pg.setProgress(30);
 
 			Story story = new Story();
-			story.setMeta(new MetaData());
-			story.getMeta().setTitle(ifUnhtml(getTitle(url, getInput())));
-			story.getMeta().setAuthor(
-					fixAuthor(ifUnhtml(getAuthor(url, getInput()))));
-			story.getMeta().setDate(ifUnhtml(getDate(url, getInput())));
-			story.getMeta().setTags(getTags(url, getInput()));
-			story.getMeta().setSource(getSourceName());
-			story.getMeta().setPublisher(
-					ifUnhtml(getPublisher(url, getInput())));
-			story.getMeta().setUuid(getUuid(url, getInput()));
-			story.getMeta().setLuid(getLuid(url, getInput()));
-			story.getMeta().setLang(getLang(url, getInput()));
-			story.getMeta().setSubject(ifUnhtml(getSubject(url, getInput())));
-			story.getMeta().setImageDocument(isImageDocument(url, getInput()));
+			MetaData meta = getMeta(url, getInput());
+			if (meta == null) { // support found nothing: keep an empty, non-null meta
+				meta = new MetaData();
+			}
+			if (meta.getCreationDate() == null
+					|| meta.getCreationDate().isEmpty()) {
+				meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
+			}
+			story.setMeta(meta);
+			pg.setProgress(50);
+			if (meta.getCover() == null) {
+				meta.setCover(getDefaultCover(meta.getSubject()));
+			}
+			pg.setProgress(60);
 
 			if (getDesc) {
 				String descChapterName = Instance.getTrans().getString(
 						StringId.DESCRIPTION);
 				story.getMeta().setResume(
 						makeChapter(url, 0, descChapterName,
-								getDesc(url, getInput())));
+								getDesc(url, getInput()), null));
 			}
 
+			pg.setProgress(100);
 			return story;
 		} finally {
 			if (close) {
@@ -418,6 +384,8 @@ public abstract class BasicSupport {
 					in.close();
 				}
 			}
+
+			setCurrentReferer(null);
 		}
 	}
 
@@ -427,64 +395,96 @@ public abstract class BasicSupport {
 	 * 
 	 * @param url
 	 *            the story resource
+	 * @param pg
+	 *            the optional progress reporter
 	 * 
 	 * @return the {@link Story}
 	 * 
 	 * @throws IOException
 	 *             in case of I/O error
 	 */
-	public Story process(URL url) throws IOException {
-		setCurrentReferer(url);
+	public Story process(URL url, Progress pg) throws IOException {
+		if (pg == null) {
+			pg = new Progress();
+		} else {
+			pg.setMinMax(0, 100);
+		}
 
+		url = getCanonicalUrl(url);
+		pg.setProgress(1);
 		try {
-			Story story = processMeta(url, false, true);
+			Progress pgMeta = new Progress();
+			pg.addProgress(pgMeta, 10);
+			Story story = processMeta(url, false, true, pgMeta);
+			if (!pgMeta.isDone()) {
+				pgMeta.setProgress(pgMeta.getMax()); // 10%
+			}
+
 			if (story == null) {
+				pg.setProgress(90);
 				return null;
 			}
 
-			story.setChapters(new ArrayList<Chapter>());
+			pg.setName("Retrieving " + story.getMeta().getTitle());
 
-			URL cover = getCover(url, getInput());
-			if (cover == null) {
-				String subject = story.getMeta() == null ? null : story
-						.getMeta().getSubject();
-				if (subject != null && !subject.isEmpty()
-						&& Instance.getCoverDir() != null) {
-					File fileCover = new File(Instance.getCoverDir(), subject);
-					cover = getImage(fileCover.toURI().toURL(), subject);
-				}
-			}
+			setCurrentReferer(url);
 
-			if (cover != null) {
-				InputStream coverIn = null;
-				try {
-					coverIn = Instance.getCache().open(cover, this, true);
-					story.getMeta().setCover(StringUtils.toImage(coverIn));
-				} catch (IOException e) {
-					Instance.syserr(new IOException(Instance.getTrans()
-							.getString(StringId.ERR_BS_NO_COVER, cover), e));
-				} finally {
-					if (coverIn != null)
-						coverIn.close();
-				}
+			Progress pgGetChapters = new Progress();
+			pg.addProgress(pgGetChapters, 10);
+			story.setChapters(new ArrayList<Chapter>());
+			List<Entry<String, URL>> chapters = getChapters(url, getInput(),
+					pgGetChapters);
+			if (!pgGetChapters.isDone()) {
+				pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
 			}
 
-			List<Entry<String, URL>> chapters = getChapters(url, getInput());
-			int i = 1;
 			if (chapters != null) {
+				Progress pgChaps = new Progress("Extracting chapters", 0,
+						chapters.size() * 300);
+				pg.addProgress(pgChaps, 80);
+
+				long words = 0;
+				int i = 1;
 				for (Entry<String, URL> chap : chapters) {
+					pgChaps.setName("Extracting chapter " + i);
 					setCurrentReferer(chap.getValue());
 					InputStream chapIn = Instance.getCache().open(
 							chap.getValue(), this, true);
+					pgChaps.setProgress(i * 100);
 					try {
-						story.getChapters().add(
-								makeChapter(url, i, chap.getKey(),
-										getChapterContent(url, chapIn, i)));
+						Progress pgGetChapterContent = new Progress();
+						Progress pgMakeChapter = new Progress();
+						pgChaps.addProgress(pgGetChapterContent, 100);
+						pgChaps.addProgress(pgMakeChapter, 100);
+
+						String content = getChapterContent(url, chapIn, i,
+								pgGetChapterContent);
+						if (!pgGetChapterContent.isDone()) {
+							pgGetChapterContent.setProgress(pgGetChapterContent
+									.getMax());
+						}
+
+						Chapter cc = makeChapter(url, i, chap.getKey(),
+								content, pgMakeChapter);
+						if (!pgMakeChapter.isDone()) {
+							pgMakeChapter.setProgress(pgMakeChapter.getMax());
+						}
+
+						words += cc.getWords();
+						story.getChapters().add(cc);
+						if (story.getMeta() != null) {
+							story.getMeta().setWords(words);
+						}
 					} finally {
 						chapIn.close();
 					}
+
 					i++;
 				}
+
+				pgChaps.setName("Extracting chapters");
+			} else {
+				pg.setProgress(80);
 			}
 
 			return story;
@@ -500,12 +500,12 @@ public abstract class BasicSupport {
 				in.close();
 			}
 
-			currentReferer = null;
+			setCurrentReferer(null);
 		}
 	}
 
 	/**
-	 * The support type.$
+	 * The support type.
 	 * 
 	 * @return the type
 	 */
@@ -548,177 +548,17 @@ public abstract class BasicSupport {
 	}
 
 	/**
-	 * Return the story publisher (by default,
-	 * {@link BasicSupport#getSourceName()}).
-	 * 
-	 * @param source
-	 *            the source of the story
-	 * @param in
-	 *            the input (the main resource)
-	 * 
-	 * @return the publisher
-	 * 
-	 * @throws IOException
-	 *             in case of I/O error
-	 */
-	protected String getPublisher(URL source, InputStream in)
-			throws IOException {
-		return getSourceName();
-	}
-
-	/**
-	 * Return the story UUID, a unique value representing the story (it is often
-	 * an URL).
-	 * <p>
-	 * By default, this is the {@link URL} of the resource.
-	 * 
-	 * @param source
-	 *            the source of the story
-	 * @param in
-	 *            the input (the main resource)
-	 * 
-	 * @return the uuid
-	 * 
-	 * @throws IOException
-	 *             in case of I/O error
-	 */
-	protected String getUuid(URL source, InputStream in) throws IOException {
-		return source.toString();
-	}
-
-	/**
-	 * Return the story Library UID, a unique value representing the story (it
-	 * is often a number) in the local library.
-	 * <p>
-	 * By default, this is empty.
-	 * 
-	 * @param source
-	 *            the source of the story
-	 * @param in
-	 *            the input (the main resource)
-	 * 
-	 * @return the id
-	 * 
-	 * @throws IOException
-	 *             in case of I/O error
-	 */
-	protected String getLuid(URL source, InputStream in) throws IOException {
-		return "";
-	}
-
-	/**
-	 * Return the 2-letter language code of this story.
-	 * <p>
-	 * By default, this is 'EN'.
-	 * 
-	 * @param source
-	 *            the source of the story
-	 * @param in
-	 *            the input (the main resource)
-	 * 
-	 * @return the language
-	 * 
-	 * @throws IOException
-	 *             in case of I/O error
-	 */
-	protected String getLang(URL source, InputStream in) throws IOException {
-		return "EN";
-	}
-
-	/**
-	 * Return the list of tags for this story.
+	 * Prepare the support if needed before processing.
 	 * 
 	 * @param source
 	 *            the source of the story
 	 * @param in
 	 *            the input (the main resource)
 	 * 
-	 * @return the tags
-	 * 
-	 * @throws IOException
-	 *             in case of I/O error
-	 */
-	protected List<String> getTags(URL source, InputStream in)
-			throws IOException {
-		return new ArrayList<String>();
-	}
-
-	/**
-	 * Return the first line from the given input which correspond to the given
-	 * selectors.
-	 * <p>
-	 * Do not reset the input, which will be pointing at the line just after the
-	 * result (input will be spent if no result is found).
-	 * 
-	 * @param in
-	 *            the input
-	 * @param needle
-	 *            a string that must be found inside the target line
-	 * @param relativeLine
-	 *            the line to return based upon the target line position (-1 =
-	 *            the line before, 0 = the target line...)
-	 * 
-	 * @return the line
-	 */
-	protected String getLine(InputStream in, String needle, int relativeLine) {
-		return getLine(in, needle, relativeLine, true);
-	}
-
-	/**
-	 * Return a line from the given input which correspond to the given
-	 * selectors.
-	 * <p>
-	 * Do not reset the input, which will be pointing at the line just after the
-	 * result (input will be spent if no result is found) when first is TRUE,
-	 * and will always be spent if first is FALSE.
-	 * 
-	 * @param in
-	 *            the input
-	 * @param needle
-	 *            a string that must be found inside the target line
-	 * @param relativeLine
-	 *            the line to return based upon the target line position (-1 =
-	 *            the line before, 0 = the target line...)
-	 * @param first
-	 *            takes the first result (as opposed to the last one, which will
-	 *            also always spend the input)
-	 * 
-	 * @return the line
-	 */
-	protected String getLine(InputStream in, String needle, int relativeLine,
-			boolean first) {
-		String rep = null;
-
-		List<String> lines = new ArrayList<String>();
-		@SuppressWarnings("resource")
-		Scanner scan = new Scanner(in, "UTF-8");
-		int index = -1;
-		scan.useDelimiter("\\n");
-		while (scan.hasNext()) {
-			lines.add(scan.next());
-
-			if (index == -1 && lines.get(lines.size() - 1).contains(needle)) {
-				index = lines.size() - 1;
-			}
-
-			if (index >= 0 && index + relativeLine < lines.size()) {
-				rep = lines.get(index + relativeLine);
-				if (first) {
-					break;
-				}
-			}
-		}
-
-		return rep;
-	}
-
-	/**
-	 * Prepare the support if needed before processing.
-	 * 
 	 * @throws IOException
 	 *             on I/O error
 	 */
-	protected void preprocess(InputStream in) throws IOException {
+	protected void preprocess(URL source, InputStream in) throws IOException {
 	}
 
 	/**
@@ -740,6 +580,8 @@ public abstract class BasicSupport {
 	 *            the chapter name
 	 * @param content
 	 *            the chapter content
+	 * @param pg
+	 *            the optional progress reporter
 	 * 
 	 * @return the {@link Chapter}
 	 * 
@@ -747,8 +589,7 @@ public abstract class BasicSupport {
 	 *             in case of I/O error
 	 */
 	protected Chapter makeChapter(URL source, int number, String name,
-			String content) throws IOException {
-
+			String content, Progress pg) throws IOException {
 		// Chapter name: process it correctly, then remove the possible
 		// redundant "Chapter x: " in front of it
 		String chapterName = processPara(name).getContent().trim();
@@ -775,39 +616,89 @@ public abstract class BasicSupport {
 
 		Chapter chap = new Chapter(number, chapterName);
 
-		if (content == null) {
-			return chap;
+		if (content != null) {
+			List<Paragraph> paras = makeParagraphs(source, content, pg);
+			long words = 0;
+			for (Paragraph para : paras) {
+				words += para.getWords();
+			}
+			chap.setParagraphs(paras);
+			chap.setWords(words);
+		}
+
+		return chap;
+
+	}
+
+	/**
+	 * Convert the given content into {@link Paragraph}s.
+	 * 
+	 * @param source
+	 *            the source URL of the story
+	 * @param content
+	 *            the textual content
+	 * @param pg
+	 *            the optional progress reporter
+	 * 
+	 * @return the {@link Paragraph}s
+	 * 
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	protected List<Paragraph> makeParagraphs(URL source, String content,
+			Progress pg) throws IOException {
+		if (pg == null) {
+			pg = new Progress();
 		}
 
 		if (isHtml()) {
 			// Special <HR> processing:
 			content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
-					"\n* * *\n");
+					"<br/>* * *<br/>");
 		}
 
-		InputStream in = new ByteArrayInputStream(
-				content.getBytes(StandardCharsets.UTF_8));
-		try {
-			@SuppressWarnings("resource")
-			Scanner scan = new Scanner(in, "UTF-8");
-			scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
-
-			List<Paragraph> paras = new ArrayList<Paragraph>();
-			while (scan.hasNext()) {
-				String line = scan.next().trim();
-				boolean image = false;
-				if (line.startsWith("[") && line.endsWith("]")) {
-					URL url = getImage(source,
-							line.substring(1, line.length() - 1).trim());
-					if (url != null) {
-						paras.add(new Paragraph(url));
-						image = true;
+		List<Paragraph> paras = new ArrayList<Paragraph>();
+
+		if (content != null && !content.trim().isEmpty()) {
+			if (isHtml()) {
+				String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
+				pg.setMinMax(0, tab.length);
+				int i = 1;
+				for (String line : tab) {
+					if (line.startsWith("[") && line.endsWith("]")) {
+						pg.setName("Extracting image " + i);
+					}
+					paras.add(makeParagraph(source, line.trim()));
+					pg.setProgress(i++);
+				}
+				pg.setName(null);
+			} else {
+				List<String> lines = new ArrayList<String>();
+				BufferedReader buff = null;
+				try {
+					buff = new BufferedReader(
+							new InputStreamReader(new ByteArrayInputStream(
+									content.getBytes("UTF-8")), "UTF-8"));
+					for (String line = buff.readLine(); line != null; line = buff
+							.readLine()) {
+						lines.add(line.trim());
+					}
+				} finally {
+					if (buff != null) {
+						buff.close();
 					}
 				}
 
-				if (!image) {
-					paras.add(processPara(line));
+				pg.setMinMax(0, lines.size());
+				int i = 1; // 1-based like the HTML branch, so the last line hits max
+				for (String line : lines) {
+					if (line.startsWith("[") && line.endsWith("]")) {
+						pg.setName("Extracting image " + i);
+					}
+					paras.add(makeParagraph(source, line));
+					pg.setProgress(i++);
 				}
+				pg.setName(null);
 			}
 
 			// Check quotes for "bad" format
@@ -818,54 +709,114 @@ public abstract class BasicSupport {
 			paras = newParas;
 
 			// Remove double blanks/brks
-			boolean space = false;
-			boolean brk = true;
-			for (int i = 0; i < paras.size(); i++) {
-				Paragraph para = paras.get(i);
-				boolean thisSpace = para.getType() == ParagraphType.BLANK;
-				boolean thisBrk = para.getType() == ParagraphType.BREAK;
-
-				if (space && thisBrk) {
-					paras.remove(i - 1);
-					i--;
-				} else if ((space || brk) && (thisSpace || thisBrk)) {
-					paras.remove(i);
-					i--;
-				}
+			fixBlanksBreaks(paras);
+		}
 
-				space = thisSpace;
-				brk = thisBrk;
-			}
+		return paras;
+	}
 
-			// Remove blank/brk at start
-			if (paras.size() > 0
-					&& (paras.get(0).getType() == ParagraphType.BLANK || paras
-							.get(0).getType() == ParagraphType.BREAK)) {
-				paras.remove(0);
-			}
+	/**
+	 * Convert the given line into a single {@link Paragraph}.
+	 * 
+	 * @param source
+	 *            the source URL of the story
+	 * @param line
+	 *            the textual content of the paragraph
+	 * 
+	 * @return the {@link Paragraph}
+	 */
+	private Paragraph makeParagraph(URL source, String line) {
+		URL image = null;
+		if (line.startsWith("[") && line.endsWith("]")) {
+			image = getImageUrl(this, source,
+					line.substring(1, line.length() - 1).trim());
+		}
 
-			// Remove blank/brk at end
-			int last = paras.size() - 1;
-			if (paras.size() > 0
-					&& (paras.get(last).getType() == ParagraphType.BLANK || paras
-							.get(last).getType() == ParagraphType.BREAK)) {
-				paras.remove(last);
+		if (image != null) {
+			return new Paragraph(image);
+		} else {
+			return processPara(line);
+		}
+	}
+
+	/**
+	 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
+	 * those {@link Paragraph}s.
+	 * <p>
+	 * The resulting list will not contain a starting or trailing blank/break
+	 * nor 2 blanks or breaks following each other.
+	 * 
+	 * @param paras
+	 *            the list of {@link Paragraph}s to fix
+	 */
+	protected void fixBlanksBreaks(List<Paragraph> paras) {
+		boolean space = false;
+		boolean brk = true;
+		for (int i = 0; i < paras.size(); i++) {
+			Paragraph para = paras.get(i);
+			boolean thisSpace = para.getType() == ParagraphType.BLANK;
+			boolean thisBrk = para.getType() == ParagraphType.BREAK;
+
+			if (i > 0 && space && thisBrk) {
+				paras.remove(i - 1);
+				i--;
+			} else if ((space || brk) && (thisSpace || thisBrk)) {
+				paras.remove(i);
+				i--;
 			}
 
-			chap.setParagraphs(paras);
+			space = thisSpace;
+			brk = thisBrk;
+		}
 
-			return chap;
-		} finally {
-			in.close();
+		// Remove blank/brk at start
+		if (paras.size() > 0
+				&& (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
+						0).getType() == ParagraphType.BREAK)) {
+			paras.remove(0);
+		}
+
+		// Remove blank/brk at end
+		int last = paras.size() - 1;
+		if (paras.size() > 0
+				&& (paras.get(last).getType() == ParagraphType.BLANK || paras
+						.get(last).getType() == ParagraphType.BREAK)) {
+			paras.remove(last);
 		}
 	}
 
+	/**
+	 * Get the default cover related to this subject (see <tt>.info</tt> files).
+	 * 
+	 * @param subject
+	 *            the subject
+	 * 
+	 * @return the cover if any, or NULL
+	 */
+	static BufferedImage getDefaultCover(String subject) {
+		if (subject != null && !subject.isEmpty()
+				&& Instance.getCoverDir() != null) {
+			try {
+				File fileCover = new File(Instance.getCoverDir(), subject);
+				return getImage(null, fileCover.toURI().toURL(), subject);
+			} catch (MalformedURLException e) { // ignored: no URL, so no default cover
+			}
+		}
+
+		return null;
+	}
+
 	/**
 	 * Return the list of supported image extensions.
 	 * 
+	 * @param emptyAllowed
+	 *            TRUE to allow an empty extension in first position, which can be
+	 *            used when you may already have an extension in your input but
+	 *            are not sure about it
+	 * 
 	 * @return the extensions
 	 */
-	protected String[] getImageExt(boolean emptyAllowed) {
+	static String[] getImageExt(boolean emptyAllowed) {
 		if (emptyAllowed) {
 			return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 		} else {
@@ -882,60 +833,99 @@ public abstract class BasicSupport {
 	 * @param line
 	 *            the resource to check
 	 * 
-	 * @return the image URL if found, or NULL
+	 * @return the image if found, or NULL
 	 * 
 	 */
-	protected URL getImage(URL source, String line) {
-		String path = new File(source.getFile()).getParent();
-		URL url = null;
-
-		// try for files
-		try {
-			String urlBase = new File(new File(path), line.trim()).toURI()
-					.toURL().toString();
-			for (String ext : getImageExt(true)) {
-				if (new File(urlBase + ext).exists()) {
-					url = new File(urlBase + ext).toURI().toURL();
+	static BufferedImage getImage(BasicSupport support, URL source, String line) {
+		URL url = getImageUrl(support, source, line);
+		if (url != null) {
+			InputStream in = null;
+			try {
+				in = Instance.getCache().open(url, getSupport(url), true);
+				return IOUtils.toImage(in);
+			} catch (IOException e) {
+			} finally {
+				if (in != null) {
+					try {
+						in.close();
+					} catch (IOException e) {
+					}
 				}
 			}
-		} catch (Exception e) {
-			// Nothing to do here
 		}
 
-		if (url == null) {
-			// try for URLs
-			try {
-				for (String ext : getImageExt(true)) {
-					if (Instance.getCache().check(new URL(line + ext))) {
-						url = new URL(line + ext);
+		return null;
+	}
+
+	/**
+	 * Check if the given resource can be a local image or a remote image, then
+	 * refresh the cache with it if it is.
+	 * 
+	 * @param source
+	 *            the story source
+	 * @param line
+	 *            the resource to check
+	 * 
+	 * @return the image URL if found, or NULL
+	 * 
+	 */
+	static URL getImageUrl(BasicSupport support, URL source, String line) {
+		URL url = null;
+
+		if (line != null) {
+			// try for files
+			String path = null;
+			if (source != null) {
+				path = new File(source.getFile()).getParent();
+				try {
+					String basePath = new File(new File(path), line.trim())
+							.getAbsolutePath();
+					for (String ext : getImageExt(true)) {
+						if (new File(basePath + ext).exists()) {
+							url = new File(basePath + ext).toURI().toURL();
+						}
 					}
+				} catch (Exception e) {
+					// Nothing to do here
 				}
+			}
 
-				// try out of cache
-				if (url == null) {
+			if (url == null) {
+				// try for URLs
+				try {
 					for (String ext : getImageExt(true)) {
-						try {
+						if (Instance.getCache().check(new URL(line + ext))) {
 							url = new URL(line + ext);
-							Instance.getCache().refresh(url, this, true);
 							break;
-						} catch (IOException e) {
-							// no image with this ext
-							url = null;
 						}
 					}
+
+					// try out of cache
+					if (url == null) {
+						for (String ext : getImageExt(true)) {
+							try {
+								url = new URL(line + ext);
+								Instance.getCache().refresh(url, support, true);
+								break;
+							} catch (IOException e) {
+								// no image with this ext
+								url = null;
+							}
+						}
+					}
+				} catch (MalformedURLException e) {
+					// Not an url
 				}
-			} catch (MalformedURLException e) {
-				// Not an url
 			}
-		}
 
-		// refresh the cached file
-		if (url != null) {
-			try {
-				Instance.getCache().refresh(url, this, true);
-			} catch (IOException e) {
-				// woops, broken image
-				url = null;
+			// refresh the cached file
+			if (url != null) {
+				try {
+					Instance.getCache().refresh(url, support, true);
+				} catch (IOException e) {
+					// woops, broken image
+					url = null;
+				}
 			}
 		}
 
@@ -943,18 +933,45 @@ public abstract class BasicSupport {
 	}
 
 	/**
-	 * Reset then return {@link BasicSupport#in}.
+	 * Open the input file that will be used through the support.
 	 * 
-	 * @return {@link BasicSupport#in}
+	 * @param source
+	 *            the source {@link URL}
+	 * 
+	 * @return the {@link InputStream}
 	 * 
 	 * @throws IOException
 	 *             in case of I/O error
 	 */
-	protected InputStream getInput() throws IOException {
-		in.reset();
+	protected InputStream openInput(URL source) throws IOException {
+		return Instance.getCache().open(source, this, false);
+	}
+
+	/**
+	 * Reset the given {@link InputStream} and return it.
+	 * 
+	 * @param in
+	 *            the {@link InputStream} to reset
+	 * 
+	 * @return the same {@link InputStream} after reset
+	 */
+	protected InputStream reset(InputStream in) {
+		try {
+			in.reset();
+		} catch (IOException e) { // best effort: on failure, returned as-is (not rewound)
+		}
 		return in;
 	}
 
+	/**
+	 * Reset then return {@link BasicSupport#in}.
+	 * 
+	 * @return {@link BasicSupport#in}
+	 */
+	protected InputStream getInput() {
+		return reset(in);
+	}
+
 	/**
 	 * Fix the author name if it is prefixed with some "by" {@link String}.
 	 * 
@@ -963,7 +980,7 @@ public abstract class BasicSupport {
 	 * 
 	 * @return the author without prefixes
 	 */
-	private String fixAuthor(String author) {
+	protected String fixAuthor(String author) {
 		if (author != null) {
 			for (String suffix : new String[] { " ", ":" }) {
 				for (String byString : Instance.getConfig()
@@ -990,34 +1007,71 @@ public abstract class BasicSupport {
 	 * paragraphs (quotes or not)).
 	 * 
 	 * @param para
-	 *            the paragraph to requotify (not necessaraly a quote)
+	 *            the paragraph to requotify (not necessarily a quote)
 	 * 
 	 * @return the correctly (or so we hope) quotified paragraphs
 	 */
-	private List<Paragraph> requotify(Paragraph para) {
+	protected List<Paragraph> requotify(Paragraph para) {
 		List<Paragraph> newParas = new ArrayList<Paragraph>();
 
-		if (para.getType() == ParagraphType.QUOTE) {
+		if (para.getType() == ParagraphType.QUOTE
+				&& para.getContent().length() > 2) {
 			String line = para.getContent();
 			boolean singleQ = line.startsWith("" + openQuote);
 			boolean doubleQ = line.startsWith("" + openDoubleQuote);
 
+			// Do not try when more than one quote at a time
+			// (some stories are not easily readable if we do)
+			if (singleQ
+					&& line.indexOf(closeQuote, 1) < line
+							.lastIndexOf(closeQuote)) {
+				newParas.add(para);
+				return newParas;
+			}
+			if (doubleQ
+					&& line.indexOf(closeDoubleQuote, 1) < line
+							.lastIndexOf(closeDoubleQuote)) {
+				newParas.add(para);
+				return newParas;
+			}
+			//
+
 			if (!singleQ && !doubleQ) {
 				line = openDoubleQuote + line + closeDoubleQuote;
-				newParas.add(new Paragraph(ParagraphType.QUOTE, line));
+				newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
+						.getWords()));
 			} else {
+				char open = singleQ ? openQuote : openDoubleQuote;
 				char close = singleQ ? closeQuote : closeDoubleQuote;
-				int posClose = line.indexOf(close);
-				int posDot = line.indexOf(".");
-				while (posDot >= 0 && posDot < posClose) {
-					posDot = line.indexOf(".", posDot + 1);
+
+				int posDot = -1;
+				boolean inQuote = false;
+				int i = 0;
+				for (char car : line.toCharArray()) {
+					if (car == open) {
+						inQuote = true;
+					} else if (car == close) {
+						inQuote = false;
+					} else if (car == '.' && !inQuote) {
+						posDot = i;
+						break;
+					}
+					i++;
 				}
 
 				if (posDot >= 0) {
 					String rest = line.substring(posDot + 1).trim();
 					line = line.substring(0, posDot + 1).trim();
-					newParas.add(new Paragraph(ParagraphType.QUOTE, line));
-					newParas.addAll(requotify(processPara(rest)));
+					long words = 1;
+					for (char car : line.toCharArray()) {
+						if (car == ' ') {
+							words++;
+						}
+					}
+					newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
+					if (!rest.isEmpty()) {
+						newParas.addAll(requotify(processPara(rest)));
+					}
 				} else {
 					newParas.add(para);
 				}
@@ -1039,7 +1093,7 @@ public abstract class BasicSupport {
 	 * 
 	 * @return the processed {@link Paragraph}
 	 */
-	private Paragraph processPara(String line) {
+	protected Paragraph processPara(String line) {
 		line = ifUnhtml(line).trim();
 
 		boolean space = true;
@@ -1048,6 +1102,7 @@ public abstract class BasicSupport {
 		boolean tentativeCloseQuote = false;
 		char prev = '\0';
 		int dashCount = 0;
+		long words = 1;
 
 		StringBuilder builder = new StringBuilder();
 		for (char car : line.toCharArray()) {
@@ -1062,11 +1117,16 @@ public abstract class BasicSupport {
 
 			if (tentativeCloseQuote) {
 				tentativeCloseQuote = false;
-				if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
-						|| (car >= '0' && car <= '9')) {
+				if (Character.isLetterOrDigit(car)) {
 					builder.append("'");
 				} else {
-					builder.append(closeQuote);
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.append(closeDoubleQuote);
+						continue;
+					} else {
+						builder.append(closeQuote);
+					}
 				}
 			}
 
@@ -1076,15 +1136,31 @@ public abstract class BasicSupport {
 			case '\t':
 			case '\n': // just in case
 			case '\r': // just in case
+				if (builder.length() > 0
+						&& builder.charAt(builder.length() - 1) != ' ') {
+					words++;
+				}
 				builder.append(' ');
 				break;
 
 			case '\'':
 				if (space || (brk && quote)) {
 					quote = true;
-					builder.append(openQuote);
-				} else if (prev == ' ') {
-					builder.append(openQuote);
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.deleteCharAt(builder.length() - 1);
+						builder.append(openDoubleQuote);
+					} else {
+						builder.append(openQuote);
+					}
+				} else if (prev == ' ' || prev == car) {
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.deleteCharAt(builder.length() - 1);
+						builder.append(openDoubleQuote);
+					} else {
+						builder.append(openQuote);
+					}
 				} else {
 					// it is a quote ("I'm off") or a 'quote' ("This
 					// 'good' restaurant"...)
@@ -1137,7 +1213,13 @@ public abstract class BasicSupport {
 					quote = true;
 					builder.append(openQuote);
 				} else {
-					builder.append(openQuote);
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.deleteCharAt(builder.length() - 1);
+						builder.append(openDoubleQuote);
+					} else {
+						builder.append(openQuote);
+					}
 				}
 				space = false;
 				brk = false;
@@ -1150,7 +1232,13 @@ public abstract class BasicSupport {
 			case 'ã':
 				space = false;
 				brk = false;
-				builder.append(closeQuote);
+				// handle double-single quotes as double quotes
+				if (prev == car) {
+					builder.deleteCharAt(builder.length() - 1);
+					builder.append(closeDoubleQuote);
+				} else {
+					builder.append(closeQuote);
+				}
 				break;
 
 			case 'Â«':
@@ -1204,11 +1292,11 @@ public abstract class BasicSupport {
 			type = ParagraphType.QUOTE;
 		}
 
-		return new Paragraph(type, line);
+		return new Paragraph(type, line, words);
 	}
 
 	/**
-	 * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
+	 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
 	 * true.
 	 * 
 	 * @param input
@@ -1248,8 +1336,8 @@ public abstract class BasicSupport {
 			}
 		}
 
-		for (SupportType type : new SupportType[] { SupportType.TEXT,
-				SupportType.INFO_TEXT }) {
+		for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
+				SupportType.TEXT }) {
 			BasicSupport support = getSupport(type);
 			if (support != null && support.supports(url)) {
 				return support;
@@ -1283,10 +1371,96 @@ public abstract class BasicSupport {
 			return new MangaFox().setType(type);
 		case E621:
 			return new E621().setType(type);
+		case YIFFSTAR:
+			return new YiffStar().setType(type);
 		case CBZ:
 			return new Cbz().setType(type);
+		case HTML:
+			return new Html().setType(type);
 		}
 
 		return null;
 	}
+
+	/**
+	 * Return the first line from the given input which corresponds to the given
+	 * selectors.
+	 * 
+	 * @param in
+	 *            the input
+	 * @param needle
+	 *            a string that must be found inside the target line (also
+	 *            supports "^" at start to say "only if it starts with" the
+	 *            needle)
+	 * @param relativeLine
+	 *            the line to return based upon the target line position (-1 =
+	 *            the line before, 0 = the target line...)
+	 * 
+	 * @return the line
+	 */
+	static String getLine(InputStream in, String needle, int relativeLine) {
+		return getLine(in, needle, relativeLine, true);
+	}
+
+	/**
+	 * Return a line from the given input which corresponds to the given
+	 * selectors (the input is reset first, then read line by line).
+	 * 
+	 * @param in
+	 *            the input
+	 * @param needle
+	 *            a string that must be found inside the target line (also
+	 *            supports "^" at start to say "only if it starts with" the
+	 *            needle)
+	 * @param relativeLine
+	 *            the line to return based upon the target line position (-1 =
+	 *            the line before, 0 = the target line...)
+	 * @param first
+	 *            stop reading as soon as a result is found (if FALSE, the
+	 *            whole input is always consumed)
+	 * 
+	 * @return the line, or NULL if no such line exists in the input
+	 */
+	static String getLine(InputStream in, String needle, int relativeLine,
+			boolean first) {
+		String rep = null;
+
+		try {
+			in.reset();
+		} catch (IOException e) {
+			Instance.syserr(e);
+		}
+
+		List<String> lines = new ArrayList<String>();
+		@SuppressWarnings("resource")
+		Scanner scan = new Scanner(in, "UTF-8"); // not closed: would close "in"
+		int index = -1;
+		scan.useDelimiter("\\n");
+		while (scan.hasNext()) {
+			lines.add(scan.next());
+
+			if (index == -1) {
+				if (needle.startsWith("^")) {
+					if (lines.get(lines.size() - 1).startsWith(
+							needle.substring(1))) {
+						index = lines.size() - 1;
+					}
+				} else {
+					if (lines.get(lines.size() - 1).contains(needle)) {
+						index = lines.size() - 1;
+					}
+				}
+			}
+
+			int target = index + relativeLine; // may be < 0 (negative offset)
+			if (index >= 0 && target >= 0 && target < lines.size()) {
+				rep = lines.get(target);
+				if (first) {
+					break;
+				}
+			}
+		}
+
+		return rep;
+	}
 }