Update nikiroo-utils (fix for e-hentai language)
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
index d6801c0dfaf7ba8cb26aff123392196241076d4e..471147ea0548ede456aa66e01a6337c9ae4ef1ef 100644 (file)
@@ -1,13 +1,16 @@
 package be.nikiroo.fanfix.supported;
 
 import java.awt.image.BufferedImage;
+import java.io.BufferedReader;
 import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -23,6 +26,7 @@ import be.nikiroo.fanfix.data.Paragraph;
 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
 import be.nikiroo.fanfix.data.Story;
 import be.nikiroo.utils.IOUtils;
+import be.nikiroo.utils.Progress;
 import be.nikiroo.utils.StringUtils;
 
 /**
@@ -56,8 +60,14 @@ public abstract class BasicSupport {
                MANGAFOX,
                /** Furry website with comics support */
                E621,
+               /** Furry website with stories */
+               YIFFSTAR,
+               /** Comics and images groups, mostly but not only NSFW */
+               E_HENTAI,
                /** CBZ files */
-               CBZ;
+               CBZ,
+               /** HTML files */
+               HTML;
 
                /**
                 * A description of this support type (more information than the
@@ -145,16 +155,16 @@ public abstract class BasicSupport {
 
        private InputStream in;
        private SupportType type;
-       private URL currentReferer; // with on 'r', as in 'HTTP'...
+       private URL currentReferer; // with only one 'r', as in 'HTTP'...
 
        // quote chars
-       private char openQuote = Instance.getTrans().getChar(
+       private char openQuote = Instance.getTrans().getCharacter(
                        StringId.OPEN_SINGLE_QUOTE);
-       private char closeQuote = Instance.getTrans().getChar(
+       private char closeQuote = Instance.getTrans().getCharacter(
                        StringId.CLOSE_SINGLE_QUOTE);
-       private char openDoubleQuote = Instance.getTrans().getChar(
+       private char openDoubleQuote = Instance.getTrans().getCharacter(
                        StringId.OPEN_DOUBLE_QUOTE);
-       private char closeDoubleQuote = Instance.getTrans().getChar(
+       private char closeDoubleQuote = Instance.getTrans().getCharacter(
                        StringId.CLOSE_DOUBLE_QUOTE);
 
        /**
@@ -208,6 +218,8 @@ public abstract class BasicSupport {
         *            the source of the story
         * @param in
         *            the input (the main resource)
+        * @param pg
+        *            the optional progress reporter
         * 
         * @return the chapters
         * 
@@ -215,7 +227,7 @@ public abstract class BasicSupport {
         *             in case of I/O error
         */
        protected abstract List<Entry<String, URL>> getChapters(URL source,
-                       InputStream in) throws IOException;
+                       InputStream in, Progress pg) throws IOException;
 
        /**
         * Return the content of the chapter (possibly HTML encoded, if
@@ -227,6 +239,8 @@ public abstract class BasicSupport {
         *            the input (the main resource)
         * @param number
         *            the chapter number
+        * @param pg
+        *            the optional progress reporter
         * 
         * @return the content
         * 
@@ -234,7 +248,17 @@ public abstract class BasicSupport {
         *             in case of I/O error
         */
        protected abstract String getChapterContent(URL source, InputStream in,
-                       int number) throws IOException;
+                       int number, Progress pg) throws IOException;
+
+       /**
+        * Log into the support (a no-op here; a support may override it if needed).
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public void login() throws IOException {
+
+       }
 
        /**
         * Return the list of cookies (values included) that must be used to
@@ -244,11 +268,29 @@ public abstract class BasicSupport {
         * it.
         * 
         * @return the cookies
+        * 
+        * @throws IOException
+        *             in case of I/O error
         */
-       public Map<String, String> getCookies() {
+       public Map<String, String> getCookies() throws IOException {
                return new HashMap<String, String>();
        }
 
+       /**
+        * Return the canonical form of the main {@link URL}.
+        * 
+        * @param source
+        *            the source {@link URL}
+        * 
+        * @return the canonical form of this {@link URL}
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public URL getCanonicalUrl(URL source) throws IOException {
+               return source; // base implementation: the URL is returned unchanged
+       }
+
        /**
         * Process the given story resource into a partially filled {@link Story}
         * object containing the name and metadata, except for the description.
@@ -262,7 +304,7 @@ public abstract class BasicSupport {
         *             in case of I/O error
         */
        public Story processMeta(URL url) throws IOException {
-               return processMeta(url, true, false);
+               return processMeta(url, true, false, null); // close=true, getDesc=false, pg=null
        }
 
        /**
@@ -274,38 +316,63 @@ public abstract class BasicSupport {
         * 
         * @param close
         *            close "this" and "in" when done
+        * @param pg
+        *            the optional progress reporter
         * 
         * @return the {@link Story}
         * 
         * @throws IOException
         *             in case of I/O error
         */
-       protected Story processMeta(URL url, boolean close, boolean getDesc)
-                       throws IOException {
-               in = Instance.getCache().open(url, this, false);
+       protected Story processMeta(URL url, boolean close, boolean getDesc,
+                       Progress pg) throws IOException {
+               if (pg == null) {
+                       pg = new Progress();
+               } else {
+                       pg.setMinMax(0, 100);
+               }
+
+               login();
+               pg.setProgress(10);
+
+               url = getCanonicalUrl(url);
+
+               setCurrentReferer(url);
+
+               in = openInput(url);
                if (in == null) {
                        return null;
                }
 
                try {
                        preprocess(url, getInput());
+                       pg.setProgress(30);
 
                        Story story = new Story();
                        MetaData meta = getMeta(url, getInput());
+                       if (meta.getCreationDate() == null
+                                       || meta.getCreationDate().isEmpty()) {
+                               meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
+                       }
                        story.setMeta(meta);
 
+                       pg.setProgress(50);
+
                        if (meta != null && meta.getCover() == null) {
                                meta.setCover(getDefaultCover(meta.getSubject()));
                        }
 
+                       pg.setProgress(60);
+
                        if (getDesc) {
                                String descChapterName = Instance.getTrans().getString(
                                                StringId.DESCRIPTION);
                                story.getMeta().setResume(
                                                makeChapter(url, 0, descChapterName,
-                                                               getDesc(url, getInput())));
+                                                               getDesc(url, getInput()), null));
                        }
 
+                       pg.setProgress(100);
                        return story;
                } finally {
                        if (close) {
@@ -319,6 +386,8 @@ public abstract class BasicSupport {
                                        in.close();
                                }
                        }
+
+                       setCurrentReferer(null);
                }
        }
 
@@ -328,40 +397,96 @@ public abstract class BasicSupport {
         * 
         * @param url
         *            the story resource
+        * @param pg
+        *            the optional progress reporter
         * 
         * @return the {@link Story}
         * 
         * @throws IOException
         *             in case of I/O error
         */
-       public Story process(URL url) throws IOException {
-               setCurrentReferer(url);
+       public Story process(URL url, Progress pg) throws IOException {
+               if (pg == null) {
+                       pg = new Progress();
+               } else {
+                       pg.setMinMax(0, 100);
+               }
 
+               url = getCanonicalUrl(url);
+               pg.setProgress(1);
                try {
-                       Story story = processMeta(url, false, true);
+                       Progress pgMeta = new Progress();
+                       pg.addProgress(pgMeta, 10);
+                       Story story = processMeta(url, false, true, pgMeta);
+                       if (!pgMeta.isDone()) {
+                               pgMeta.setProgress(pgMeta.getMax()); // 10%
+                       }
+
                        if (story == null) {
+                               pg.setProgress(90);
                                return null;
                        }
 
+                       pg.setName("Retrieving " + story.getMeta().getTitle());
+
+                       setCurrentReferer(url);
+
+                       Progress pgGetChapters = new Progress();
+                       pg.addProgress(pgGetChapters, 10);
                        story.setChapters(new ArrayList<Chapter>());
+                       List<Entry<String, URL>> chapters = getChapters(url, getInput(),
+                                       pgGetChapters);
+                       if (!pgGetChapters.isDone()) {
+                               pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
+                       }
 
-                       List<Entry<String, URL>> chapters = getChapters(url, getInput());
-                       int i = 1;
                        if (chapters != null) {
+                               Progress pgChaps = new Progress("Extracting chapters", 0,
+                                               chapters.size() * 300);
+                               pg.addProgress(pgChaps, 80);
+
+                               long words = 0;
+                               int i = 1;
                                for (Entry<String, URL> chap : chapters) {
+                                       pgChaps.setName("Extracting chapter " + i);
                                        setCurrentReferer(chap.getValue());
                                        InputStream chapIn = Instance.getCache().open(
                                                        chap.getValue(), this, true);
+                                       pgChaps.setProgress(i * 100);
                                        try {
-                                               story.getChapters().add(
-                                                               makeChapter(url, i, chap.getKey(),
-                                                                               getChapterContent(url, chapIn, i)));
+                                               Progress pgGetChapterContent = new Progress();
+                                               Progress pgMakeChapter = new Progress();
+                                               pgChaps.addProgress(pgGetChapterContent, 100);
+                                               pgChaps.addProgress(pgMakeChapter, 100);
+
+                                               String content = getChapterContent(url, chapIn, i,
+                                                               pgGetChapterContent);
+                                               if (!pgGetChapterContent.isDone()) {
+                                                       pgGetChapterContent.setProgress(pgGetChapterContent
+                                                                       .getMax());
+                                               }
+
+                                               Chapter cc = makeChapter(url, i, chap.getKey(),
+                                                               content, pgMakeChapter);
+                                               if (!pgMakeChapter.isDone()) {
+                                                       pgMakeChapter.setProgress(pgMakeChapter.getMax());
+                                               }
+
+                                               words += cc.getWords();
+                                               story.getChapters().add(cc);
+                                               if (story.getMeta() != null) {
+                                                       story.getMeta().setWords(words);
+                                               }
                                        } finally {
                                                chapIn.close();
                                        }
 
                                        i++;
                                }
+
+                               pgChaps.setName("Extracting chapters");
+                       } else {
+                               pg.setProgress(80);
                        }
 
                        return story;
@@ -377,12 +502,12 @@ public abstract class BasicSupport {
                                in.close();
                        }
 
-                       currentReferer = null;
+                       setCurrentReferer(null);
                }
        }
 
        /**
-        * The support type.$
+        * The support type.
         * 
         * @return the type
         */
@@ -457,6 +582,8 @@ public abstract class BasicSupport {
         *            the chapter name
         * @param content
         *            the chapter content
+        * @param pg
+        *            the optional progress reporter
         * 
         * @return the {@link Chapter}
         * 
@@ -464,8 +591,7 @@ public abstract class BasicSupport {
         *             in case of I/O error
         */
        protected Chapter makeChapter(URL source, int number, String name,
-                       String content) throws IOException {
-
+                       String content, Progress pg) throws IOException {
                // Chapter name: process it correctly, then remove the possible
                // redundant "Chapter x: " in front of it
                String chapterName = processPara(name).getContent().trim();
@@ -492,38 +618,89 @@ public abstract class BasicSupport {
 
                Chapter chap = new Chapter(number, chapterName);
 
-               if (content == null) {
-                       return chap;
+               if (content != null) {
+                       List<Paragraph> paras = makeParagraphs(source, content, pg);
+                       long words = 0;
+                       for (Paragraph para : paras) {
+                               words += para.getWords();
+                       }
+                       chap.setParagraphs(paras);
+                       chap.setWords(words);
+               }
+
+               return chap;
+
+       }
+
+       /**
+        * Convert the given content into {@link Paragraph}s.
+        * 
+        * @param source
+        *            the source URL of the story
+        * @param content
+        *            the textual content
+        * @param pg
+        *            the optional progress reporter
+        * 
+        * @return the {@link Paragraph}s
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       protected List<Paragraph> makeParagraphs(URL source, String content,
+                       Progress pg) throws IOException {
+               if (pg == null) {
+                       pg = new Progress();
                }
 
                if (isHtml()) {
                        // Special <HR> processing:
                        content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
-                                       "\n* * *\n");
+                                       "<br/>* * *<br/>");
                }
 
-               InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
-               try {
-                       @SuppressWarnings("resource")
-                       Scanner scan = new Scanner(in, "UTF-8");
-                       scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
-
-                       List<Paragraph> paras = new ArrayList<Paragraph>();
-                       while (scan.hasNext()) {
-                               String line = scan.next().trim();
-                               boolean image = false;
-                               if (line.startsWith("[") && line.endsWith("]")) {
-                                       URL url = getImageUrl(this, source,
-                                                       line.substring(1, line.length() - 1).trim());
-                                       if (url != null) {
-                                               paras.add(new Paragraph(url));
-                                               image = true;
+               List<Paragraph> paras = new ArrayList<Paragraph>();
+
+               if (content != null && !content.trim().isEmpty()) {
+                       if (isHtml()) {
+                               String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
+                               pg.setMinMax(0, tab.length);
+                               int i = 1;
+                               for (String line : tab) {
+                                       if (line.startsWith("[") && line.endsWith("]")) {
+                                               pg.setName("Extracting image " + i);
+                                       }
+                                       paras.add(makeParagraph(source, line.trim()));
+                                       pg.setProgress(i++);
+                               }
+                               pg.setName(null);
+                       } else {
+                               List<String> lines = new ArrayList<String>();
+                               BufferedReader buff = null;
+                               try {
+                                       buff = new BufferedReader(
+                                                       new InputStreamReader(new ByteArrayInputStream(
+                                                                       content.getBytes("UTF-8")), "UTF-8"));
+                                       for (String line = buff.readLine(); line != null; line = buff
+                                                       .readLine()) {
+                                               lines.add(line.trim());
+                                       }
+                               } finally {
+                                       if (buff != null) {
+                                               buff.close();
                                        }
                                }
 
-                               if (!image) {
-                                       paras.add(processPara(line));
+                               pg.setMinMax(0, lines.size());
+                               int i = 0;
+                               for (String line : lines) {
+                                       if (line.startsWith("[") && line.endsWith("]")) {
+                                               pg.setName("Extracting image " + i);
+                                       }
+                                       paras.add(makeParagraph(source, line));
+                                       pg.setProgress(i++);
                                }
+                               pg.setName(null);
                        }
 
                        // Check quotes for "bad" format
@@ -534,48 +711,90 @@ public abstract class BasicSupport {
                        paras = newParas;
 
                        // Remove double blanks/brks
-                       boolean space = false;
-                       boolean brk = true;
-                       for (int i = 0; i < paras.size(); i++) {
-                               Paragraph para = paras.get(i);
-                               boolean thisSpace = para.getType() == ParagraphType.BLANK;
-                               boolean thisBrk = para.getType() == ParagraphType.BREAK;
-
-                               if (space && thisBrk) {
-                                       paras.remove(i - 1);
-                                       i--;
-                               } else if ((space || brk) && (thisSpace || thisBrk)) {
-                                       paras.remove(i);
-                                       i--;
-                               }
+                       fixBlanksBreaks(paras);
+               }
 
-                               space = thisSpace;
-                               brk = thisBrk;
-                       }
+               return paras;
+       }
 
-                       // Remove blank/brk at start
-                       if (paras.size() > 0
-                                       && (paras.get(0).getType() == ParagraphType.BLANK || paras
-                                                       .get(0).getType() == ParagraphType.BREAK)) {
-                               paras.remove(0);
-                       }
+       /**
+        * Convert the given line into a single {@link Paragraph}.
+        * 
+        * @param source
+        *            the source URL of the story
+        * @param line
+        *            the textual content of the paragraph
+        * 
+        * @return the {@link Paragraph} (an image one if the line resolves to an image URL)
+        */
+       private Paragraph makeParagraph(URL source, String line) {
+               URL image = null;
+               if (line.startsWith("[") && line.endsWith("]")) { // bracketed line: candidate image reference
+                       image = getImageUrl(this, source,
+                                       line.substring(1, line.length() - 1).trim());
+               }
 
-                       // Remove blank/brk at end
-                       int last = paras.size() - 1;
-                       if (paras.size() > 0
-                                       && (paras.get(last).getType() == ParagraphType.BLANK || paras
-                                                       .get(last).getType() == ParagraphType.BREAK)) {
-                               paras.remove(last);
+               if (image != null) { // still null: line not bracketed, or no image URL was found for it
+                       return new Paragraph(image);
+               } else {
+                       return processPara(line); // not an image: process the line as a regular paragraph
+               }
+       }
+
+       /**
+        * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
+        * those {@link Paragraph}s.
+        * <p>
+        * The resulting list will not contain a starting or trailing blank/break
+        * nor 2 blanks or breaks following each other.
+        * 
+        * @param paras
+        *            the list of {@link Paragraph}s to fix (it is modified in place)
+        */
+       protected void fixBlanksBreaks(List<Paragraph> paras) {
+               boolean space = false; // type of the previously visited element was BLANK
+               boolean brk = true; // starts TRUE so that a leading blank/break is removed
+               for (int i = 0; i < paras.size(); i++) {
+                       Paragraph para = paras.get(i);
+                       boolean thisSpace = para.getType() == ParagraphType.BLANK;
+                       boolean thisBrk = para.getType() == ParagraphType.BREAK;
+
+                       if (i > 0 && space && thisBrk) { // break right after a blank: keep the break
+                               paras.remove(i - 1); // remove the element just before the break
+                               i--;
+                       } else if ((space || brk) && (thisSpace || thisBrk)) { // second blank/break in a row
+                               paras.remove(i); // remove the current element
+                               i--;
                        }
 
-                       chap.setParagraphs(paras);
+                       space = thisSpace; // updated even when the element was just removed
+                       brk = thisBrk;
+               }
 
-                       return chap;
-               } finally {
-                       in.close();
+               // Remove blank/brk at start
+               if (paras.size() > 0
+                               && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
+                                               0).getType() == ParagraphType.BREAK)) {
+                       paras.remove(0);
+               }
+
+               // Remove blank/brk at end
+               int last = paras.size() - 1;
+               if (paras.size() > 0
+                               && (paras.get(last).getType() == ParagraphType.BLANK || paras
+                                               .get(last).getType() == ParagraphType.BREAK)) {
+                       paras.remove(last);
                }
        }
 
+       /**
+        * Get the default cover related to this subject (see <tt>.info</tt> files).
+        * 
+        * @param subject
+        *            the subject
+        * 
+        * @return the cover if any, or NULL
+        */
        static BufferedImage getDefaultCover(String subject) {
                if (subject != null && !subject.isEmpty()
                                && Instance.getCoverDir() != null) {
@@ -592,6 +811,11 @@ public abstract class BasicSupport {
        /**
         * Return the list of supported image extensions.
         * 
+        * @param emptyAllowed
+        *            TRUE to allow an empty extension on first place, which can be
+        *            used when you may already have an extension in your input but
+        *            are not sure about it
+        * 
         * @return the extensions
         */
        static String[] getImageExt(boolean emptyAllowed) {
@@ -602,6 +826,18 @@ public abstract class BasicSupport {
                }
        }
 
+       /**
+        * Check if the given resource can be a local image or a remote image, then
+        * refresh the cache with it if it is.
+        * 
+        * @param source
+        *            the story source
+        * @param line
+        *            the resource to check
+        * 
+        * @return the image if found, or NULL
+        * 
+        */
        static BufferedImage getImage(BasicSupport support, URL source, String line) {
                URL url = getImageUrl(support, source, line);
                if (url != null) {
@@ -640,19 +876,35 @@ public abstract class BasicSupport {
 
                if (line != null) {
                        // try for files
-                       String path = null;
                        if (source != null) {
-                               path = new File(source.getFile()).getParent();
                                try {
-                                       String basePath = new File(new File(path), line.trim())
-                                                       .getAbsolutePath();
+
+                                       String relPath = null;
+                                       String absPath = null;
+                                       try {
+                                               String path = new File(source.getFile()).getParent();
+                                               relPath = new File(new File(path), line.trim())
+                                                               .getAbsolutePath();
+                                       } catch (Exception e) {
+                                               // Cannot be converted to path (one possibility to take
+                                               // into account: absolute path on Windows)
+                                       }
+                                       try {
+                                               absPath = new File(line.trim()).getAbsolutePath();
+                                       } catch (Exception e) {
+                                               // Cannot be converted to path (at all)
+                                       }
+
                                        for (String ext : getImageExt(true)) {
-                                               if (new File(basePath + ext).exists()) {
-                                                       url = new File(basePath + ext).toURI().toURL();
+                                               if (absPath != null && new File(absPath + ext).exists()) {
+                                                       url = new File(absPath + ext).toURI().toURL();
+                                               } else if (relPath != null
+                                                               && new File(relPath + ext).exists()) {
+                                                       url = new File(relPath + ext).toURI().toURL();
                                                }
                                        }
                                } catch (Exception e) {
-                                       // Nothing to do here
+                                       // Should not happen since we control the correct arguments
                                }
                        }
 
@@ -698,6 +950,29 @@ public abstract class BasicSupport {
                return url;
        }
 
+       /**
+        * Open the input file that will be used through the support.
+        * 
+        * @param source
+        *            the source {@link URL}
+        * 
+        * @return the {@link InputStream} (processMeta checks it for NULL before use)
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       protected InputStream openInput(URL source) throws IOException {
+               return Instance.getCache().open(source, this, false); // base implementation: opened via the cache
+       }
+
+       /**
+        * Reset the given {@link InputStream} and return it.
+        * 
+        * @param in
+        *            the {@link InputStream} to reset
+        * 
+        * @return the same {@link InputStream} after reset
+        */
        protected InputStream reset(InputStream in) {
                try {
                        in.reset();
@@ -750,11 +1025,11 @@ public abstract class BasicSupport {
         * paragraphs (quotes or not)).
         * 
         * @param para
-        *            the paragraph to requotify (not necessaraly a quote)
+        *            the paragraph to requotify (not necessarily a quote)
         * 
         * @return the correctly (or so we hope) quotified paragraphs
         */
-       private List<Paragraph> requotify(Paragraph para) {
+       protected List<Paragraph> requotify(Paragraph para) {
                List<Paragraph> newParas = new ArrayList<Paragraph>();
 
                if (para.getType() == ParagraphType.QUOTE
@@ -781,7 +1056,8 @@ public abstract class BasicSupport {
 
                        if (!singleQ && !doubleQ) {
                                line = openDoubleQuote + line + closeDoubleQuote;
-                               newParas.add(new Paragraph(ParagraphType.QUOTE, line));
+                               newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
+                                               .getWords()));
                        } else {
                                char open = singleQ ? openQuote : openDoubleQuote;
                                char close = singleQ ? closeQuote : closeDoubleQuote;
@@ -804,7 +1080,13 @@ public abstract class BasicSupport {
                                if (posDot >= 0) {
                                        String rest = line.substring(posDot + 1).trim();
                                        line = line.substring(0, posDot + 1).trim();
-                                       newParas.add(new Paragraph(ParagraphType.QUOTE, line));
+                                       long words = 1;
+                                       for (char car : line.toCharArray()) {
+                                               if (car == ' ') {
+                                                       words++;
+                                               }
+                                       }
+                                       newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
                                        if (!rest.isEmpty()) {
                                                newParas.addAll(requotify(processPara(rest)));
                                        }
@@ -829,7 +1111,7 @@ public abstract class BasicSupport {
         * 
         * @return the processed {@link Paragraph}
         */
-       private Paragraph processPara(String line) {
+       protected Paragraph processPara(String line) {
                line = ifUnhtml(line).trim();
 
                boolean space = true;
@@ -838,6 +1120,7 @@ public abstract class BasicSupport {
                boolean tentativeCloseQuote = false;
                char prev = '\0';
                int dashCount = 0;
+               long words = 1;
 
                StringBuilder builder = new StringBuilder();
                for (char car : line.toCharArray()) {
@@ -852,11 +1135,16 @@ public abstract class BasicSupport {
 
                        if (tentativeCloseQuote) {
                                tentativeCloseQuote = false;
-                               if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
-                                               || (car >= '0' && car <= '9')) {
+                               if (Character.isLetterOrDigit(car)) {
                                        builder.append("'");
                                } else {
-                                       builder.append(closeQuote);
+                                       // handle double-single quotes as double quotes
+                                       if (prev == car) {
+                                               builder.append(closeDoubleQuote);
+                                               continue;
+                                       } else {
+                                               builder.append(closeQuote);
+                                       }
                                }
                        }
 
@@ -866,15 +1154,31 @@ public abstract class BasicSupport {
                        case '\t':
                        case '\n': // just in case
                        case '\r': // just in case
+                               if (builder.length() > 0
+                                               && builder.charAt(builder.length() - 1) != ' ') {
+                                       words++;
+                               }
                                builder.append(' ');
                                break;
 
                        case '\'':
                                if (space || (brk && quote)) {
                                        quote = true;
-                                       builder.append(openQuote);
-                               } else if (prev == ' ') {
-                                       builder.append(openQuote);
+                                       // handle double-single quotes as double quotes
+                                       if (prev == car) {
+                                               builder.deleteCharAt(builder.length() - 1);
+                                               builder.append(openDoubleQuote);
+                                       } else {
+                                               builder.append(openQuote);
+                                       }
+                               } else if (prev == ' ' || prev == car) {
+                                       // handle double-single quotes as double quotes
+                                       if (prev == car) {
+                                               builder.deleteCharAt(builder.length() - 1);
+                                               builder.append(openDoubleQuote);
+                                       } else {
+                                               builder.append(openQuote);
+                                       }
                                } else {
                                        // it is a quote ("I'm off") or a 'quote' ("This
                                        // 'good' restaurant"...)
@@ -927,7 +1231,13 @@ public abstract class BasicSupport {
                                        quote = true;
                                        builder.append(openQuote);
                                } else {
-                                       builder.append(openQuote);
+                                       // handle double-single quotes as double quotes
+                                       if (prev == car) {
+                                               builder.deleteCharAt(builder.length() - 1);
+                                               builder.append(openDoubleQuote);
+                                       } else {
+                                               builder.append(openQuote);
+                                       }
                                }
                                space = false;
                                brk = false;
@@ -940,7 +1250,13 @@ public abstract class BasicSupport {
                        case '」':
                                space = false;
                                brk = false;
-                               builder.append(closeQuote);
+                               // handle double-single quotes as double quotes
+                               if (prev == car) {
+                                       builder.deleteCharAt(builder.length() - 1);
+                                       builder.append(closeDoubleQuote);
+                               } else {
+                                       builder.append(closeQuote);
+                               }
                                break;
 
                        case '«':
@@ -994,11 +1310,11 @@ public abstract class BasicSupport {
                        type = ParagraphType.QUOTE;
                }
 
-               return new Paragraph(type, line);
+               return new Paragraph(type, line, words);
        }
 
        /**
-        * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
+        * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
         * true.
         * 
         * @param input
@@ -1038,8 +1354,8 @@ public abstract class BasicSupport {
                        }
                }
 
-               for (SupportType type : new SupportType[] { SupportType.TEXT,
-                               SupportType.INFO_TEXT }) {
+               for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
+                               SupportType.TEXT }) {
                        BasicSupport support = getSupport(type);
                        if (support != null && support.supports(url)) {
                                return support;
@@ -1073,8 +1389,14 @@ public abstract class BasicSupport {
                        return new MangaFox().setType(type);
                case E621:
                        return new E621().setType(type);
+               case YIFFSTAR:
+                       return new YiffStar().setType(type);
+               case E_HENTAI:
+                       return new EHentai().setType(type);
                case CBZ:
                        return new Cbz().setType(type);
+               case HTML:
+                       return new Html().setType(type);
                }
 
                return null;
@@ -1161,4 +1483,48 @@ public abstract class BasicSupport {
 
                return rep;
        }
+
+       /**
+        * Return the text between the key and the endKey (an optional subKey can
+        * be passed, in this case we will look for the key first, then take the
+        * text between the subKey and the endKey).
+        * <p>
+        * Only the first line with the given key is used: if the subKey or the
+        * endKey is not found on that line, NULL is returned.
+        * 
+        * @param in
+        *            the input
+        * @param key
+        *            the key to match (also supports "^" at start to say
+        *            "only if it starts with" the key)
+        * @param subKey
+        *            the sub key or NULL if none
+        * @param endKey
+        *            the end key or NULL for "up to the end"
+        * @return the text or NULL if not found
+        */
+       static String getKeyLine(InputStream in, String key, String subKey,
+                       String endKey) {
+               String line = getLine(in, key, 0);
+               // NOTE(review): key is matched as-is below ("^" included) -- confirm
+               if (line == null || !line.contains(key)) {
+                       return null;
+               }
+               line = line.substring(line.indexOf(key) + key.length());
+               if (subKey != null && !subKey.isEmpty()) {
+                       int subPos = line.indexOf(subKey);
+                       if (subPos < 0) {
+                               return null;
+                       }
+                       line = line.substring(subPos + subKey.length());
+               }
+               if (endKey != null) {
+                       int endPos = line.indexOf(endKey);
+                       if (endPos < 0) {
+                               return null;
+                       }
+                       line = line.substring(0, endPos);
+               }
+               return line; // a NULL endKey means "up to the end" of the line
+       }
 }