prepare new system for getting metas in libraries

[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java

index ded09c2234ba9cf50f8db06cdfa6f72280539e07..bc91e8b40d0688e96b8ae8698f9e252ae2fe3bec 100644 (file)
--- a/src/be/nikiroo/fanfix/supported/BasicSupport.java
+++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java
@@ -1,27 +1,26 @@
  package be.nikiroo.fanfix.supported;
  
-import java.io.ByteArrayInputStream;
-import java.io.File;
  import java.io.IOException;
  import java.io.InputStream;
-import java.net.MalformedURLException;
  import java.net.URL;
-import java.nio.charset.StandardCharsets;
  import java.util.ArrayList;
+import java.util.Date;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
  import java.util.Map.Entry;
-import java.util.Scanner;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
  
  import be.nikiroo.fanfix.Instance;
-import be.nikiroo.fanfix.bundles.Config;
  import be.nikiroo.fanfix.bundles.StringId;
  import be.nikiroo.fanfix.data.Chapter;
  import be.nikiroo.fanfix.data.MetaData;
-import be.nikiroo.fanfix.data.Paragraph;
  import be.nikiroo.fanfix.data.Story;
-import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
+import be.nikiroo.utils.Progress;
  import be.nikiroo.utils.StringUtils;
  
  /**
@@ -34,135 +33,14 @@ import be.nikiroo.utils.StringUtils;
   * @author niki
   */
  public abstract class BasicSupport {
-       /**
-        * The supported input types for which we can get a {@link BasicSupport}
-        * object.
-        * 
-        * @author niki
-        */
-       public enum SupportType {
-               /** EPUB files created with this program */
-               EPUB,
-               /** Pure text file with some rules */
-               TEXT,
-               /** TEXT but with associated .info file */
-               INFO_TEXT,
-               /** My Little Pony fanfictions */
-               FIMFICTION,
-               /** Fanfictions from a lot of different universes */
-               FANFICTION,
-               /** Website with lots of Mangas */
-               MANGAFOX,
-               /** Furry website with comics support */
-               E621,
-               /** CBZ files */
-               CBZ;
-
-               /**
-                * A description of this support type (more information than the
-                * {@link BasicSupport#getSourceName()}).
-                * 
-                * @return the description
-                */
-               public String getDesc() {
-                       String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
-                                       this.name());
-
-                       if (desc == null) {
-                               desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
-                       }
-
-                       return desc;
-               }
-
-               /**
-                * The name of this support type (a short version).
-                * 
-                * @return the name
-                */
-               public String getSourceName() {
-                       BasicSupport support = BasicSupport.getSupport(this);
-                       if (support != null) {
-                               return support.getSourceName();
-                       }
-
-                       return null;
-               }
-
-               @Override
-               public String toString() {
-                       return super.toString().toLowerCase();
-               }
-
-               /**
-                * Call {@link SupportType#valueOf(String.toUpperCase())}.
-                * 
-                * @param typeName
-                *            the possible type name
-                * 
-                * @return NULL or the type
-                */
-               public static SupportType valueOfUC(String typeName) {
-                       return SupportType.valueOf(typeName == null ? null : typeName
-                                       .toUpperCase());
-               }
-
-               /**
-                * Call {@link SupportType#valueOf(String.toUpperCase())} but return
-                * NULL for NULL instead of raising exception.
-                * 
-                * @param typeName
-                *            the possible type name
-                * 
-                * @return NULL or the type
-                */
-               public static SupportType valueOfNullOkUC(String typeName) {
-                       if (typeName == null) {
-                               return null;
-                       }
-
-                       return SupportType.valueOfUC(typeName);
-               }
-
-               /**
-                * Call {@link SupportType#valueOf(String.toUpperCase())} but return
-                * NULL in case of error instead of raising an exception.
-                * 
-                * @param typeName
-                *            the possible type name
-                * 
-                * @return NULL or the type
-                */
-               public static SupportType valueOfAllOkUC(String typeName) {
-                       try {
-                               return SupportType.valueOfUC(typeName);
-                       } catch (Exception e) {
-                               return null;
-                       }
-               }
-       }
-
-       /** Only used by {@link BasicSupport#getInput()} just so it is always reset. */
-       private InputStream in;
+       private Document sourceNode;
+       private URL source;
         private SupportType type;
-       private URL currentReferer; // with on 'r', as in 'HTTP'...
-
-       // quote chars
-       private char openQuote = Instance.getTrans().getChar(
-                       StringId.OPEN_SINGLE_QUOTE);
-       private char closeQuote = Instance.getTrans().getChar(
-                       StringId.CLOSE_SINGLE_QUOTE);
-       private char openDoubleQuote = Instance.getTrans().getChar(
-                       StringId.OPEN_DOUBLE_QUOTE);
-       private char closeDoubleQuote = Instance.getTrans().getChar(
-                       StringId.CLOSE_DOUBLE_QUOTE);
-
-       /**
-        * The name of this support class.
-        * 
-        * @return the name
-        */
-       protected abstract String getSourceName();
+       private URL currentReferer; // with only one 'r', as in 'HTTP'...
+       
+       static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
+       static protected BasicSupportImages bsImages = new BasicSupportImages();
+       static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
  
         /**
          * Check if the given resource is supported by this {@link BasicSupport}.
@@ -183,149 +61,59 @@ public abstract class BasicSupport {
         protected abstract boolean isHtml();
  
         /**
-        * Return the story title.
+        * Return the {@link MetaData} of this story.
          * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
-        * 
-        * @return the title
+        * @return the associated {@link MetaData}, never NULL
          * 
          * @throws IOException
          *             in case of I/O error
          */
-       protected abstract String getTitle(URL source, InputStream in)
-                       throws IOException;
-
-       /**
-        * Return the story author.
-        * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
-        * 
-        * @return the author
-        * 
-        * @throws IOException
-        *             in case of I/O error
-        */
-       protected abstract String getAuthor(URL source, InputStream in)
-                       throws IOException;
-
-       /**
-        * Return the story publication date.
-        * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
-        * 
-        * @return the date
-        * 
-        * @throws IOException
-        *             in case of I/O error
-        */
-       protected abstract String getDate(URL source, InputStream in)
-                       throws IOException;
-
-       /**
-        * Return the subject of the story (for instance, if it is a fanfiction,
-        * what is the original work; if it is a technical text, what is the
-        * technical subject...).
-        * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
-        * 
-        * @return the subject
-        * 
-        * @throws IOException
-        *             in case of I/O error
-        */
-       protected abstract String getSubject(URL source, InputStream in)
-                       throws IOException;
+       protected abstract MetaData getMeta() throws IOException;
  
         /**
          * Return the story description.
          * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
-        * 
          * @return the description
          * 
          * @throws IOException
          *             in case of I/O error
          */
-       protected abstract String getDesc(URL source, InputStream in)
-                       throws IOException;
+       protected abstract String getDesc() throws IOException;
  
         /**
-        * Return the story cover resource if any, or NULL if none.
+        * Return the list of chapters (name and resource).
          * <p>
-        * The default cover should not be checked for here.
+        * Can be NULL if this {@link BasicSupport} does not use chapters.
          * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
+        * @param pg
+        *            the optional progress reporter
          * 
-        * @return the cover or NULL
+        * @return the chapters or NULL
          * 
          * @throws IOException
          *             in case of I/O error
          */
-       protected abstract URL getCover(URL source, InputStream in)
+       protected abstract List<Entry<String, URL>> getChapters(Progress pg)
                         throws IOException;
  
-       /**
-        * Return the list of chapters (name and resource).
-        * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
-        * 
-        * @return the chapters
-        * 
-        * @throws IOException
-        *             in case of I/O error
-        */
-       protected abstract List<Entry<String, URL>> getChapters(URL source,
-                       InputStream in) throws IOException;
-
         /**
          * Return the content of the chapter (possibly HTML encoded, if
          * {@link BasicSupport#isHtml()} is TRUE).
          * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
+        * @param chapUrl
+        *            the chapter {@link URL}
          * @param number
          *            the chapter number
+        * @param pg
+        *            the optional progress reporter
          * 
          * @return the content
          * 
          * @throws IOException
          *             in case of I/O error
          */
-       protected abstract String getChapterContent(URL source, InputStream in,
-                       int number) throws IOException;
-
-       /**
-        * Check if this {@link BasicSupport} is mainly catered to image files.
-        * 
-        * @return TRUE if it is
-        */
-       public boolean isImageDocument(URL source, InputStream in)
-                       throws IOException {
-               return false;
-       }
+       protected abstract String getChapterContent(URL chapUrl, int number,
+                       Progress pg) throws IOException;
  
         /**
          * Return the list of cookies (values included) that must be used to
@@ -341,176 +129,43 @@ public abstract class BasicSupport {
         }
  
         /**
-        * Process the given story resource into a partially filled {@link Story}
-        * object containing the name and metadata, except for the description.
+        * OAuth authorisation (aka, "bearer XXXXXXX").
          * 
-        * @param url
-        *            the story resource
-        * 
-        * @return the {@link Story}
-        * 
-        * @throws IOException
-        *             in case of I/O error
+        * @return the OAuth string
          */
-       public Story processMeta(URL url) throws IOException {
-               return processMeta(url, true, false);
+       public String getOAuth() {
+               return null;
         }
  
         /**
-        * Process the given story resource into a partially filled {@link Story}
-        * object containing the name and metadata.
-        * 
-        * @param url
-        *            the story resource
+        * Return the canonical form of the main {@link URL}.
          * 
-        * @param close
-        *            close "this" and "in" when done
-        * 
-        * @return the {@link Story}
+        * @param source
+        *            the source {@link URL}, which can be NULL
          * 
-        * @throws IOException
-        *             in case of I/O error
+        * @return the canonical form of this {@link URL} or NULL if the source was
+        *         NULL
          */
-       protected Story processMeta(URL url, boolean close, boolean getDesc)
-                       throws IOException {
-               in = Instance.getCache().open(url, this, false);
-               if (in == null) {
-                       return null;
-               }
-
-               try {
-                       preprocess(getInput());
-
-                       Story story = new Story();
-                       story.setMeta(new MetaData());
-                       story.getMeta().setTitle(ifUnhtml(getTitle(url, getInput())));
-                       story.getMeta().setAuthor(
-                                       fixAuthor(ifUnhtml(getAuthor(url, getInput()))));
-                       story.getMeta().setDate(ifUnhtml(getDate(url, getInput())));
-                       story.getMeta().setTags(getTags(url, getInput()));
-                       story.getMeta().setSource(getSourceName());
-                       story.getMeta().setPublisher(
-                                       ifUnhtml(getPublisher(url, getInput())));
-                       story.getMeta().setUuid(getUuid(url, getInput()));
-                       story.getMeta().setLuid(getLuid(url, getInput()));
-                       story.getMeta().setLang(getLang(url, getInput()));
-                       story.getMeta().setSubject(ifUnhtml(getSubject(url, getInput())));
-                       story.getMeta().setImageDocument(isImageDocument(url, getInput()));
-
-                       if (getDesc) {
-                               String descChapterName = Instance.getTrans().getString(
-                                               StringId.DESCRIPTION);
-                               story.getMeta().setResume(
-                                               makeChapter(url, 0, descChapterName,
-                                                               getDesc(url, getInput())));
-                       }
-
-                       return story;
-               } finally {
-                       if (close) {
-                               try {
-                                       close();
-                               } catch (IOException e) {
-                                       Instance.syserr(e);
-                               }
-
-                               if (in != null) {
-                                       in.close();
-                               }
-                       }
-               }
+       protected URL getCanonicalUrl(URL source) {
+               return source;
         }
  
         /**
-        * Process the given story resource into a fully filled {@link Story}
-        * object.
+        * The main {@link Node} for this {@link Story}.
          * 
-        * @param url
-        *            the story resource
-        * 
-        * @return the {@link Story}
-        * 
-        * @throws IOException
-        *             in case of I/O error
+        * @return the node
          */
-       public Story process(URL url) throws IOException {
-               setCurrentReferer(url);
-
-               try {
-                       Story story = processMeta(url, false, true);
-                       if (story == null) {
-                               return null;
-                       }
-
-                       story.setChapters(new ArrayList<Chapter>());
-
-                       URL cover = getCover(url, getInput());
-                       if (cover == null) {
-                               String subject = story.getMeta() == null ? null : story
-                                               .getMeta().getSubject();
-                               if (subject != null && !subject.isEmpty()
-                                               && Instance.getCoverDir() != null) {
-                                       File fileCover = new File(Instance.getCoverDir(), subject);
-                                       cover = getImage(fileCover.toURI().toURL(), subject);
-                               }
-                       }
-
-                       if (cover != null) {
-                               InputStream coverIn = null;
-                               try {
-                                       coverIn = Instance.getCache().open(cover, this, true);
-                                       story.getMeta().setCover(StringUtils.toImage(coverIn));
-                               } catch (IOException e) {
-                                       Instance.syserr(new IOException(Instance.getTrans()
-                                                       .getString(StringId.ERR_BS_NO_COVER, cover), e));
-                               } finally {
-                                       if (coverIn != null)
-                                               coverIn.close();
-                               }
-                       }
-
-                       List<Entry<String, URL>> chapters = getChapters(url, getInput());
-                       int i = 1;
-                       if (chapters != null) {
-                               for (Entry<String, URL> chap : chapters) {
-                                       setCurrentReferer(chap.getValue());
-                                       InputStream chapIn = Instance.getCache().open(
-                                                       chap.getValue(), this, true);
-                                       try {
-                                               story.getChapters().add(
-                                                               makeChapter(url, i, chap.getKey(),
-                                                                               getChapterContent(url, chapIn, i)));
-                                       } finally {
-                                               chapIn.close();
-                                       }
-                                       i++;
-                               }
-                       }
-
-                       return story;
-
-               } finally {
-                       try {
-                               close();
-                       } catch (IOException e) {
-                               Instance.syserr(e);
-                       }
-
-                       if (in != null) {
-                               in.close();
-                       }
-
-                       currentReferer = null;
-               }
+       protected Element getSourceNode() {
+               return sourceNode;
         }
  
         /**
-        * The support type.$
+        * The main {@link URL} for this {@link Story}.
          * 
-        * @return the type
+        * @return the URL
          */
-       public SupportType getType() {
-               return type;
+       protected URL getSource() {
+               return source;
         }
  
         /**
@@ -537,705 +192,230 @@ public abstract class BasicSupport {
         /**
          * The support type.
          * 
-        * @param type
-        *            the new type
-        * 
-        * @return this
+        * @return the type
          */
-       protected BasicSupport setType(SupportType type) {
-               this.type = type;
-               return this;
+       public SupportType getType() {
+               return type;
         }
  
         /**
-        * Return the story publisher (by default,
-        * {@link BasicSupport#getSourceName()}).
-        * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
-        * 
-        * @return the publisher
+        * The support type.
          * 
-        * @throws IOException
-        *             in case of I/O error
+        * @param type
+        *            the new type
          */
-       protected String getPublisher(URL source, InputStream in)
-                       throws IOException {
-               return getSourceName();
+       protected void setType(SupportType type) {
+               this.type = type;
         }
  
         /**
-        * Return the story UUID, a unique value representing the story (it is often
-        * an URL).
+        * Open an input link that will be used for the support.
          * <p>
-        * By default, this is the {@link URL} of the resource.
+        * Can return NULL, in which case you are supposed to work without a source
+        * node.
          * 
          * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
+        *            the source {@link URL}
          * 
-        * @return the uuid
+        * @return the loaded {@link Document}
          * 
          * @throws IOException
          *             in case of I/O error
          */
-       protected String getUuid(URL source, InputStream in) throws IOException {
-               return source.toString();
+       protected Document loadDocument(URL source) throws IOException {
+               String url = getCanonicalUrl(source).toString();
+               return DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", url.toString());
         }
  
         /**
-        * Return the story Library UID, a unique value representing the story (it
-        * is often a number) in the local library.
-        * <p>
-        * By default, this is empty.
-        * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
-        * 
-        * @return the id
+        * Log into the support (can be a no-op depending upon the support).
          * 
          * @throws IOException
          *             in case of I/O error
          */
-       protected String getLuid(URL source, InputStream in) throws IOException {
-               return "";
+       protected void login() throws IOException {
         }
  
         /**
-        * Return the 2-letter language code of this story.
-        * <p>
-        * By default, this is 'EN'.
-        * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
-        * 
-        * @return the language
-        * 
-        * @throws IOException
-        *             in case of I/O error
+        * Now that we have processed the {@link Story}, close the resources if any.
          */
-       protected String getLang(URL source, InputStream in) throws IOException {
-               return "EN";
+       protected void close() {
+               setCurrentReferer(null);
         }
  
         /**
-        * Return the list of tags for this story.
+        * Process the given story resource into a partially filled {@link Story}
+        * object containing the name and metadata.
          * 
-        * @param source
-        *            the source of the story
-        * @param in
-        *            the input (the main resource)
+        * @param getDesc
+        *            retrieve the description of the story, or not
+        * @param pg
+        *            the optional progress reporter
          * 
-        * @return the tags
+        * @return the {@link Story}, never NULL
          * 
          * @throws IOException
          *             in case of I/O error
          */
-       protected List<String> getTags(URL source, InputStream in)
+       protected Story processMeta(boolean getDesc, Progress pg)
                         throws IOException {
-               return new ArrayList<String>();
-       }
+               if (pg == null) {
+                       pg = new Progress();
+               } else {
+                       pg.setMinMax(0, 100);
+               }
  
-       /**
-        * Return the first line from the given input which correspond to the given
-        * selectors.
-        * <p>
-        * Do not reset the input, which will be pointing at the line just after the
-        * result (input will be spent if no result is found).
-        * 
-        * @param in
-        *            the input
-        * @param needle
-        *            a string that must be found inside the target line (also
-        *            supports "^" at start to say "only if it starts with" the
-        *            needle)
-        * @param relativeLine
-        *            the line to return based upon the target line position (-1 =
-        *            the line before, 0 = the target line...)
-        * 
-        * @return the line
-        */
-       protected String getLine(InputStream in, String needle, int relativeLine) {
-               return getLine(in, needle, relativeLine, true);
-       }
+               pg.setProgress(30);
  
-       /**
-        * Return a line from the given input which correspond to the given
-        * selectors.
-        * <p>
-        * Do not reset the input, which will be pointing at the line just after the
-        * result (input will be spent if no result is found) when first is TRUE,
-        * and will always be spent if first is FALSE.
-        * 
-        * @param in
-        *            the input
-        * @param needle
-        *            a string that must be found inside the target line (also
-        *            supports "^" at start to say "only if it starts with" the
-        *            needle)
-        * @param relativeLine
-        *            the line to return based upon the target line position (-1 =
-        *            the line before, 0 = the target line...)
-        * @param first
-        *            takes the first result (as opposed to the last one, which will
-        *            also always spend the input)
-        * 
-        * @return the line
-        */
-       protected String getLine(InputStream in, String needle, int relativeLine,
-                       boolean first) {
-               String rep = null;
+               Story story = new Story();
+               MetaData meta = getMeta();
+               if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) {
+                       meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
+               }
+               story.setMeta(meta);
  
-               List<String> lines = new ArrayList<String>();
-               @SuppressWarnings("resource")
-               Scanner scan = new Scanner(in, "UTF-8");
-               int index = -1;
-               scan.useDelimiter("\\n");
-               while (scan.hasNext()) {
-                       lines.add(scan.next());
+               pg.setProgress(50);
  
-                       if (index == -1) {
-                               if (needle.startsWith("^")) {
-                                       if (lines.get(lines.size() - 1).startsWith(
-                                                       needle.substring(1))) {
-                                               index = lines.size() - 1;
-                                       }
+               if (meta.getCover() == null) {
+                       meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
+               }
  
-                               } else {
-                                       if (lines.get(lines.size() - 1).contains(needle)) {
-                                               index = lines.size() - 1;
-                                       }
-                               }
-                       }
+               pg.setProgress(60);
  
-                       if (index >= 0 && index + relativeLine < lines.size()) {
-                               rep = lines.get(index + relativeLine);
-                               if (first) {
-                                       break;
-                               }
-                       }
+               if (getDesc) {
+                       String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
+                       story.getMeta().setResume(bsPara.makeChapter(this, source, 0, descChapterName, //
+                                       getDesc(), isHtml(), null));
                 }
  
-               return rep;
-       }
-
-       /**
-        * Prepare the support if needed before processing.
-        * 
-        * @throws IOException
-        *             on I/O error
-        */
-       protected void preprocess(InputStream in) throws IOException {
+               pg.done();
+               return story;
         }
  
         /**
-        * Now that we have processed the {@link Story}, close the resources if any.
+        * Process the given story resource into a fully filled {@link Story}
+        * object.
          * 
-        * @throws IOException
-        *             on I/O error
-        */
-       protected void close() throws IOException {
-       }
-
-       /**
-        * Create a {@link Chapter} object from the given information, formatting
-        * the content as it should be.
+        * @param pg
+        *            the optional progress reporter
          * 
-        * @param number
-        *            the chapter number
-        * @param name
-        *            the chapter name
-        * @param content
-        *            the chapter content
-        * 
-        * @return the {@link Chapter}
+        * @return the {@link Story}, never NULL
          * 
          * @throws IOException
          *             in case of I/O error
          */
-       protected Chapter makeChapter(URL source, int number, String name,
-                       String content) throws IOException {
-
-               // Chapter name: process it correctly, then remove the possible
-               // redundant "Chapter x: " in front of it
-               String chapterName = processPara(name).getContent().trim();
-               for (String lang : Instance.getConfig().getString(Config.CHAPTER)
-                               .split(",")) {
-                       String chapterWord = Instance.getConfig().getStringX(
-                                       Config.CHAPTER, lang);
-                       if (chapterName.startsWith(chapterWord)) {
-                               chapterName = chapterName.substring(chapterWord.length())
-                                               .trim();
-                               break;
-                       }
-               }
+       // TODO: make this method final once BasicSupport_Deprecated is gone
+       public Story process(Progress pg) throws IOException {
+               setCurrentReferer(source);
+               login();
+               sourceNode = loadDocument(source);
  
-               if (chapterName.startsWith(Integer.toString(number))) {
-                       chapterName = chapterName.substring(
-                                       Integer.toString(number).length()).trim();
-               }
-
-               if (chapterName.startsWith(":")) {
-                       chapterName = chapterName.substring(1).trim();
-               }
-               //
-
-               Chapter chap = new Chapter(number, chapterName);
-
-               if (content == null) {
-                       return chap;
-               }
-
-               if (isHtml()) {
-                       // Special <HR> processing:
-                       content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
-                                       "\n* * *\n");
-               }
-
-               InputStream in = new ByteArrayInputStream(
-                               content.getBytes(StandardCharsets.UTF_8));
                 try {
-                       @SuppressWarnings("resource")
-                       Scanner scan = new Scanner(in, "UTF-8");
-                       scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
-
-                       List<Paragraph> paras = new ArrayList<Paragraph>();
-                       while (scan.hasNext()) {
-                               String line = scan.next().trim();
-                               boolean image = false;
-                               if (line.startsWith("[") && line.endsWith("]")) {
-                                       URL url = getImage(source,
-                                                       line.substring(1, line.length() - 1).trim());
-                                       if (url != null) {
-                                               paras.add(new Paragraph(url));
-                                               image = true;
-                                       }
-                               }
-
-                               if (!image) {
-                                       paras.add(processPara(line));
-                               }
-                       }
-
-                       // Check quotes for "bad" format
-                       List<Paragraph> newParas = new ArrayList<Paragraph>();
-                       for (Paragraph para : paras) {
-                               newParas.addAll(requotify(para));
-                       }
-                       paras = newParas;
-
-                       // Remove double blanks/brks
-                       boolean space = false;
-                       boolean brk = true;
-                       for (int i = 0; i < paras.size(); i++) {
-                               Paragraph para = paras.get(i);
-                               boolean thisSpace = para.getType() == ParagraphType.BLANK;
-                               boolean thisBrk = para.getType() == ParagraphType.BREAK;
-
-                               if (space && thisBrk) {
-                                       paras.remove(i - 1);
-                                       i--;
-                               } else if ((space || brk) && (thisSpace || thisBrk)) {
-                                       paras.remove(i);
-                                       i--;
-                               }
-
-                               space = thisSpace;
-                               brk = thisBrk;
-                       }
-
-                       // Remove blank/brk at start
-                       if (paras.size() > 0
-                                       && (paras.get(0).getType() == ParagraphType.BLANK || paras
-                                                       .get(0).getType() == ParagraphType.BREAK)) {
-                               paras.remove(0);
-                       }
-
-                       // Remove blank/brk at end
-                       int last = paras.size() - 1;
-                       if (paras.size() > 0
-                                       && (paras.get(last).getType() == ParagraphType.BLANK || paras
-                                                       .get(last).getType() == ParagraphType.BREAK)) {
-                               paras.remove(last);
-                       }
-
-                       chap.setParagraphs(paras);
-
-                       return chap;
+                       return doProcess(pg);
                 } finally {
-                       in.close();
-               }
-       }
-
-       /**
-        * Return the list of supported image extensions.
-        * 
-        * @return the extensions
-        */
-       protected String[] getImageExt(boolean emptyAllowed) {
-               if (emptyAllowed) {
-                       return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
-               } else {
-                       return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
+                       close();
                 }
         }
  
         /**
-        * Check if the given resource can be a local image or a remote image, then
-        * refresh the cache with it if it is.
+        * Actual processing step, without the login/load/close calls of process().
+        * <p>
+        * Will convert the story resource into a fully filled {@link Story} object.
          * 
-        * @param source
-        *            the story source
-        * @param line
-        *            the resource to check
+        * @param pg
+        *            the optional progress reporter
          * 
-        * @return the image URL if found, or NULL
-        * 
-        */
-       protected URL getImage(URL source, String line) {
-               String path = new File(source.getFile()).getParent();
-               URL url = null;
-
-               // try for files
-               try {
-                       String urlBase = new File(new File(path), line.trim()).toURI()
-                                       .toURL().toString();
-                       for (String ext : getImageExt(true)) {
-                               if (new File(urlBase + ext).exists()) {
-                                       url = new File(urlBase + ext).toURI().toURL();
-                               }
-                       }
-               } catch (Exception e) {
-                       // Nothing to do here
-               }
-
-               if (url == null) {
-                       // try for URLs
-                       try {
-                               for (String ext : getImageExt(true)) {
-                                       if (Instance.getCache().check(new URL(line + ext))) {
-                                               url = new URL(line + ext);
-                                       }
-                               }
-
-                               // try out of cache
-                               if (url == null) {
-                                       for (String ext : getImageExt(true)) {
-                                               try {
-                                                       url = new URL(line + ext);
-                                                       Instance.getCache().refresh(url, this, true);
-                                                       break;
-                                               } catch (IOException e) {
-                                                       // no image with this ext
-                                                       url = null;
-                                               }
-                                       }
-                               }
-                       } catch (MalformedURLException e) {
-                               // Not an url
-                       }
-               }
-
-               // refresh the cached file
-               if (url != null) {
-                       try {
-                               Instance.getCache().refresh(url, this, true);
-                       } catch (IOException e) {
-                               // woops, broken image
-                               url = null;
-                       }
-               }
-
-               return url;
-       }
-
-       /**
-        * Reset then return {@link BasicSupport#in}.
-        * 
-        * @return {@link BasicSupport#in}
+        * @return the {@link Story}, never NULL
          * 
          * @throws IOException
          *             in case of I/O error
          */
-       protected InputStream getInput() throws IOException {
-               in.reset();
-               return in;
-       }
-
-       /**
-        * Fix the author name if it is prefixed with some "by" {@link String}.
-        * 
-        * @param author
-        *            the author with a possible prefix
-        * 
-        * @return the author without prefixes
-        */
-       private String fixAuthor(String author) {
-               if (author != null) {
-                       for (String suffix : new String[] { " ", ":" }) {
-                               for (String byString : Instance.getConfig()
-                                               .getString(Config.BYS).split(",")) {
-                                       byString += suffix;
-                                       if (author.toUpperCase().startsWith(byString.toUpperCase())) {
-                                               author = author.substring(byString.length()).trim();
-                                       }
-                               }
-                       }
-
-                       // Special case (without suffix):
-                       if (author.startsWith("©")) {
-                               author = author.substring(1);
-                       }
-               }
-
-               return author;
-       }
-
-       /**
-        * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
-        * and requotify them (i.e., separate them into QUOTE paragraphs and other
-        * paragraphs (quotes or not)).
-        * 
-        * @param para
-        *            the paragraph to requotify (not necessaraly a quote)
-        * 
-        * @return the correctly (or so we hope) quotified paragraphs
-        */
-       private List<Paragraph> requotify(Paragraph para) {
-               List<Paragraph> newParas = new ArrayList<Paragraph>();
-
-               if (para.getType() == ParagraphType.QUOTE) {
-                       String line = para.getContent();
-                       boolean singleQ = line.startsWith("" + openQuote);
-                       boolean doubleQ = line.startsWith("" + openDoubleQuote);
-
-                       if (!singleQ && !doubleQ) {
-                               line = openDoubleQuote + line + closeDoubleQuote;
-                               newParas.add(new Paragraph(ParagraphType.QUOTE, line));
-                       } else {
-                               char close = singleQ ? closeQuote : closeDoubleQuote;
-                               int posClose = line.indexOf(close);
-                               int posDot = line.indexOf(".");
-                               while (posDot >= 0 && posDot < posClose) {
-                                       posDot = line.indexOf(".", posDot + 1);
-                               }
-
-                               if (posDot >= 0) {
-                                       String rest = line.substring(posDot + 1).trim();
-                                       line = line.substring(0, posDot + 1).trim();
-                                       newParas.add(new Paragraph(ParagraphType.QUOTE, line));
-                                       newParas.addAll(requotify(processPara(rest)));
-                               } else {
-                                       newParas.add(para);
-                               }
-                       }
+       protected Story doProcess(Progress pg) throws IOException {
+               if (pg == null) {
+                       pg = new Progress();
                 } else {
-                       newParas.add(para);
+                       pg.setMinMax(0, 100);
                 }
  
-               return newParas;
-       }
-
-       /**
-        * Process a {@link Paragraph} from a raw line of text.
-        * <p>
-        * Will also fix quotes and HTML encoding if needed.
-        * 
-        * @param line
-        *            the raw line
-        * 
-        * @return the processed {@link Paragraph}
-        */
-       private Paragraph processPara(String line) {
-               line = ifUnhtml(line).trim();
+               pg.setProgress(1);
+               Progress pgMeta = new Progress();
+               pg.addProgress(pgMeta, 10);
+               Story story = processMeta(true, pgMeta);
+               pgMeta.done(); // 10%
  
-               boolean space = true;
-               boolean brk = true;
-               boolean quote = false;
-               boolean tentativeCloseQuote = false;
-               char prev = '\0';
-               int dashCount = 0;
+               pg.setName("Retrieving " + story.getMeta().getTitle());
  
-               StringBuilder builder = new StringBuilder();
-               for (char car : line.toCharArray()) {
-                       if (car != '-') {
-                               if (dashCount > 0) {
-                                       // dash, ndash and mdash: - – —
-                                       // currently: always use mdash
-                                       builder.append(dashCount == 1 ? '-' : '—');
-                               }
-                               dashCount = 0;
-                       }
+               Progress pgGetChapters = new Progress();
+               pg.addProgress(pgGetChapters, 10);
+               story.setChapters(new ArrayList<Chapter>());
+               List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
+               pgGetChapters.done(); // 20%
  
-                       if (tentativeCloseQuote) {
-                               tentativeCloseQuote = false;
-                               if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
-                                               || (car >= '0' && car <= '9')) {
-                                       builder.append("'");
-                               } else {
-                                       builder.append(closeQuote);
-                               }
-                       }
+               if (chapters != null) {
+                       Progress pgChaps = new Progress("Extracting chapters", 0,
+                                       chapters.size() * 300);
+                       pg.addProgress(pgChaps, 80);
  
-                       switch (car) {
-                       case ' ': // note: unbreakable space
-                       case ' ':
-                       case '\t':
-                       case '\n': // just in case
-                       case '\r': // just in case
-                               builder.append(' ');
-                               break;
-
-                       case '\'':
-                               if (space || (brk && quote)) {
-                                       quote = true;
-                                       builder.append(openQuote);
-                               } else if (prev == ' ') {
-                                       builder.append(openQuote);
-                               } else {
-                                       // it is a quote ("I'm off") or a 'quote' ("This
-                                       // 'good' restaurant"...)
-                                       tentativeCloseQuote = true;
-                               }
-                               break;
-
-                       case '"':
-                               if (space || (brk && quote)) {
-                                       quote = true;
-                                       builder.append(openDoubleQuote);
-                               } else if (prev == ' ') {
-                                       builder.append(openDoubleQuote);
-                               } else {
-                                       builder.append(closeDoubleQuote);
-                               }
-                               break;
-
-                       case '-':
-                               if (space) {
-                                       quote = true;
-                               } else {
-                                       dashCount++;
-                               }
-                               space = false;
-                               break;
-
-                       case '*':
-                       case '~':
-                       case '/':
-                       case '\\':
-                       case '<':
-                       case '>':
-                       case '=':
-                       case '+':
-                       case '_':
-                       case '–':
-                       case '—':
-                               space = false;
-                               builder.append(car);
-                               break;
-
-                       case '‘':
-                       case '`':
-                       case '‹':
-                       case '﹁':
-                       case '〈':
-                       case '「':
-                               if (space || (brk && quote)) {
-                                       quote = true;
-                                       builder.append(openQuote);
-                               } else {
-                                       builder.append(openQuote);
+                       long words = 0;
+                       int i = 1;
+                       for (Entry<String, URL> chap : chapters) {
+                               pgChaps.setName("Extracting chapter " + i);
+                               URL chapUrl = chap.getValue();
+                               String chapName = chap.getKey();
+                               if (chapUrl != null) {
+                                       setCurrentReferer(chapUrl);
                                 }
-                               space = false;
-                               brk = false;
-                               break;
  
-                       case '’':
-                       case '›':
-                       case '﹂':
-                       case '〉':
-                       case '」':
-                               space = false;
-                               brk = false;
-                               builder.append(closeQuote);
-                               break;
+                               pgChaps.setProgress(i * 100);
+                               Progress pgGetChapterContent = new Progress();
+                               Progress pgMakeChapter = new Progress();
+                               pgChaps.addProgress(pgGetChapterContent, 100);
+                               pgChaps.addProgress(pgMakeChapter, 100);
  
-                       case '«':
-                       case '“':
-                       case '﹃':
-                       case '《':
-                       case '『':
-                               if (space || (brk && quote)) {
-                                       quote = true;
-                                       builder.append(openDoubleQuote);
-                               } else {
-                                       builder.append(openDoubleQuote);
-                               }
-                               space = false;
-                               brk = false;
-                               break;
+                               String content = getChapterContent(chapUrl, i,
+                                               pgGetChapterContent);
+                               pgGetChapterContent.done();
+                               Chapter cc = bsPara.makeChapter(this, chapUrl, i,
+                                               chapName, content, isHtml(), pgMakeChapter);
+                               pgMakeChapter.done();
  
-                       case '»':
-                       case '”':
-                       case '﹄':
-                       case '》':
-                       case '』':
-                               space = false;
-                               brk = false;
-                               builder.append(closeDoubleQuote);
-                               break;
+                               words += cc.getWords();
+                               story.getChapters().add(cc);
+                               story.getMeta().setWords(words);
  
-                       default:
-                               space = false;
-                               brk = false;
-                               builder.append(car);
-                               break;
+                               i++;
                         }
  
-                       prev = car;
+                       pgChaps.setName("Extracting chapters");
+                       pgChaps.done();
                 }
  
-               if (tentativeCloseQuote) {
-                       tentativeCloseQuote = false;
-                       builder.append(closeQuote);
-               }
+               pg.done();
  
-               line = builder.toString().trim();
-
-               ParagraphType type = ParagraphType.NORMAL;
-               if (space) {
-                       type = ParagraphType.BLANK;
-               } else if (brk) {
-                       type = ParagraphType.BREAK;
-               } else if (quote) {
-                       type = ParagraphType.QUOTE;
-               }
-
-               return new Paragraph(type, line);
+               return story;
         }
  
         /**
-        * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
-        * true.
+        * Create a chapter from the given data (no progress reporter is used).
          * 
-        * @param input
-        *            the input
+        * @param source
+        *            the source URL for this content, which can be used to try and
+        *            find images if images are present in the format [image-url]
+        * @param number
+        *            the chapter number (0 = description)
+        * @param name
+        *            the chapter name
+        * @param content
+        *            the content of the chapter
+        * @return the {@link Chapter}
          * 
-        * @return the no html version if needed
+        * @throws IOException
+        *             in case of I/O error
          */
-       private String ifUnhtml(String input) {
-               if (isHtml() && input != null) {
-                       return StringUtils.unhtml(input);
-               }
-
-               return input;
+       public Chapter makeChapter(URL source, int number, String name,
+                       String content) throws IOException {
+               return bsPara.makeChapter(this, source, number, name,
+                               content, isHtml(), null);
         }
  
         /**
@@ -1255,16 +435,16 @@ public abstract class BasicSupport {
                 // TEXT and INFO_TEXT always support files (not URLs though)
                 for (SupportType type : SupportType.values()) {
                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
-                               BasicSupport support = getSupport(type);
+                               BasicSupport support = getSupport(type, url);
                                 if (support != null && support.supports(url)) {
                                         return support;
                                 }
                         }
                 }
  
-               for (SupportType type : new SupportType[] { SupportType.TEXT,
-                               SupportType.INFO_TEXT }) {
-                       BasicSupport support = getSupport(type);
+               for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
+                               SupportType.TEXT }) {
+                       BasicSupport support = getSupport(type, url);
                         if (support != null && support.supports(url)) {
                                 return support;
                         }
@@ -1277,30 +457,66 @@ public abstract class BasicSupport {
          * Return a {@link BasicSupport} implementation supporting the given type.
          * 
          * @param type
-        *            the type
+        *            the type, must not be NULL
+        * @param url
+        *            the {@link URL} to support (can be NULL to get an
+        *            "abstract support"; if not NULL, will be used as the source
+        *            URL)
          * 
          * @return an implementation that supports it, or NULL
          */
-       public static BasicSupport getSupport(SupportType type) {
+       public static BasicSupport getSupport(SupportType type, URL url) {
+               BasicSupport support = null;
+
                 switch (type) {
                 case EPUB:
-                       return new Epub().setType(type);
+                       support = new Epub();
+                       break;
                 case INFO_TEXT:
-                       return new InfoText().setType(type);
+                       support = new InfoText();
+                       break;
                 case FIMFICTION:
-                       return new Fimfiction().setType(type);
+                       try {
+                               // Throws if no client key or NO in options: fall back to Fimfiction
+                               support = new FimfictionApi();
+                       } catch (IOException e) {
+                               support = new Fimfiction();
+                       }
+                       break;
                 case FANFICTION:
-                       return new Fanfiction().setType(type);
+                       support = new Fanfiction();
+                       break;
                 case TEXT:
-                       return new Text().setType(type);
-               case MANGAFOX:
-                       return new MangaFox().setType(type);
+                       support = new Text();
+                       break;
+               case MANGAHUB:
+                       support = new MangaHub();
+                       break;
                 case E621:
-                       return new E621().setType(type);
+                       support = new E621();
+                       break;
+               case YIFFSTAR:
+                       support = new YiffStar();
+                       break;
+               case E_HENTAI:
+                       support = new EHentai();
+                       break;
+               case MANGA_LEL:
+                       support = new MangaLel();
+                       break;
                 case CBZ:
-                       return new Cbz().setType(type);
+                       support = new Cbz();
+                       break;
+               case HTML:
+                       support = new Html();
+                       break;
                 }
  
-               return null;
+               if (support != null) {
+                       support.setType(type);
+                       support.source = support.getCanonicalUrl(url);
+               }
+
+               return support;
         }
  }