(Changelog update)

[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java

index 93155961eaea1ff36957553e102d420181a93b7c..2b4715ab0beec0af04f5d52a70f88f55a61193fd 100644 (file)
--- a/src/be/nikiroo/fanfix/supported/BasicSupport.java
+++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java
@@ -1,10 +1,12 @@
  package be.nikiroo.fanfix.supported;
  
  import java.awt.image.BufferedImage;
+import java.io.BufferedReader;
  import java.io.ByteArrayInputStream;
  import java.io.File;
  import java.io.IOException;
  import java.io.InputStream;
+import java.io.InputStreamReader;
  import java.net.MalformedURLException;
  import java.net.URL;
  import java.util.ArrayList;
@@ -23,6 +25,7 @@ import be.nikiroo.fanfix.data.Paragraph;
  import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  import be.nikiroo.fanfix.data.Story;
  import be.nikiroo.utils.IOUtils;
+import be.nikiroo.utils.Progress;
  import be.nikiroo.utils.StringUtils;
  
  /**
@@ -57,7 +60,9 @@ public abstract class BasicSupport {
                 /** Furry website with comics support */
                 E621,
                 /** CBZ files */
-               CBZ;
+               CBZ,
+               /** HTML files */
+               HTML;
  
                 /**
                  * A description of this support type (more information than the
@@ -145,7 +150,7 @@ public abstract class BasicSupport {
  
         private InputStream in;
         private SupportType type;
-       private URL currentReferer; // with on 'r', as in 'HTTP'...
+       private URL currentReferer; // with only one 'r', as in 'HTTP'...
  
         // quote chars
         private char openQuote = Instance.getTrans().getChar(
@@ -282,7 +287,7 @@ public abstract class BasicSupport {
          */
         protected Story processMeta(URL url, boolean close, boolean getDesc)
                         throws IOException {
-               in = Instance.getCache().open(url, this, false);
+               in = openInput(url);
                 if (in == null) {
                         return null;
                 }
@@ -328,26 +333,42 @@ public abstract class BasicSupport {
          * 
          * @param url
          *            the story resource
+        * @param pg
+        *            the optional progress reporter
          * 
          * @return the {@link Story}
          * 
          * @throws IOException
          *             in case of I/O error
          */
-       public Story process(URL url) throws IOException {
+       public Story process(URL url, Progress pg) throws IOException {
+               if (pg == null) {
+                       pg = new Progress();
+               } else {
+                       pg.setMinMax(0, 100);
+               }
+
                 setCurrentReferer(url);
  
+               pg.setProgress(1);
                 try {
                         Story story = processMeta(url, false, true);
+                       pg.setProgress(10);
                         if (story == null) {
+                               pg.setProgress(100);
                                 return null;
                         }
  
                         story.setChapters(new ArrayList<Chapter>());
  
                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
+                       pg.setProgress(20);
+
                         int i = 1;
                         if (chapters != null) {
+                               Progress pgChaps = new Progress(0, chapters.size());
+                               pg.addProgress(pgChaps, 80);
+
                                 for (Entry<String, URL> chap : chapters) {
                                         setCurrentReferer(chap.getValue());
                                         InputStream chapIn = Instance.getCache().open(
@@ -360,8 +381,10 @@ public abstract class BasicSupport {
                                                 chapIn.close();
                                         }
  
-                                       i++;
+                                       pgChaps.setProgress(i++);
                                 }
+                       } else {
+                               pg.setProgress(100);
                         }
  
                         return story;
@@ -491,90 +514,137 @@ public abstract class BasicSupport {
  
                 Chapter chap = new Chapter(number, chapterName);
  
-               if (content == null) {
-                       return chap;
+               if (content != null) {
+                       chap.setParagraphs(makeParagraphs(source, content));
                 }
  
+               return chap;
+
+       }
+
+       /**
+        * Convert the given content into {@link Paragraph}s.
+        * 
+        * @param source
+        *            the source URL of the story (used to resolve image lines)
+        * @param content
+        *            the textual content (must not be NULL)
+        * 
+        * @return the {@link Paragraph}s, requotified and with blanks/breaks fixed
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       protected List<Paragraph> makeParagraphs(URL source, String content)
+                       throws IOException {
                 if (isHtml()) {
                         // Special <HR> processing:
                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
                                         "\n* * *\n");
                 }
  
+               List<Paragraph> paras = new ArrayList<Paragraph>();
                 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
                 try {
-                       @SuppressWarnings("resource")
-                       Scanner scan = new Scanner(in, "UTF-8");
-                       scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
-
-                       List<Paragraph> paras = new ArrayList<Paragraph>();
-                       while (scan.hasNext()) {
-                               String line = scan.next().trim();
-                               boolean image = false;
-                               if (line.startsWith("[") && line.endsWith("]")) {
-                                       URL url = getImageUrl(this, source,
-                                                       line.substring(1, line.length() - 1).trim());
-                                       if (url != null) {
-                                               paras.add(new Paragraph(url));
-                                               image = true;
-                                       }
+                       BufferedReader buff = new BufferedReader(new InputStreamReader(in,
+                                       "UTF-8")); // buff is not closed itself: "in" is closed in the finally block
+
+                       for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
+                                       .readLine()) { // readLine() returns the line without its terminator
+                               String lines[];
+                               if (isHtml()) { // an HTML line may contain several paragraphs
+                                       lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
+                               } else { // non-HTML: the whole line is the only candidate
+                                       lines = new String[] { encodedLine };
                                 }
  
-                               if (!image) {
-                                       paras.add(processPara(line));
+                               for (String aline : lines) {
+                                       String line = aline.trim();
+
+                                       URL image = null;
+                                       if (line.startsWith("[") && line.endsWith("]")) { // "[...]" may designate an image
+                                               image = getImageUrl(this, source,
+                                                               line.substring(1, line.length() - 1).trim());
+                                       }
+
+                                       if (image != null) {
+                                               paras.add(new Paragraph(image));
+                                       } else { // no image URL was resolved: handle the line as text
+                                               paras.add(processPara(line));
+                                       }
                                 }
                         }
+               } finally {
+                       in.close();
+               }
  
-                       // Check quotes for "bad" format
-                       List<Paragraph> newParas = new ArrayList<Paragraph>();
-                       for (Paragraph para : paras) {
-                               newParas.addAll(requotify(para));
-                       }
-                       paras = newParas;
-
-                       // Remove double blanks/brks
-                       boolean space = false;
-                       boolean brk = true;
-                       for (int i = 0; i < paras.size(); i++) {
-                               Paragraph para = paras.get(i);
-                               boolean thisSpace = para.getType() == ParagraphType.BLANK;
-                               boolean thisBrk = para.getType() == ParagraphType.BREAK;
-
-                               if (space && thisBrk) {
-                                       paras.remove(i - 1);
-                                       i--;
-                               } else if ((space || brk) && (thisSpace || thisBrk)) {
-                                       paras.remove(i);
-                                       i--;
-                               }
+               // Check quotes for "bad" format
+               List<Paragraph> newParas = new ArrayList<Paragraph>();
+               for (Paragraph para : paras) {
+                       newParas.addAll(requotify(para));
+               }
+               paras = newParas;
  
-                               space = thisSpace;
-                               brk = thisBrk;
-                       }
+               // Remove double blanks/brks
+               fixBlanksBreaks(paras);
  
-                       // Remove blank/brk at start
-                       if (paras.size() > 0
-                                       && (paras.get(0).getType() == ParagraphType.BLANK || paras
-                                                       .get(0).getType() == ParagraphType.BREAK)) {
-                               paras.remove(0);
-                       }
+               return paras;
+       }
  
-                       // Remove blank/brk at end
-                       int last = paras.size() - 1;
-                       if (paras.size() > 0
-                                       && (paras.get(last).getType() == ParagraphType.BLANK || paras
-                                                       .get(last).getType() == ParagraphType.BREAK)) {
-                               paras.remove(last);
+       /**
+        * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
+        * the given {@link Paragraph}s; the list is modified in place.
+        * <p>
+        * The resulting list will not contain a starting or trailing blank/break
+        * nor 2 blanks or breaks following each other.
+        * 
+        * @param paras
+        *            the list of {@link Paragraph}s to fix (elements may be removed)
+        */
+       protected void fixBlanksBreaks(List<Paragraph> paras) {
+               boolean space = false;
+               boolean brk = true; // as if a break came first: a leading blank/break is removed
+               for (int i = 0; i < paras.size(); i++) {
+                       Paragraph para = paras.get(i);
+                       boolean thisSpace = para.getType() == ParagraphType.BLANK;
+                       boolean thisBrk = para.getType() == ParagraphType.BREAK;
+
+                       if (i > 0 && space && thisBrk) { // i > 0: nothing precedes index 0
+                               paras.remove(i - 1); // break seen right after a blank: drop the previous element
+                               i--; // the current paragraph shifted one slot to the left
+                       } else if ((space || brk) && (thisSpace || thisBrk)) {
+                               paras.remove(i); // second blank/break in a row: drop the current one
+                               i--; // the next paragraph now sits at this index
                         }
  
-                       chap.setParagraphs(paras);
+                       space = thisSpace; // remember the type just examined, even if it was removed
+                       brk = thisBrk;
+               }
  
-                       return chap;
-               } finally {
-                       in.close();
+               // Remove blank/brk at start
+               if (paras.size() > 0
+                               && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
+                                               0).getType() == ParagraphType.BREAK)) {
+                       paras.remove(0);
+               }
+
+               // Remove blank/brk at end
+               int last = paras.size() - 1; // -1 on an empty list; guarded by the size check below
+               if (paras.size() > 0
+                               && (paras.get(last).getType() == ParagraphType.BLANK || paras
+                                               .get(last).getType() == ParagraphType.BREAK)) {
+                       paras.remove(last);
                 }
         }
  
+       /**
+        * Get the default cover related to this subject (see <tt>.info</tt> files).
+        * 
+        * @param subject
+        *            the subject
+        * 
+        * @return the cover if any, or NULL
+        */
         static BufferedImage getDefaultCover(String subject) {
                 if (subject != null && !subject.isEmpty()
                                 && Instance.getCoverDir() != null) {
@@ -697,6 +767,21 @@ public abstract class BasicSupport {
                 return url;
         }
  
+       /**
+        * Open the input stream that will be used throughout this support.
+        * 
+        * @param source
+        *            the source {@link URL} to open
+        * 
+        * @return the {@link InputStream} obtained from the cache for this source
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       protected InputStream openInput(URL source) throws IOException {
+               return Instance.getCache().open(source, this, false); // NOTE(review): meaning of "false" not visible here
+       }
+
         protected InputStream reset(InputStream in) {
                 try {
                         in.reset();
@@ -753,7 +838,7 @@ public abstract class BasicSupport {
          * 
          * @return the correctly (or so we hope) quotified paragraphs
          */
-       private List<Paragraph> requotify(Paragraph para) {
+       protected List<Paragraph> requotify(Paragraph para) {
                 List<Paragraph> newParas = new ArrayList<Paragraph>();
  
                 if (para.getType() == ParagraphType.QUOTE
@@ -828,7 +913,7 @@ public abstract class BasicSupport {
          * 
          * @return the processed {@link Paragraph}
          */
-       private Paragraph processPara(String line) {
+       protected Paragraph processPara(String line) {
                 line = ifUnhtml(line).trim();
  
                 boolean space = true;
@@ -851,11 +936,16 @@ public abstract class BasicSupport {
  
                         if (tentativeCloseQuote) {
                                 tentativeCloseQuote = false;
-                               if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
-                                               || (car >= '0' && car <= '9')) {
+                               if (Character.isLetterOrDigit(car)) {
                                         builder.append("'");
                                 } else {
-                                       builder.append(closeQuote);
+                                       // handle double-single quotes as double quotes
+                                       if (prev == car) {
+                                               builder.append(closeDoubleQuote);
+                                               continue;
+                                       } else {
+                                               builder.append(closeQuote);
+                                       }
                                 }
                         }
  
@@ -871,9 +961,21 @@ public abstract class BasicSupport {
                         case '\'':
                                 if (space || (brk && quote)) {
                                         quote = true;
-                                       builder.append(openQuote);
-                               } else if (prev == ' ') {
-                                       builder.append(openQuote);
+                                       // handle double-single quotes as double quotes
+                                       if (prev == car) {
+                                               builder.deleteCharAt(builder.length() - 1);
+                                               builder.append(openDoubleQuote);
+                                       } else {
+                                               builder.append(openQuote);
+                                       }
+                               } else if (prev == ' ' || prev == car) {
+                                       // handle double-single quotes as double quotes
+                                       if (prev == car) {
+                                               builder.deleteCharAt(builder.length() - 1);
+                                               builder.append(openDoubleQuote);
+                                       } else {
+                                               builder.append(openQuote);
+                                       }
                                 } else {
                                         // it is a quote ("I'm off") or a 'quote' ("This
                                         // 'good' restaurant"...)
@@ -926,7 +1028,13 @@ public abstract class BasicSupport {
                                         quote = true;
                                         builder.append(openQuote);
                                 } else {
-                                       builder.append(openQuote);
+                                       // handle double-single quotes as double quotes
+                                       if (prev == car) {
+                                               builder.deleteCharAt(builder.length() - 1);
+                                               builder.append(openDoubleQuote);
+                                       } else {
+                                               builder.append(openQuote);
+                                       }
                                 }
                                 space = false;
                                 brk = false;
@@ -939,7 +1047,13 @@ public abstract class BasicSupport {
                         case '」':
                                 space = false;
                                 brk = false;
-                               builder.append(closeQuote);
+                               // handle double-single quotes as double quotes
+                               if (prev == car) {
+                                       builder.deleteCharAt(builder.length() - 1);
+                                       builder.append(closeDoubleQuote);
+                               } else {
+                                       builder.append(closeQuote);
+                               }
                                 break;
  
                         case '«':
@@ -1037,8 +1151,8 @@ public abstract class BasicSupport {
                         }
                 }
  
-               for (SupportType type : new SupportType[] { SupportType.TEXT,
-                               SupportType.INFO_TEXT }) {
+               for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
+                               SupportType.TEXT }) {
                         BasicSupport support = getSupport(type);
                         if (support != null && support.supports(url)) {
                                 return support;
@@ -1074,6 +1188,8 @@ public abstract class BasicSupport {
                         return new E621().setType(type);
                 case CBZ:
                         return new Cbz().setType(type);
+               case HTML:
+                       return new Html().setType(type);
                 }
  
                 return null;