fix/perf base64/serial
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
index 2c90d29d0e1bc45c1cb4dfadfb64d4cd7988ee21..ef3d84b921aea488f5ce17e5b8b52334cf24a375 100644 (file)
@@ -1,7 +1,8 @@
 package be.nikiroo.utils;
 
-import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.io.UnsupportedEncodingException;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
@@ -14,7 +15,6 @@ import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
 import java.util.Map.Entry;
-import java.util.Scanner;
 import java.util.regex.Pattern;
 
 import org.unbescape.html.HtmlEscape;
@@ -512,14 +512,18 @@ public class StringUtils {
        /**
         * Zip the data and then encode it into Base64.
         * 
+        * @deprecated use {@link StringUtils#base64(String, boolean)} with
+        *             {@code zip} set to TRUE instead
+        * 
         * @param data
         *            the data
         * 
         * @return the Base64 zipped version
         */
+       @Deprecated
        public static String zip64(String data) {
                try {
-                       return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
+                       return Base64.encodeBytes(data.getBytes("UTF-8"), Base64.GZIP);
                } catch (IOException e) {
                        e.printStackTrace();
                        return null;
@@ -529,6 +533,9 @@ public class StringUtils {
        /**
         * Unconvert from Base64 then unzip the content.
         * 
+        * @deprecated use {@link StringUtils#unbase64s(String, boolean)} with the
+        *             correct parameter instead
+        * 
         * @param data
         *            the data in Base64 format
         * 
@@ -537,17 +544,255 @@ public class StringUtils {
         * @throws IOException
         *             in case of I/O error
         */
+       @Deprecated
        public static String unzip64(String data) throws IOException {
-               ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
-                               Base64.GZIP));
+               return new String(Base64.decode(data, Base64.GZIP), "UTF-8"); // explicit charset, not the platform default
+       }
 
-               Scanner scan = new Scanner(in);
-               scan.useDelimiter("\\A");
-               try {
-                       return scan.next();
-               } finally {
-                       scan.close();
+       /**
+        * Convert the given text (taken as UTF-8 bytes) to Base64 format.
+        * 
+        * @param data
+        *            the text to convert; it is turned into UTF-8 bytes first
+        * @param zip
+        *            TRUE to also compress the data in GZIP format; remember that
+        *            compressed and not-compressed content are different; you need
+        *            to know which is which when decoding
+        * 
+        * @return the Base64 {@link String} representation of the data
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static String base64(String data, boolean zip) throws IOException {
+               return base64(data.getBytes("UTF-8"), zip);
+       }
+
+       /**
+        * Convert the given bytes to Base64 format.
+        * 
+        * @param data
+        *            the bytes to convert
+        * @param zip
+        *            TRUE to also compress the data in GZIP format (Base64.GZIP is
+        *            passed instead of Base64.NO_OPTIONS); compressed and
+        *            not-compressed content differ, so know which one you decode
+        * 
+        * @return the Base64 {@link String} representation of the data
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static String base64(byte[] data, boolean zip) throws IOException {
+               return Base64.encodeBytes(data, zip ? Base64.GZIP : Base64.NO_OPTIONS);
+       }
+
+       /**
+        * Wrap the given stream so that what is written to it is Base64-encoded.
+        * 
+        * @param data
+        *            the stream that will receive the Base64 output
+        * @param zip
+        *            TRUE to also compress the written data in GZIP format before
+        *            it is Base64-encoded (the GZIP stream wraps the Base64 one);
+        *            the decoding side then has to uncompress it again
+        * @param breakLines
+        *            TRUE to break lines on every 76th character
+        * 
+        * @return the {@link OutputStream} to write the raw data to
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static OutputStream base64(OutputStream data, boolean zip,
+                       boolean breakLines) throws IOException {
+               OutputStream out = new Base64.OutputStream(data,
+                               breakLines ? Base64.DO_BREAK_LINES | Base64.ENCODE
+                                               : Base64.ENCODE); // options are flags: combine with |, not &
+
+               if (zip) {
+                       out = new java.util.zip.GZIPOutputStream(out);
+               }
+
+               return out;
+       }
+
+       /**
+        * Wrap the given stream so that reading from it yields Base64 data.
+        * 
+        * @param data
+        *            the stream to read the data to convert from
+        * @param zip
+        *            TRUE to first uncompress the source from a GZIP format, then
+        *            encode the result; NOTE(review): the String/byte[] variants
+        *            compress when zip is TRUE -- confirm this one is intended
+        * @param breakLines
+        *            TRUE to break lines on every 76th character
+        * 
+        * @return an {@link InputStream} over the Base64 version of the data
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static InputStream base64(InputStream data, boolean zip,
+                       boolean breakLines) throws IOException {
+               if (zip) {
+                       data = new java.util.zip.GZIPInputStream(data);
+               }
+
+               return new Base64.InputStream(data, breakLines ? Base64.DO_BREAK_LINES
+                               | Base64.ENCODE : Base64.ENCODE); // flags are OR-ed, not AND-ed
+       }
+
+       /**
+        * Unconvert the given data from Base64 format back to a raw array of bytes.
+        * <p>
+        * Will automatically detect zipped data and also uncompress it before
+        * returning, unless {@code zip} is FALSE (Base64.DONT_GUNZIP is then used).
+        * 
+        * @param data
+        *            the Base64 text to unconvert
+        * @param zip
+        *            TRUE to also uncompress the data from a GZIP format
+        *            automatically; if set to FALSE, zipped data can be returned
+        * 
+        * @return the raw data represented by the given Base64 {@link String},
+        *         optionally still compressed with GZIP
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static byte[] unbase64(String data, boolean zip) throws IOException {
+               return Base64
+                               .decode(data, zip ? Base64.NO_OPTIONS : Base64.DONT_GUNZIP);
+       }
+
+       /**
+        * Wrap the given stream with a Base64 stream created in DECODE mode.
+        * 
+        * @param data
+        *            the stream to wrap
+        * @param zip
+        *            TRUE to add a {@link java.util.zip.GZIPOutputStream} on top;
+        *            NOTE(review): that stream compresses what is written to it,
+        *            it does not uncompress -- confirm the intended direction
+        * 
+        * @return the wrapping {@link OutputStream} to write to
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static OutputStream unbase64(OutputStream data, boolean zip)
+                       throws IOException {
+               OutputStream out = new Base64.OutputStream(data, Base64.DECODE);
+
+               if (zip) {
+                       out = new java.util.zip.GZIPOutputStream(out);
+               }
+
+               return out;
+       }
+
+       /**
+        * Wrap the given stream so that reading from it yields the decoded data.
+        * 
+        * @param data
+        *            the stream to read the Base64 data from
+        * @param zip
+        *            TRUE to also uncompress the decoded data from a GZIP format;
+        *            take care about this flag, as a wrong value will cause
+        *            errors in the returned content or an {@link IOException}
+        * 
+        * @return an {@link InputStream} over the raw (decoded) data
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static InputStream unbase64(InputStream data, boolean zip)
+                       throws IOException {
+               InputStream in = new Base64.InputStream(data, Base64.DECODE);
+               if (zip) { // Base64-decode first, then gunzip the decoded bytes
+                       in = new java.util.zip.GZIPInputStream(in);
                }
+               return in;
+       }
+
+       /**
+        * Unconvert the given data from Base64 format back to a raw array of bytes.
+        * <p>
+        * Will automatically detect zipped data and also uncompress it before
+        * returning, unless {@code zip} is FALSE (Base64.DONT_GUNZIP is then used).
+        * 
+        * @param data
+        *            the array that contains the data to unconvert
+        * @param offset
+        *            the offset at which to start taking the data (do not take the
+        *            data before it into account)
+        * @param count
+        *            the number of bytes to take into account (do not process after
+        *            this number of bytes has been processed)
+        * @param zip
+        *            TRUE to also uncompress the data from a GZIP format
+        *            automatically; if set to FALSE, zipped data can be returned
+        * 
+        * @return the raw data represented by the selected part of the array
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static byte[] unbase64(byte[] data, int offset, int count,
+                       boolean zip) throws IOException {
+               return Base64.niki_decode(data, offset, count, zip ? Base64.NO_OPTIONS
+                               : Base64.DONT_GUNZIP);
+       }
+
+       /**
+        * Unconvert the given data from Base64 format back to a {@link String}.
+        * <p>
+        * Will automatically detect zipped data and also uncompress it before
+        * returning, unless {@code zip} is FALSE; the bytes are read as UTF-8.
+        * 
+        * @param data
+        *            the Base64 text to unconvert
+        * @param zip
+        *            TRUE to also uncompress the data from a GZIP format
+        *            automatically; if set to FALSE, zipped data can be returned
+        * 
+        * @return the {@link String} represented by the given Base64 {@link String}
+        *         , optionally still compressed with GZIP
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static String unbase64s(String data, boolean zip) throws IOException {
+               return new String(unbase64(data, zip), "UTF-8");
+       }
+
+       /**
+        * Unconvert the given data from Base64 format back into a {@link String}.
+        * 
+        * @param data
+        *            the array that contains the data to unconvert
+        * @param offset
+        *            the offset at which to start taking the data (do not take the
+        *            data before it into account)
+        * @param count
+        *            the number of bytes to take into account (do not process after
+        *            this number of bytes has been processed)
+        * @param zip
+        *            TRUE to also uncompress the data from a GZIP format; take care
+        *            about this flag, as it could easily cause errors in the
+        *            returned content or an {@link IOException}
+        * 
+        * @return the {@link String} (read as UTF-8) represented by the selected
+        *         part of the array, optionally still compressed with GZIP
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static String unbase64s(byte[] data, int offset, int count,
+                       boolean zip) throws IOException {
+               return new String(unbase64(data, offset, count, zip), "UTF-8");
        }
 
        /**
@@ -565,21 +810,70 @@ public class StringUtils {
                }
        }
 
+       //
        // justify List<String> related:
+       //
 
+       /**
+        * Check if this line is complete: it ends with ".", "?", "!" or a quote.
+        * <p>
+        * Note that we consider an empty line as full, and a line ending with
+        * spaces as not complete.
+        * 
+        * @param line
+        *            the line to check
+        * 
+        * @return TRUE if it does
+        */
        static private boolean isFullLine(StringBuilder line) {
-               return line.length() == 0 //
-                               || line.charAt(line.length() - 1) == '.'
-                               || line.charAt(line.length() - 1) == '"'
-                               || line.charAt(line.length() - 1) == '»';
+               if (line.length() == 0) {
+                       return true;
+               }
+
+               char lastCar = line.charAt(line.length() - 1);
+               switch (lastCar) {
+               case '.': // sentence-ending punctuation
+               case '?':
+               case '!':
+
+               case '\'': // single quotes (straight and curly)
+               case '‘':
+               case '’':
+
+               case '"': // double quotes (straight, curly and angle)
+               case '”':
+               case '“':
+               case '»':
+               case '«':
+                       return true;
+               default: // anything else, a trailing space included
+                       return false;
+               }
        }
 
+       /**
+        * Check if this line represents an item in a list or description (i.e.,
+        * check that its leading spacing is not empty and is followed by "-").
+        * 
+        * @param line
+        *            the line to check
+        * 
+        * @return TRUE if it is
+        */
        static private boolean isItemLine(String line) {
                String spacing = getItemSpacing(line);
                return spacing != null && !spacing.isEmpty()
                                && line.charAt(spacing.length()) == '-';
        }
 
+       /**
+        * Return all the spaces that start this line (or Empty if none).
+        * 
+        * @param line
+        *            the line to get the starting spaces from
+        * 
+        * @return the left spacing
+        */
        static private String getItemSpacing(String line) {
                int i;
                for (i = 0; i < line.length(); i++) {
@@ -591,6 +885,14 @@ public class StringUtils {
                return "";
        }
 
+       /**
+        * Check if this line is a horizontal spacer line.
+        * 
+        * @param line
+        *            the line to test
+        * 
+        * @return TRUE if it is
+        */
        static private boolean isHrLine(CharSequence line) {
                int count = 0;
                if (line != null) {