fix/perf base64/serial
author Niki Roo <niki@nikiroo.be>
Sun, 17 Mar 2019 15:44:10 +0000 (16:44 +0100)
committer Niki Roo <niki@nikiroo.be>
Sun, 17 Mar 2019 15:44:10 +0000 (16:44 +0100)
changelog.md
src/be/nikiroo/utils/Base64.java
src/be/nikiroo/utils/StringUtils.java
src/be/nikiroo/utils/serial/Importer.java

index 2ad5d0354da23e8f102fd1ff224f5b384653d771..37080fb2fc9be162a27a5829e632422198a9a2ed 100644 (file)
@@ -3,6 +3,8 @@
 ## Version WIP
 
 - Serial: fix b64/not b64 error
+- Serial: perf improvement
+- Base64: perf improvement
 
 ## Version 4.5.1
 
index efb57b4a0cf1d62fa8025d96f26361bc2a61ec2f..476160182475a7d51850a64dafef25ea576782a1 100644 (file)
@@ -208,7 +208,7 @@ class Base64
     
     
     /** Preferred encoding. */
-    private final static String PREFERRED_ENCODING = "US-ASCII";
+    private final static String PREFERRED_ENCODING = "UTF-8";
     
        
     private final static byte WHITE_SPACE_ENC = -5; // Indicates white space in encoding
@@ -1249,8 +1249,24 @@ class Base64
         }   // end catch
                //</change>
         
+        return niki_decode(bytes, 0, bytes.length, options);
+    }
+     
+    /**
+     * Decodes data from Base64 notation, automatically
+     * detecting gzip-compressed data and decompressing it.
+     *
+     * @param bytes the Base64 data to decode (only <tt>count</tt> bytes starting at <tt>offset</tt> are used)
+     * @param options encode options such as URL_SAFE
+     * @return the decoded data
+     * @throws java.io.IOException if there is an error
+     * @throws NullPointerException if <tt>bytes</tt> is null
+     * @since niki
+     */
+    public static byte[] niki_decode( byte[] bytes, int offset, int count, int options ) throws java.io.IOException {
+        
         // Decode
-        bytes = decode( bytes, 0, bytes.length, options );
+        bytes = decode( bytes, offset, count, options );
         
         // Check to see if it's gzip-compressed
         // GZIP Magic Two-Byte Number: 0x8b1f (35615)
index 1884c21ff47a98fce6864c0c6a7c801727f0cb05..ef3d84b921aea488f5ce17e5b8b52334cf24a375 100644 (file)
@@ -1,6 +1,5 @@
 package be.nikiroo.utils;
 
-import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
@@ -16,7 +15,6 @@ import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
 import java.util.Map.Entry;
-import java.util.Scanner;
 import java.util.regex.Pattern;
 
 import org.unbescape.html.HtmlEscape;
@@ -548,16 +546,7 @@ public class StringUtils {
         */
        @Deprecated
        public static String unzip64(String data) throws IOException {
-               ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
-                               Base64.GZIP));
-
-               Scanner scan = new Scanner(in);
-               scan.useDelimiter("\\A");
-               try {
-                       return scan.next();
-               } finally {
-                       scan.close();
-               }
+               return new String(Base64.decode(data, Base64.GZIP), "UTF-8");
        }
 
        /**
@@ -657,13 +646,15 @@ public class StringUtils {
 
        /**
         * Unconvert the given data from Base64 format back to a raw array of bytes.
+        * <p>
+        * Will automatically detect zipped data and also uncompress it before
+        * returning, unless ZIP is false.
         * 
         * @param data
         *            the data to unconvert
         * @param zip
-        *            TRUE to also uncompress the data from a GZIP format; take care
-        *            about this flag, as it could easily cause errors in the
-        *            returned content or an {@link IOException}
+        *            TRUE to also uncompress the data from a GZIP format
+        *            automatically; if set to FALSE, zipped data can be returned
         * 
         * @return the raw data represented by the given Base64 {@link String},
         *         optionally compressed with GZIP
@@ -672,7 +663,8 @@ public class StringUtils {
         *             in case of I/O errors
         */
        public static byte[] unbase64(String data, boolean zip) throws IOException {
-               return Base64.decode(data, zip ? Base64.GZIP : Base64.NO_OPTIONS);
+               return Base64
+                               .decode(data, zip ? Base64.NO_OPTIONS : Base64.DONT_GUNZIP);
        }
 
        /**
@@ -684,19 +676,15 @@ public class StringUtils {
         *            TRUE to also uncompress the data from a GZIP format; take care
         *            about this flag, as it could easily cause errors in the
         *            returned content or an {@link IOException}
-        * @param breakLines
-        *            TRUE to break lines on every 76th character
         * 
         * @return the raw data represented by the given Base64 {@link String}
         * 
         * @throws IOException
         *             in case of I/O errors
         */
-       public static OutputStream unbase64(OutputStream data, boolean zip,
-                       boolean breakLines) throws IOException {
-               OutputStream out = new Base64.OutputStream(data,
-                               breakLines ? Base64.DO_BREAK_LINES & Base64.ENCODE
-                                               : Base64.ENCODE);
+       public static OutputStream unbase64(OutputStream data, boolean zip)
+                       throws IOException {
+               OutputStream out = new Base64.OutputStream(data, Base64.DECODE);
 
                if (zip) {
                        out = new java.util.zip.GZIPOutputStream(out);
@@ -714,29 +702,83 @@ public class StringUtils {
         *            TRUE to also uncompress the data from a GZIP format; take care
         *            about this flag, as it could easily cause errors in the
         *            returned content or an {@link IOException}
-        * @param breakLines
-        *            TRUE to break lines on every 76th character
         * 
         * @return the raw data represented by the given Base64 {@link String}
         * 
         * @throws IOException
         *             in case of I/O errors
         */
-       public static InputStream unbase64(InputStream data, boolean zip,
-                       boolean breakLines) throws IOException {
+       public static InputStream unbase64(InputStream data, boolean zip)
+                       throws IOException {
                if (zip) {
                        data = new java.util.zip.GZIPInputStream(data);
                }
 
-               return new Base64.InputStream(data, breakLines ? Base64.DO_BREAK_LINES
-                               & Base64.ENCODE : Base64.ENCODE);
+               return new Base64.InputStream(data, Base64.DECODE);
+       }
+
+       /**
+        * Unconvert the given data from Base64 format back to a raw array of bytes.
+        * <p>
+        * Will automatically detect zipped data and also uncompress it before
+        * returning, unless ZIP is false.
+        * 
+        * @param data
+        *            the data to unconvert
+        * @param offset
+        *            the offset at which to start taking the data (do not take the
+        *            data before it into account)
+        * @param count
+        *            the number of bytes to take into account (do not process after
+        *            this number of bytes has been processed)
+        * @param zip
+        *            TRUE to also uncompress the data from a GZIP format
+        *            automatically; if set to FALSE, zipped data can be returned
+        * 
+        * @return the raw data represented by the given Base64 data
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static byte[] unbase64(byte[] data, int offset, int count,
+                       boolean zip) throws IOException {
+               return Base64.niki_decode(data, offset, count, zip ? Base64.NO_OPTIONS
+                               : Base64.DONT_GUNZIP);
        }
 
        /**
         * Unonvert the given data from Base64 format back to a {@link String}.
+        * <p>
+        * Will automatically detect zipped data and also uncompress it before
+        * returning, unless ZIP is false.
+        * 
+        * @param data
+        *            the data to unconvert
+        * @param zip
+        *            TRUE to also uncompress the data from a GZIP format
+        *            automatically; if set to FALSE, zipped data can be returned
+        * 
+        * @return the {@link String} represented by the given Base64
+        *         {@link String}, optionally compressed with GZIP
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static String unbase64s(String data, boolean zip) throws IOException {
+               return new String(unbase64(data, zip), "UTF-8");
+       }
+
+       /**
+        * Unconvert the given data from Base64 format back into a {@link String}.
         * 
         * @param data
         *            the data to unconvert
+        * @param offset
+        *            the offset at which to start taking the data (do not take the
+        *            data before it into account)
+        * @param count
+        *            the number of bytes to take into account (do not process after
+        *            this number of bytes has been processed)
         * @param zip
         *            TRUE to also uncompress the data from a GZIP format; take care
         *            about this flag, as it could easily cause errors in the
@@ -748,16 +790,9 @@ public class StringUtils {
         * @throws IOException
         *             in case of I/O errors
         */
-       public static String unbase64s(String data, boolean zip) throws IOException {
-               ByteArrayInputStream in = new ByteArrayInputStream(unbase64(data, zip));
-
-               Scanner scan = new Scanner(in, "UTF-8");
-               scan.useDelimiter("\\A");
-               try {
-                       return scan.next();
-               } finally {
-                       scan.close();
-               }
+       public static String unbase64s(byte[] data, int offset, int count,
+                       boolean zip) throws IOException {
+               return new String(unbase64(data, offset, count, zip), "UTF-8");
        }
 
        /**
index 84fb5aae5742f49bcf68cd915906a2b7322a399b..bca157c501bcb8a512fd5367585b15a06a82771e 100644 (file)
@@ -1,10 +1,10 @@
 package be.nikiroo.utils.serial;
 
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 import java.lang.reflect.Field;
 import java.util.HashMap;
 import java.util.Map;
-import java.util.Scanner;
 
 import be.nikiroo.utils.StringUtils;
 
@@ -19,6 +19,9 @@ import be.nikiroo.utils.StringUtils;
  * @author niki
  */
 public class Importer {
+       static private Integer SIZE_ID = null;
+       static private byte[] NEWLINE = null;
+
        private Boolean link;
        private Object me;
        private Importer child;
@@ -26,6 +29,15 @@ public class Importer {
 
        private String currentFieldName;
 
+       static {
+               try {
+                       SIZE_ID = "EXT:".getBytes("UTF-8").length;
+                       NEWLINE = "\n".getBytes("UTF-8");
+               } catch (UnsupportedEncodingException e) {
+                       // UTF-8 is mandated to exist on conforming JREs
+               }
+       }
+
        /**
         * Create a new {@link Importer}.
         */
@@ -61,37 +73,74 @@ public class Importer {
         */
        public Importer read(String data) throws NoSuchFieldException,
                        NoSuchMethodException, ClassNotFoundException, IOException {
+               return read(data.getBytes("UTF-8"), 0);
+       }
 
-               Scanner scan = new Scanner(data);
-               try {
-                       scan.useDelimiter("\n");
-                       while (scan.hasNext()) {
-                               String line = scan.next();
-
-                               if (line.startsWith("ZIP:")) {
-                                       try {
-                                               line = StringUtils.unbase64s(
-                                                               line.substring("ZIP:".length()), true);
-                                       } catch (IOException e) {
-                                               throw new IOException(
-                                                               "Internal error when decoding ZIP content: input may be corrupt");
-                                       }
-                                       read(line);
-                               } else if (line.startsWith("B64:")) {
-                                       try {
-                                               line = StringUtils.unbase64s(
-                                                               line.substring("B64:".length()), false);
-                                       } catch (IOException e) {
-                                               throw new IOException(
-                                                               "Internal error when decoding B64 content: input may be corrupt");
-                                       }
-                                       read(line);
-                               } else {
-                                       processLine(line);
+       /**
+        * Read some data into this {@link Importer}: it can be the full serialised
+        * content, or a number of lines of it (any given line <b>MUST</b> be
+        * complete though) and accumulate it with the already present data.
+        * 
+        * @param data
+        *            the data to parse
+        * @param offset
+        *            the offset at which to start reading the data (we ignore
+        *            anything that goes before that offset)
+        * 
+        * @return itself so it can be chained
+        * 
+        * @throws NoSuchFieldException
+        *             if the serialised data contains information about a field
+        *             which does actually not exist in the class we know of
+        * @throws NoSuchMethodException
+        *             if a class described in the serialised data cannot be created
+        *             because it is not compatible with this code
+        * @throws ClassNotFoundException
+        *             if a class described in the serialised data cannot be found
+        * @throws IOException
+        *             if the content cannot be read (for instance, corrupt data)
+        */
+       private Importer read(byte[] data, int offset) throws NoSuchFieldException,
+                       NoSuchMethodException, ClassNotFoundException, IOException {
+
+               int dataStart = offset;
+               while (dataStart < data.length) {
+                       String id = "";
+                       if (data.length - dataStart >= SIZE_ID) {
+                               id = new String(data, dataStart, SIZE_ID);
+                       }
+
+                       boolean zip = id.equals("ZIP:");
+                       boolean b64 = id.equals("B64:");
+                       if (zip || b64) {
+                               dataStart += SIZE_ID;
+                       }
+
+                       int count = find(data, dataStart, NEWLINE);
+                       count -= dataStart;
+                       if (count < 0) {
+                               count = data.length - dataStart;
+                       }
+
+                       if (zip || b64) {
+                               boolean unpacked = false;
+                               try {
+                                       byte[] line = StringUtils.unbase64(data, dataStart, count,
+                                                       zip);
+                                       unpacked = true;
+                                       read(line, 0);
+                               } catch (IOException e) {
+                                       throw new IOException("Internal error when decoding "
+                                                       + (unpacked ? "unpacked " : "")
+                                                       + (zip ? "ZIP" : "B64")
+                                                       + " content: input may be corrupt");
                                }
+                       } else {
+                               String line = new String(data, dataStart, count, "UTF-8");
+                               processLine(line);
                        }
-               } finally {
-                       scan.close();
+
+                       dataStart += count + NEWLINE.length;
                }
 
                return this;
@@ -206,6 +255,37 @@ public class Importer {
                }
        }
 
+       /**
+        * Find the given needle in the data and return its position (or -1 if not
+        * found).
+        * 
+        * @param data
+        *            the data to look through
+        * @param offset
+        *            the offset at which to start searching
+        * @param needle
+        *            the needle to find
+        * 
+        * @return the position of the needle if found, -1 if not found
+        */
+       private int find(byte[] data, int offset, byte[] needle) {
+               for (int i = offset; i + needle.length - 1 < data.length; i++) {
+                       boolean same = true;
+                       for (int j = 0; j < needle.length; j++) {
+                               if (data[i + j] != needle[j]) {
+                                       same = false;
+                                       break;
+                               }
+                       }
+
+                       if (same) {
+                               return i;
+                       }
+               }
+
+               return -1;
+       }
+
        /**
         * Return the current deserialised value.
         *