From: Niki Roo Date: Sun, 17 Mar 2019 15:44:10 +0000 (+0100) Subject: fix/perf base64/serial X-Git-Tag: nikiroo-utils-4.5.2~3 X-Git-Url: http://git.nikiroo.be/?p=nikiroo-utils.git;a=commitdiff_plain;h=b6200792e591f13c2d8ecedc4d2015db8ff76cbe fix/perf base64/serial --- diff --git a/changelog.md b/changelog.md index 2ad5d03..37080fb 100644 --- a/changelog.md +++ b/changelog.md @@ -3,6 +3,8 @@ ## Version WIP - Serial: fix b64/not b64 error +- Serial: perf improvement +- Base64: perf improvement ## Version 4.5.1 diff --git a/src/be/nikiroo/utils/Base64.java b/src/be/nikiroo/utils/Base64.java index efb57b4..4761601 100644 --- a/src/be/nikiroo/utils/Base64.java +++ b/src/be/nikiroo/utils/Base64.java @@ -208,7 +208,7 @@ class Base64 /** Preferred encoding. */ - private final static String PREFERRED_ENCODING = "US-ASCII"; + private final static String PREFERRED_ENCODING = "UTF-8"; private final static byte WHITE_SPACE_ENC = -5; // Indicates white space in encoding @@ -1249,8 +1249,24 @@ class Base64 } // end catch // + return niki_decode(bytes, 0, bytes.length, options); + } + + /** + * Decodes data from Base64 notation, automatically + * detecting gzip-compressed data and decompressing it. 
+ * + * @param s the string to decode + * @param options encode options such as URL_SAFE + * @return the decoded data + * @throws java.io.IOException if there is an error + * @throws NullPointerException if s is null + * @since niki + */ + public static byte[] niki_decode( byte[] bytes, int offset, int count, int options ) throws java.io.IOException { + // Decode - bytes = decode( bytes, 0, bytes.length, options ); + bytes = decode( bytes, offset, count, options ); // Check to see if it's gzip-compressed // GZIP Magic Two-Byte Number: 0x8b1f (35615) diff --git a/src/be/nikiroo/utils/StringUtils.java b/src/be/nikiroo/utils/StringUtils.java index 1884c21..ef3d84b 100644 --- a/src/be/nikiroo/utils/StringUtils.java +++ b/src/be/nikiroo/utils/StringUtils.java @@ -1,6 +1,5 @@ package be.nikiroo.utils; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -16,7 +15,6 @@ import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map.Entry; -import java.util.Scanner; import java.util.regex.Pattern; import org.unbescape.html.HtmlEscape; @@ -548,16 +546,7 @@ public class StringUtils { */ @Deprecated public static String unzip64(String data) throws IOException { - ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data, - Base64.GZIP)); - - Scanner scan = new Scanner(in); - scan.useDelimiter("\\A"); - try { - return scan.next(); - } finally { - scan.close(); - } + return new String(Base64.decode(data, Base64.GZIP), "UTF-8"); } /** @@ -657,13 +646,15 @@ public class StringUtils { /** * Unconvert the given data from Base64 format back to a raw array of bytes. + *

+ * Will automatically detect zipped data and also uncompress it before + * returning, unless ZIP is false. * * @param data * the data to unconvert * @param zip - * TRUE to also uncompress the data from a GZIP format; take care - * about this flag, as it could easily cause errors in the - * returned content or an {@link IOException} + * TRUE to also uncompress the data from a GZIP format + * automatically; if set to FALSE, zipped data can be returned * * @return the raw data represented by the given Base64 {@link String}, * optionally compressed with GZIP @@ -672,7 +663,8 @@ public class StringUtils { * in case of I/O errors */ public static byte[] unbase64(String data, boolean zip) throws IOException { - return Base64.decode(data, zip ? Base64.GZIP : Base64.NO_OPTIONS); + return Base64 + .decode(data, zip ? Base64.NO_OPTIONS : Base64.DONT_GUNZIP); } /** @@ -684,19 +676,15 @@ public class StringUtils { * TRUE to also uncompress the data from a GZIP format; take care * about this flag, as it could easily cause errors in the * returned content or an {@link IOException} - * @param breakLines - * TRUE to break lines on every 76th character * * @return the raw data represented by the given Base64 {@link String} * * @throws IOException * in case of I/O errors */ - public static OutputStream unbase64(OutputStream data, boolean zip, - boolean breakLines) throws IOException { - OutputStream out = new Base64.OutputStream(data, - breakLines ? 
Base64.DO_BREAK_LINES & Base64.ENCODE - : Base64.ENCODE); + public static OutputStream unbase64(OutputStream data, boolean zip) + throws IOException { + OutputStream out = new Base64.OutputStream(data, Base64.DECODE); if (zip) { out = new java.util.zip.GZIPOutputStream(out); @@ -714,29 +702,83 @@ public class StringUtils { * TRUE to also uncompress the data from a GZIP format; take care * about this flag, as it could easily cause errors in the * returned content or an {@link IOException} - * @param breakLines - * TRUE to break lines on every 76th character * * @return the raw data represented by the given Base64 {@link String} * * @throws IOException * in case of I/O errors */ - public static InputStream unbase64(InputStream data, boolean zip, - boolean breakLines) throws IOException { + public static InputStream unbase64(InputStream data, boolean zip) + throws IOException { if (zip) { data = new java.util.zip.GZIPInputStream(data); } - return new Base64.InputStream(data, breakLines ? Base64.DO_BREAK_LINES - & Base64.ENCODE : Base64.ENCODE); + return new Base64.InputStream(data, Base64.DECODE); + } + + /** + * Unconvert the given data from Base64 format back to a raw array of bytes. + *

+ * Will automatically detect zipped data and also uncompress it before + * returning, unless ZIP is false. + * + * @param data + * the data to unconvert + * @param offset + * the offset at which to start taking the data (do not take the + * data before it into account) + * @param count + * the number of bytes to take into account (do not process after + * this number of bytes has been processed) + * @param zip + * TRUE to also uncompress the data from a GZIP format + * automatically; if set to FALSE, zipped data can be returned + * + * @return the raw data represented by the given Base64 {@link String} + * + * @throws IOException + * in case of I/O errors + */ + public static byte[] unbase64(byte[] data, int offset, int count, + boolean zip) throws IOException { + return Base64.niki_decode(data, offset, count, zip ? Base64.NO_OPTIONS + : Base64.DONT_GUNZIP); } /** * Unonvert the given data from Base64 format back to a {@link String}. + *

+ * Will automatically detect zipped data and also uncompress it before + * returning, unless ZIP is false. + * + * @param data + * the data to unconvert + * @param zip + * TRUE to also uncompress the data from a GZIP format + * automatically; if set to FALSE, zipped data can be returned + * + * @return the {@link String} represented by the given Base64 {@link String} + * , optionally compressed with GZIP + * + * @throws IOException + * in case of I/O errors + */ + public static String unbase64s(String data, boolean zip) throws IOException { + return new String(unbase64(data, zip), "UTF-8"); + } + + /** + * Unconvert the given data from Base64 format back into a {@link String}. * * @param data * the data to unconvert + * @param offset + * the offset at which to start taking the data (do not take the + * data before it into account) + * @param count + * the number of bytes to take into account (do not process after + * this number of bytes has been processed) * @param zip * TRUE to also uncompress the data from a GZIP format; take care * about this flag, as it could easily cause errors in the @@ -748,16 +790,9 @@ public class StringUtils { * @throws IOException * in case of I/O errors */ - public static String unbase64s(String data, boolean zip) throws IOException { - ByteArrayInputStream in = new ByteArrayInputStream(unbase64(data, zip)); - - Scanner scan = new Scanner(in, "UTF-8"); - scan.useDelimiter("\\A"); - try { - return scan.next(); - } finally { - scan.close(); - } + public static String unbase64s(byte[] data, int offset, int count, + boolean zip) throws IOException { + return new String(unbase64(data, offset, count, zip), "UTF-8"); } /** diff --git a/src/be/nikiroo/utils/serial/Importer.java b/src/be/nikiroo/utils/serial/Importer.java index 84fb5aa..bca157c 100644 --- a/src/be/nikiroo/utils/serial/Importer.java +++ b/src/be/nikiroo/utils/serial/Importer.java @@ -1,10 +1,10 @@ package be.nikiroo.utils.serial; import java.io.IOException; +import 
java.io.UnsupportedEncodingException; import java.lang.reflect.Field; import java.util.HashMap; import java.util.Map; -import java.util.Scanner; import be.nikiroo.utils.StringUtils; @@ -19,6 +19,9 @@ import be.nikiroo.utils.StringUtils; * @author niki */ public class Importer { + static private Integer SIZE_ID = null; + static private byte[] NEWLINE = null; + private Boolean link; private Object me; private Importer child; @@ -26,6 +29,15 @@ public class Importer { private String currentFieldName; + static { + try { + SIZE_ID = "EXT:".getBytes("UTF-8").length; + NEWLINE = "\n".getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + // UTF-8 is mandated to exist on confirming jre's + } + } + /** * Create a new {@link Importer}. */ @@ -61,37 +73,74 @@ public class Importer { */ public Importer read(String data) throws NoSuchFieldException, NoSuchMethodException, ClassNotFoundException, IOException { + return read(data.getBytes("UTF-8"), 0); + } - Scanner scan = new Scanner(data); - try { - scan.useDelimiter("\n"); - while (scan.hasNext()) { - String line = scan.next(); - - if (line.startsWith("ZIP:")) { - try { - line = StringUtils.unbase64s( - line.substring("ZIP:".length()), true); - } catch (IOException e) { - throw new IOException( - "Internal error when decoding ZIP content: input may be corrupt"); - } - read(line); - } else if (line.startsWith("B64:")) { - try { - line = StringUtils.unbase64s( - line.substring("B64:".length()), false); - } catch (IOException e) { - throw new IOException( - "Internal error when decoding B64 content: input may be corrupt"); - } - read(line); - } else { - processLine(line); + /** + * Read some data into this {@link Importer}: it can be the full serialised + * content, or a number of lines of it (any given line MUST be + * complete though) and accumulate it with the already present data. 
+ * + * @param data + * the data to parse + * @param offset + * the offset at which to start reading the data (we ignore + * anything that goes before that offset) + * + * @return itself so it can be chained + * + * @throws NoSuchFieldException + * if the serialised data contains information about a field + * which does actually not exist in the class we know of + * @throws NoSuchMethodException + * if a class described in the serialised data cannot be created + * because it is not compatible with this code + * @throws ClassNotFoundException + * if a class described in the serialised data cannot be found + * @throws IOException + * if the content cannot be read (for instance, corrupt data) + */ + private Importer read(byte[] data, int offset) throws NoSuchFieldException, + NoSuchMethodException, ClassNotFoundException, IOException { + + int dataStart = offset; + while (dataStart < data.length) { + String id = ""; + if (data.length - dataStart >= SIZE_ID) { + id = new String(data, dataStart, SIZE_ID); + } + + boolean zip = id.equals("ZIP:"); + boolean b64 = id.equals("B64:"); + if (zip || b64) { + dataStart += SIZE_ID; + } + + int count = find(data, dataStart, NEWLINE); + count -= dataStart; + if (count < 0) { + count = data.length - dataStart; + } + + if (zip || b64) { + boolean unpacked = false; + try { + byte[] line = StringUtils.unbase64(data, dataStart, count, + zip); + unpacked = true; + read(line, 0); + } catch (IOException e) { + throw new IOException("Internal error when decoding " + + (unpacked ? "unpacked " : "") + + (zip ? "ZIP" : "B64") + + " content: input may be corrupt"); } + } else { + String line = new String(data, dataStart, count, "UTF-8"); + processLine(line); } - } finally { - scan.close(); + + dataStart += count + NEWLINE.length; } return this; @@ -206,6 +255,37 @@ public class Importer { } } + /** + * Find the given needle in the data and return its position (or -1 if not + * found). 
+ * + * @param data + * the data to look through + * @param offset + * the offset at which to start searching + * @param needle + * the needle to find + * + * @return the position of the needle if found, -1 if not found + */ + private int find(byte[] data, int offset, byte[] needle) { + for (int i = offset; i + needle.length - 1 < data.length; i++) { + boolean same = true; + for (int j = 0; j < needle.length; j++) { + if (data[i + j] != needle[j]) { + same = false; + break; + } + } + + if (same) { + return i; + } + } + + return -1; + } + /** * Return the current deserialised value. *