From: Niki Date: Fri, 22 Mar 2019 13:45:57 +0000 (+0100) Subject: merge with master X-Git-Url: http://git.nikiroo.be/?p=nikiroo-utils.git;a=commitdiff_plain;h=a43e4f72629d04cd3122bde830b6e4925fd3aa91;hp=875fbf21497ef5c8d74d5b7d88e2bbabede5aa3c merge with master --- diff --git a/VERSION b/VERSION index 4404a17..6cedcff 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.5.1 +4.5.2 diff --git a/changelog.md b/changelog.md index 202b37d..c631464 100644 --- a/changelog.md +++ b/changelog.md @@ -2,6 +2,9 @@ ## Version WIP +- Serial: fix b64/not b64 error +- Serial: perf improvement +- Base64: perf improvement - new: Proxy selector ## Version 4.5.1 diff --git a/src/be/nikiroo/utils/Base64.java b/src/be/nikiroo/utils/Base64.java index efb57b4..4761601 100644 --- a/src/be/nikiroo/utils/Base64.java +++ b/src/be/nikiroo/utils/Base64.java @@ -208,7 +208,7 @@ class Base64 /** Preferred encoding. */ - private final static String PREFERRED_ENCODING = "US-ASCII"; + private final static String PREFERRED_ENCODING = "UTF-8"; private final static byte WHITE_SPACE_ENC = -5; // Indicates white space in encoding @@ -1249,8 +1249,24 @@ class Base64 } // end catch // + return niki_decode(bytes, 0, bytes.length, options); + } + + /** + * Decodes data from Base64 notation, automatically + * detecting gzip-compressed data and decompressing it. 
+ * + * @param s the string to decode + * @param options encode options such as URL_SAFE + * @return the decoded data + * @throws java.io.IOException if there is an error + * @throws NullPointerException if s is null + * @since niki + */ + public static byte[] niki_decode( byte[] bytes, int offset, int count, int options ) throws java.io.IOException { + // Decode - bytes = decode( bytes, 0, bytes.length, options ); + bytes = decode( bytes, offset, count, options ); // Check to see if it's gzip-compressed // GZIP Magic Two-Byte Number: 0x8b1f (35615) diff --git a/src/be/nikiroo/utils/StringUtils.java b/src/be/nikiroo/utils/StringUtils.java index 1884c21..ef3d84b 100644 --- a/src/be/nikiroo/utils/StringUtils.java +++ b/src/be/nikiroo/utils/StringUtils.java @@ -1,6 +1,5 @@ package be.nikiroo.utils; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -16,7 +15,6 @@ import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map.Entry; -import java.util.Scanner; import java.util.regex.Pattern; import org.unbescape.html.HtmlEscape; @@ -548,16 +546,7 @@ public class StringUtils { */ @Deprecated public static String unzip64(String data) throws IOException { - ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data, - Base64.GZIP)); - - Scanner scan = new Scanner(in); - scan.useDelimiter("\\A"); - try { - return scan.next(); - } finally { - scan.close(); - } + return new String(Base64.decode(data, Base64.GZIP), "UTF-8"); } /** @@ -657,13 +646,15 @@ public class StringUtils { /** * Unconvert the given data from Base64 format back to a raw array of bytes. + *

+ * Will automatically detect zipped data and also uncompress it before + * returning, unless ZIP is false. * * @param data * the data to unconvert * @param zip - * TRUE to also uncompress the data from a GZIP format; take care - * about this flag, as it could easily cause errors in the - * returned content or an {@link IOException} + * TRUE to also uncompress the data from a GZIP format + * automatically; if set to FALSE, zipped data can be returned * * @return the raw data represented by the given Base64 {@link String}, * optionally compressed with GZIP @@ -672,7 +663,8 @@ public class StringUtils { * in case of I/O errors */ public static byte[] unbase64(String data, boolean zip) throws IOException { - return Base64.decode(data, zip ? Base64.GZIP : Base64.NO_OPTIONS); + return Base64 + .decode(data, zip ? Base64.NO_OPTIONS : Base64.DONT_GUNZIP); } /** @@ -684,19 +676,15 @@ public class StringUtils { * TRUE to also uncompress the data from a GZIP format; take care * about this flag, as it could easily cause errors in the * returned content or an {@link IOException} - * @param breakLines - * TRUE to break lines on every 76th character * * @return the raw data represented by the given Base64 {@link String} * * @throws IOException * in case of I/O errors */ - public static OutputStream unbase64(OutputStream data, boolean zip, - boolean breakLines) throws IOException { - OutputStream out = new Base64.OutputStream(data, - breakLines ? 
Base64.DO_BREAK_LINES & Base64.ENCODE - : Base64.ENCODE); + public static OutputStream unbase64(OutputStream data, boolean zip) + throws IOException { + OutputStream out = new Base64.OutputStream(data, Base64.DECODE); if (zip) { out = new java.util.zip.GZIPOutputStream(out); @@ -714,29 +702,83 @@ public class StringUtils { * TRUE to also uncompress the data from a GZIP format; take care * about this flag, as it could easily cause errors in the * returned content or an {@link IOException} - * @param breakLines - * TRUE to break lines on every 76th character * * @return the raw data represented by the given Base64 {@link String} * * @throws IOException * in case of I/O errors */ - public static InputStream unbase64(InputStream data, boolean zip, - boolean breakLines) throws IOException { + public static InputStream unbase64(InputStream data, boolean zip) + throws IOException { if (zip) { data = new java.util.zip.GZIPInputStream(data); } - return new Base64.InputStream(data, breakLines ? Base64.DO_BREAK_LINES - & Base64.ENCODE : Base64.ENCODE); + return new Base64.InputStream(data, Base64.DECODE); + } + + /** + * Unconvert the given data from Base64 format back to a raw array of bytes. + *

+ * Will automatically detect zipped data and also uncompress it before + * returning, unless ZIP is false. + * + * @param data + * the data to unconvert + * @param offset + * the offset at which to start taking the data (do not take the + * data before it into account) + * @param count + * the number of bytes to take into account (do not process after + * this number of bytes has been processed) + * @param zip + * TRUE to also uncompress the data from a GZIP format + * automatically; if set to FALSE, zipped data can be returned + * + * @return the raw data represented by the given Base64 {@link String} + * + * @throws IOException + * in case of I/O errors + */ + public static byte[] unbase64(byte[] data, int offset, int count, + boolean zip) throws IOException { + return Base64.niki_decode(data, offset, count, zip ? Base64.NO_OPTIONS + : Base64.DONT_GUNZIP); } /** * Unonvert the given data from Base64 format back to a {@link String}. + *

+ * Will automatically detect zipped data and also uncompress it before + * returning, unless ZIP is false. + * + * @param data + * the data to unconvert + * @param zip + * TRUE to also uncompress the data from a GZIP format + * automatically; if set to FALSE, zipped data can be returned + * + * @return the {@link String} represented by the given Base64 {@link String} + * , optionally compressed with GZIP + * + * @throws IOException + * in case of I/O errors + */ + public static String unbase64s(String data, boolean zip) throws IOException { + return new String(unbase64(data, zip), "UTF-8"); + } + + /** + * Unconvert the given data from Base64 format back into a {@link String}. * * @param data * the data to unconvert + * @param offset + * the offset at which to start taking the data (do not take the + * data before it into account) + * @param count + * the number of bytes to take into account (do not process after + * this number of bytes has been processed) * @param zip * TRUE to also uncompress the data from a GZIP format; take care * about this flag, as it could easily cause errors in the @@ -748,16 +790,9 @@ public class StringUtils { * @throws IOException * in case of I/O errors */ - public static String unbase64s(String data, boolean zip) throws IOException { - ByteArrayInputStream in = new ByteArrayInputStream(unbase64(data, zip)); - - Scanner scan = new Scanner(in, "UTF-8"); - scan.useDelimiter("\\A"); - try { - return scan.next(); - } finally { - scan.close(); - } + public static String unbase64s(byte[] data, int offset, int count, + boolean zip) throws IOException { + return new String(unbase64(data, offset, count, zip), "UTF-8"); } /** diff --git a/src/be/nikiroo/utils/serial/Exporter.java b/src/be/nikiroo/utils/serial/Exporter.java index d5f3a5f..dc96d97 100644 --- a/src/be/nikiroo/utils/serial/Exporter.java +++ b/src/be/nikiroo/utils/serial/Exporter.java @@ -74,7 +74,7 @@ public class Exporter { b64 = false; } - if (b64 != null || b64) { + if (b64 == 
null || b64) { try { String zipped = StringUtils.base64(builder.toString(), zip); if (b64 != null || zipped.length() < builder.length() - 4) { diff --git a/src/be/nikiroo/utils/serial/Importer.java b/src/be/nikiroo/utils/serial/Importer.java index 84fb5aa..bca157c 100644 --- a/src/be/nikiroo/utils/serial/Importer.java +++ b/src/be/nikiroo/utils/serial/Importer.java @@ -1,10 +1,10 @@ package be.nikiroo.utils.serial; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.lang.reflect.Field; import java.util.HashMap; import java.util.Map; -import java.util.Scanner; import be.nikiroo.utils.StringUtils; @@ -19,6 +19,9 @@ import be.nikiroo.utils.StringUtils; * @author niki */ public class Importer { + static private Integer SIZE_ID = null; + static private byte[] NEWLINE = null; + private Boolean link; private Object me; private Importer child; @@ -26,6 +29,15 @@ public class Importer { private String currentFieldName; + static { + try { + SIZE_ID = "EXT:".getBytes("UTF-8").length; + NEWLINE = "\n".getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + // UTF-8 is mandated to exist on conforming JREs + } + } + /** * Create a new {@link Importer}. 
*/ @@ -61,37 +73,74 @@ public class Importer { */ public Importer read(String data) throws NoSuchFieldException, NoSuchMethodException, ClassNotFoundException, IOException { + return read(data.getBytes("UTF-8"), 0); + } - Scanner scan = new Scanner(data); - try { - scan.useDelimiter("\n"); - while (scan.hasNext()) { - String line = scan.next(); - - if (line.startsWith("ZIP:")) { - try { - line = StringUtils.unbase64s( - line.substring("ZIP:".length()), true); - } catch (IOException e) { - throw new IOException( - "Internal error when decoding ZIP content: input may be corrupt"); - } - read(line); - } else if (line.startsWith("B64:")) { - try { - line = StringUtils.unbase64s( - line.substring("B64:".length()), false); - } catch (IOException e) { - throw new IOException( - "Internal error when decoding B64 content: input may be corrupt"); - } - read(line); - } else { - processLine(line); + /** + * Read some data into this {@link Importer}: it can be the full serialised + * content, or a number of lines of it (any given line MUST be + * complete though) and accumulate it with the already present data. 
+ * + * @param data + * the data to parse + * @param offset + * the offset at which to start reading the data (we ignore + * anything that goes before that offset) + * + * @return itself so it can be chained + * + * @throws NoSuchFieldException + * if the serialised data contains information about a field + * which does actually not exist in the class we know of + * @throws NoSuchMethodException + * if a class described in the serialised data cannot be created + * because it is not compatible with this code + * @throws ClassNotFoundException + * if a class described in the serialised data cannot be found + * @throws IOException + * if the content cannot be read (for instance, corrupt data) + */ + private Importer read(byte[] data, int offset) throws NoSuchFieldException, + NoSuchMethodException, ClassNotFoundException, IOException { + + int dataStart = offset; + while (dataStart < data.length) { + String id = ""; + if (data.length - dataStart >= SIZE_ID) { + id = new String(data, dataStart, SIZE_ID); + } + + boolean zip = id.equals("ZIP:"); + boolean b64 = id.equals("B64:"); + if (zip || b64) { + dataStart += SIZE_ID; + } + + int count = find(data, dataStart, NEWLINE); + count -= dataStart; + if (count < 0) { + count = data.length - dataStart; + } + + if (zip || b64) { + boolean unpacked = false; + try { + byte[] line = StringUtils.unbase64(data, dataStart, count, + zip); + unpacked = true; + read(line, 0); + } catch (IOException e) { + throw new IOException("Internal error when decoding " + + (unpacked ? "unpacked " : "") + + (zip ? "ZIP" : "B64") + + " content: input may be corrupt"); } + } else { + String line = new String(data, dataStart, count, "UTF-8"); + processLine(line); } - } finally { - scan.close(); + + dataStart += count + NEWLINE.length; } return this; @@ -206,6 +255,37 @@ public class Importer { } } + /** + * Find the given needle in the data and return its position (or -1 if not + * found). 
+ * + * @param data + * the data to look through + * @param offset + * the offset at which to start searching + * @param needle + * the needle to find + * + * @return the position of the needle if found, -1 if not found + */ + private int find(byte[] data, int offset, byte[] needle) { + for (int i = offset; i + needle.length - 1 < data.length; i++) { + boolean same = true; + for (int j = 0; j < needle.length; j++) { + if (data[i + j] != needle[j]) { + same = false; + break; + } + } + + if (same) { + return i; + } + } + + return -1; + } + /** * Return the current deserialised value. * diff --git a/src/be/nikiroo/utils/test/SerialTest.java b/src/be/nikiroo/utils/test/SerialTest.java index 8fb1722..fdf5992 100644 --- a/src/be/nikiroo/utils/test/SerialTest.java +++ b/src/be/nikiroo/utils/test/SerialTest.java @@ -20,10 +20,11 @@ class SerialTest extends TestLauncher { @Override public void test() throws Exception { Data data = new Data(42); - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) - .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); assertEquals(encoded.replaceAll("@[0-9]*", "@REF"), reencoded.replaceAll("@[0-9]*", "@REF")); @@ -37,13 +38,15 @@ class SerialTest extends TestLauncher { @Override public void test() throws Exception { Data data = new Data() { + @SuppressWarnings("unused") int value = 42; }; - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) - .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); assertEquals(encoded.replaceAll("@[0-9]*", "@REF"), reencoded.replaceAll("@[0-9]*", 
"@REF")); @@ -57,20 +60,22 @@ class SerialTest extends TestLauncher { @Override public void test() throws Exception { Data[] data = new Data[] { new Data() { + @SuppressWarnings("unused") int value = 42; } }; - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) - .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); // Comparing the 2 strings won't be useful, because the @REFs // will be ZIP-encoded; so we parse and re-encode the object - encoded = new Exporter().append(data[0]).toString(false); + encoded = new Exporter().append(data[0]).toString(false, false); try { reencoded = new Exporter().append(((Data[]) redata)[0]) - .toString(false); + .toString(false, false); } catch (Exception e) { fail("Cannot cast the returned data into its original object", e); @@ -85,10 +90,11 @@ class SerialTest extends TestLauncher { @Override public void test() throws Exception { URL data = new URL("https://fanfan.be/"); - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) - .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); assertEquals(encoded.replaceAll("@[0-9]*", "@REF"), reencoded.replaceAll("@[0-9]*", "@REF")); @@ -99,10 +105,11 @@ class SerialTest extends TestLauncher { @Override public void test() throws Exception { String data = new URL("https://fanfan.be/").toString(); - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) 
- .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); assertEquals(encoded.replaceAll("@[0-9]*", "@REF"), reencoded.replaceAll("@[0-9]*", "@REF")); @@ -116,10 +123,11 @@ class SerialTest extends TestLauncher { final String url = "https://fanfan.be/"; Object[] data = new Object[] { new URL(url), url }; - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) - .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); assertEquals(encoded.replaceAll("@[0-9]*", "@REF"), reencoded.replaceAll("@[0-9]*", "@REF")); @@ -132,10 +140,11 @@ class SerialTest extends TestLauncher { @Override public void test() throws Exception { Data data = new DataObject(new Data(21)); - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) - .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); assertEquals(encoded.replaceAll("@[0-9]*", "@REF"), reencoded.replaceAll("@[0-9]*", "@REF")); @@ -149,10 +158,11 @@ class SerialTest extends TestLauncher { data.next = new DataLoop("level 2"); data.next.next = data; - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) - .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); assertEquals(encoded.replaceAll("@[0-9]*", "@REF"), reencoded.replaceAll("@[0-9]*", "@REF")); @@ -163,10 +173,11 @@ class SerialTest extends TestLauncher { 
@Override public void test() throws Exception { Object data = new DataArray();// new String[] { "un", "deux" }; - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) - .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); assertEquals(encoded.replaceAll("@[0-9]*", "@REF"), reencoded.replaceAll("@[0-9]*", "@REF")); @@ -177,10 +188,11 @@ class SerialTest extends TestLauncher { @Override public void test() throws Exception { Object data = new String[] { "un", "deux" }; - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) - .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); assertEquals(encoded.replaceAll("@[0-9]*", "@REF"), reencoded.replaceAll("@[0-9]*", "@REF")); @@ -191,22 +203,80 @@ class SerialTest extends TestLauncher { @Override public void test() throws Exception { Object data = EnumToSend.FANFAN; - String encoded = new Exporter().append(data).toString(false); + String encoded = new Exporter().append(data).toString(false, + false); Object redata = new Importer().read(encoded).getValue(); - String reencoded = new Exporter().append(redata) - .toString(false); + String reencoded = new Exporter().append(redata).toString( + false, false); assertEquals(encoded.replaceAll("@[0-9]*", "@REF"), reencoded.replaceAll("@[0-9]*", "@REF")); } }); + + addTest(new TestCase("B64 and ZIP String test") { + @Override + public void test() throws Exception { + Object data = "Fanfan la tulipe"; + String encoded = new Exporter().append(data).toString(true, + false); + String redata = (String) new Importer().read(encoded) + 
.getValue(); + + assertEquals("Items not identical after B64", data, redata); + + encoded = new Exporter().append(data).toString(true, true); + redata = (String) new Importer().read(encoded).getValue(); + + assertEquals("Items not identical after ZIP", data, redata); + } + }); + + addTest(new TestCase("B64 and ZIP Data test") { + @Override + public void test() throws Exception { + Object data = new Data(55); + String encoded = new Exporter().append(data).toString(true, + false); + Data redata = (Data) new Importer().read(encoded).getValue(); + + assertEquals("Items not identical after B64", data, redata); + + encoded = new Exporter().append(data).toString(true, true); + redata = (Data) new Importer().read(encoded).getValue(); + + assertEquals("Items not identical after ZIP", data, redata); + } + }); + + addTest(new TestCase("B64 and ZIP 70000 chars test") { + @Override + public void test() throws Exception { + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < 7000; i++) { + builder.append("0123456789"); + } + + Object data = builder.toString(); + String encoded = new Exporter().append(data).toString(true, + false); + String redata = (String) new Importer().read(encoded) + .getValue(); + + assertEquals("Items not identical after B64", data, redata); + + encoded = new Exporter().append(data).toString(true, true); + redata = (String) new Importer().read(encoded).getValue(); + + assertEquals("Items not identical after ZIP", data, redata); + } + }); } class DataArray { public String[] data = new String[] { "un", "deux" }; } - @SuppressWarnings("unused") class Data { private int value; @@ -216,6 +286,21 @@ class SerialTest extends TestLauncher { public Data(int value) { this.value = value; } + + @Override + public boolean equals(Object obj) { + if (obj instanceof Data) { + Data other = (Data) obj; + return other.value == this.value; + } + + return false; + } + + @Override + public int hashCode() { + return new Integer(value).hashCode(); + } } 
@SuppressWarnings("unused") diff --git a/src/be/nikiroo/utils/test/Test.java b/src/be/nikiroo/utils/test/Test.java index 14c7af3..02c4995 100644 --- a/src/be/nikiroo/utils/test/Test.java +++ b/src/be/nikiroo/utils/test/Test.java @@ -30,7 +30,7 @@ public class Test extends TestLauncher { addSeries(new SerialServerTest(args)); addSeries(new StringUtilsTest(args)); addSeries(new TempFilesTest(args)); - + // TODO: test cache and downloader Cache cache = null; Downloader downloader = null; diff --git a/src/be/nikiroo/utils/test/TestCase.java b/src/be/nikiroo/utils/test/TestCase.java index 0210e2f..fe7b9af 100644 --- a/src/be/nikiroo/utils/test/TestCase.java +++ b/src/be/nikiroo/utils/test/TestCase.java @@ -509,7 +509,7 @@ abstract public class TestCase { return String.format("" // + "Assertion failed!%n" // + "Expected value: [%s]%n" // - + "Actual value: [%s]", expected, actual); + + "Actual value: [%s]", expected, actual); } private static String list(List items) {