X-Git-Url: http://git.nikiroo.be/?p=nikiroo-utils.git;a=blobdiff_plain;f=src%2Fjexer%2Fbits%2FStringUtils.java;h=d33f71f4e0031710e52ed008fefa0f37f28883c4;hp=fffce206875cf663480d2041aac121f88b58d01a;hb=c4cefaa04ec122fc02efb6542451a31fdf722c32;hpb=c6815053bca27b1c2374548e06779a97651fe07d diff --git a/src/jexer/bits/StringUtils.java b/src/jexer/bits/StringUtils.java index fffce20..d33f71f 100644 --- a/src/jexer/bits/StringUtils.java +++ b/src/jexer/bits/StringUtils.java @@ -30,6 +30,7 @@ package jexer.bits; import java.util.List; import java.util.ArrayList; +import java.util.Arrays; /** * StringUtils contains methods to: @@ -41,6 +42,11 @@ import java.util.ArrayList; * * - Read/write a line of RFC4180 comma-separated values strings to/from a * list of strings. + * + * - Compute number of visible text cells for a given Unicode codepoint or + * string. + * + * - Convert bytes to and from base-64 encoding. */ public class StringUtils { @@ -466,6 +472,10 @@ public class StringUtils { * @return the number of text cell columns required to display this string */ public static int width(final String str) { + if (str == null) { + return 0; + } + int n = 0; for (int i = 0; i < str.length();) { int ch = str.codePointAt(i); @@ -495,4 +505,241 @@ public class StringUtils { return ((ch >= 0x1f004) && (ch <= 0x1fffd)); } + // ------------------------------------------------------------------------ + // Base64 ----------------------------------------------------------------- + // ------------------------------------------------------------------------ + + /* + * The Base64 encoder/decoder below is provided to support JDK 1.6 - JDK + * 11. It was taken from https://sourceforge.net/projects/migbase64/ + * + * The following changes were made: + * + * - Code has been indented and long lines cut to fit within 80 columns. + * + * - Char, String, and "fast" byte functions removed. byte versions + * retained and called toBase64()/fromBase64(). + * + * - Enclosing braces added to blocks. 
+ */ + + /** + * A very fast and memory efficient class to encode and decode to and + * from BASE64 in full accordance with RFC 2045.

On Windows XP + * sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10 + * times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast + * on larger arrays (10000 - 1000000 bytes) compared to + * sun.misc.Encoder()/Decoder().

+ * + * On byte arrays the encoder is about 20% faster than Jakarta Commons + * Base64 Codec for encode and about 50% faster for decoding large + * arrays. This implementation is about twice as fast on very small + * arrays (< 30 bytes). If source/destination is a String + * this version is about three times as fast due to the fact that the + * Commons Codec result has to be recoded to a String from + * byte[], which is very expensive.

+ * + * This encode/decode algorithm doesn't create any temporary arrays as + * many other codecs do, it only allocates the resulting array. This + * produces less garbage and it is possible to handle arrays twice as + * large as algorithms that create a temporary array. (E.g. Jakarta + * Commons Codec). It is unknown whether Sun's + * sun.misc.Encoder()/Decoder() produce temporary arrays but + * since performance is quite low it probably does.

+ * + * The encoder produces the same output as the Sun one except that the + * Sun's encoder appends a trailing line separator if the last character + * isn't a pad. Unclear why but it only adds to the length and is + * probably a side effect. Both are in conformance with RFC 2045 + * though.
Commons codec seems to always add a trailing line + * separator.

+ * + * Note! The encode/decode method pairs (types) come in three + * versions with the exact same algorithm and thus a lot of code + * redundancy. This is to not create any temporary arrays for transcoding + * to/from different format types. The methods not used can simply be + * commented out.

+ * + * There is also a "fast" version of all decode methods that works the + * same way as the normal ones, but har a few demands on the decoded + * input. Normally though, these fast verions should be used if the + * source if the input is known and it hasn't bee tampered with.

+ * + * If you find the code useful or you find a bug, please send me a note + * at base64 @ miginfocom . com. + * + * Licence (BSD): + * ============== + * + * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom + * . com) All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * Neither the name of the MiG InfoCom AB nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * @version 2.2 + * @author Mikael Grev + * Date: 2004-aug-02 + * Time: 11:31:11 + */

    /**
     * The 64 characters of the Base64 alphabet, indexed by 6-bit value.
     */
    private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /**
     * Reverse lookup table: maps a byte value (0-255) to its 6-bit Base64
     * value, or -1 if the byte is not part of the alphabet.  The pad
     * character '=' maps to 0 so that it counts as "legal" and contributes
     * zero bits when decoded.
     */
    private static final int[] IA = new int[256];
    static {
        Arrays.fill(IA, -1);
        for (int i = 0, iS = CA.length; i < iS; i++) {
            IA[CA[i]] = i;
        }
        IA['='] = 0;
    }

    /**
     * Encodes a raw byte array into a BASE64 <code>String</code>
     * representation in accordance with RFC 2045.  A "\r\n" line separator
     * is inserted after every 76 output characters, except at the very end.
     *
     * @param sArr The bytes to convert. If <code>null</code> or length 0
     * an empty string will be returned.
     * @return A BASE64 encoded string. Never <code>null</code>.
     */
    public final static String toBase64(byte[] sArr) {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0) {
            return "";
        }

        final boolean lineSep = true;

        int eLen = (sLen / 3) * 3;              // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2;   // Returned character count
        // Length of returned array, including "\r\n" separators
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0);
        byte[] dArr = new byte[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16
                | (sArr[s++] & 0xff) << 8
                | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[d++] = (byte) CA[i & 0x3f];

            // Add optional line separator: 19 groups of 4 chars == 76
            // chars per line.  Never emitted at the very end.
            if (lineSep && ++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't an even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Prepare the int
            int i = ((sArr[eLen] & 0xff) << 10)
                | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = (byte) CA[i >> 12];
            dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
            dArr[dLen - 1] = '=';
        }

        // The output is pure ASCII.  The charset is looked up by name
        // (rather than via StandardCharsets) to stay compatible with
        // JDK 1.6.
        try {
            return new String(dArr, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Decodes a BASE64 encoded byte array. All illegal characters will
     * be ignored and can handle both arrays with and without line
     * separators.
     *
     * @param sArr The source array. Length 0 will return an empty
     * array. <code>null</code> will throw a NullPointerException.
     * @return The decoded array of bytes. May be of length 0. Will be
     * <code>null</code> if the number of legal characters (including '=')
     * isn't divisible by 4, or if there are more trailing pad characters
     * than the legal characters could possibly carry (i.e. the input is
     * definitely corrupted).
     */
    public final static byte[] fromBase64(byte[] sArr) {
        // Check special case
        int sLen = sArr.length;

        // Count illegal characters (including '\r', '\n') to know what
        // size the returned array will be, so we don't have to
        // reallocate & copy it later.
        int sepCnt = 0; // Number of separator (actually: illegal) characters.
        for (int i = 0; i < sLen; i++) {
            if (IA[sArr[i] & 0xff] < 0) {
                sepCnt++;
            }
        }

        // Check so that legal chars (including '=') are evenly
        // divisible by 4 as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0) {
            return null;
        }

        // Count pad characters, scanning backwards over anything that
        // decodes to <= 0 (illegal characters, '=' and 'A').
        int pad = 0;
        for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;) {
            if (sArr[i] == '=') {
                pad++;
            }
        }

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;
        if (len < 0) {
            // More padding than data (e.g. "========").  Without this
            // check the allocation below would throw
            // NegativeArraySizeException; report corruption instead.
            return null;
        }

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++) {
                // j is only advanced if a valid char was found.
                int c = IA[sArr[s++] & 0xff];
                if (c >= 0) {
                    i |= c << (18 - j * 6);
                } else {
                    j--;
                }
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len) {
                    dArr[d++] = (byte) i;
                }
            }
        }

        return dArr;
    }

}