/* * Jexer - Java Text User Interface * * The MIT License (MIT) * * Copyright (C) 2019 Kevin Lamonte * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * @author Kevin Lamonte [kevin.lamonte@gmail.com] * @version 1 */ package jexer.bits; import java.util.List; import java.util.ArrayList; import java.util.Arrays; /** * StringUtils contains methods to: * * - Convert one or more long lines of strings into justified text * paragraphs. * * - Unescape C0 control codes. * * - Read/write a line of RFC4180 comma-separated values strings to/from a * list of strings. * * - Compute number of visible text cells for a given Unicode codepoint or * string. * * - Convert bytes to and from base-64 encoding. */ public class StringUtils { /** * Left-justify a string into a list of lines. * * @param str the string * @param n the maximum number of characters in a line * @return the list of lines */ public static List left(final String str, final int n) { List result = new ArrayList(); /* * General procedure: * * 1. Split on '\n' into paragraphs. * * 2. Scan each line, noting the position of the last * beginning-of-a-word. * * 3. Chop at the last #2 if the next beginning-of-a-word exceeds * n. * * 4. Return the lines. */ String [] rawLines = str.split("\n"); for (int i = 0; i < rawLines.length; i++) { StringBuilder line = new StringBuilder(); StringBuilder word = new StringBuilder(); boolean inWord = false; for (int j = 0; j < rawLines[i].length(); j++) { char ch = rawLines[i].charAt(j); if ((ch == ' ') || (ch == '\t')) { if (inWord == true) { // We have just transitioned from a word to // whitespace. See if we have enough space to add // the word to the line. if (width(word.toString()) + width(line.toString()) > n) { // This word will exceed the line length. Wrap // at it instead. result.add(line.toString()); line = new StringBuilder(); } if ((word.toString().startsWith(" ")) && (width(line.toString()) == 0) ) { line.append(word.substring(1)); } else { line.append(word); } word = new StringBuilder(); word.append(ch); inWord = false; } else { // We are in the whitespace before another word. Do // nothing. } } else { if (inWord == true) { // We are appending to a word. word.append(ch); } else { // We have transitioned from whitespace to a word. word.append(ch); inWord = true; } } } // for (int j = 0; j < rawLines[i].length(); j++) if (width(word.toString()) + width(line.toString()) > n) { // This word will exceed the line length. Wrap at it // instead. result.add(line.toString()); line = new StringBuilder(); } if ((word.toString().startsWith(" ")) && (width(line.toString()) == 0) ) { line.append(word.substring(1)); } else { line.append(word); } result.add(line.toString()); } // for (int i = 0; i < rawLines.length; i++) { return result; } /** * Right-justify a string into a list of lines. * * @param str the string * @param n the maximum number of characters in a line * @return the list of lines */ public static List right(final String str, final int n) { List result = new ArrayList(); /* * Same as left(), but preceed each line with spaces to make it n * chars long. */ List lines = left(str, n); for (String line: lines) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < n - width(line); i++) { sb.append(' '); } sb.append(line); result.add(sb.toString()); } return result; } /** * Center a string into a list of lines. * * @param str the string * @param n the maximum number of characters in a line * @return the list of lines */ public static List center(final String str, final int n) { List result = new ArrayList(); /* * Same as left(), but preceed/succeed each line with spaces to make * it n chars long. */ List lines = left(str, n); for (String line: lines) { StringBuilder sb = new StringBuilder(); int l = (n - width(line)) / 2; int r = n - width(line) - l; for (int i = 0; i < l; i++) { sb.append(' '); } sb.append(line); for (int i = 0; i < r; i++) { sb.append(' '); } result.add(sb.toString()); } return result; } /** * Fully-justify a string into a list of lines. * * @param str the string * @param n the maximum number of characters in a line * @return the list of lines */ public static List full(final String str, final int n) { List result = new ArrayList(); /* * Same as left(), but insert spaces between words to make each line * n chars long. The "algorithm" here is pretty dumb: it performs a * split on space and then re-inserts multiples of n between words. */ List lines = left(str, n); for (int lineI = 0; lineI < lines.size() - 1; lineI++) { String line = lines.get(lineI); String [] words = line.split(" "); if (words.length > 1) { int charCount = 0; for (int i = 0; i < words.length; i++) { charCount += words[i].length(); } int spaceCount = n - charCount; int q = spaceCount / (words.length - 1); int r = spaceCount % (words.length - 1); StringBuilder sb = new StringBuilder(); for (int i = 0; i < words.length - 1; i++) { sb.append(words[i]); for (int j = 0; j < q; j++) { sb.append(' '); } if (r > 0) { sb.append(' '); r--; } } for (int j = 0; j < r; j++) { sb.append(' '); } sb.append(words[words.length - 1]); result.add(sb.toString()); } else { result.add(line); } } if (lines.size() > 0) { result.add(lines.get(lines.size() - 1)); } return result; } /** * Convert raw strings into escaped strings that be splatted on the * screen. * * @param str the string * @return a string that can be passed into Screen.putStringXY() */ public static String unescape(final String str) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < str.length(); i++) { char ch = str.charAt(i); if ((ch < 0x20) || (ch == 0x7F)) { switch (ch) { case '\b': sb.append("\\b"); continue; case '\f': sb.append("\\f"); continue; case '\n': sb.append("\\n"); continue; case '\r': sb.append("\\r"); continue; case '\t': sb.append("\\t"); continue; case 0x7f: sb.append("^?"); continue; default: sb.append(' '); continue; } } sb.append(ch); } return sb.toString(); } /** * Read a line of RFC4180 comma-separated values (CSV) into a list of * strings. * * @param line the CSV line, with or without without line terminators * @return the list of strings */ public static List fromCsv(final String line) { List result = new ArrayList(); StringBuilder str = new StringBuilder(); boolean quoted = false; boolean fieldQuoted = false; for (int i = 0; i < line.length(); i++) { char ch = line.charAt(i); /* System.err.println("ch '" + ch + "' str '" + str + "' " + " fieldQuoted " + fieldQuoted + " quoted " + quoted); */ if (ch == ',') { if (fieldQuoted && quoted) { // Terminating a quoted field. result.add(str.toString()); str = new StringBuilder(); quoted = false; fieldQuoted = false; } else if (fieldQuoted) { // Still waiting to see the terminating quote for this // field. str.append(ch); } else if (quoted) { // An unmatched double-quote and comma. This should be // an invalid sequence. We will treat it as a quote // terminating the field. str.append('\"'); result.add(str.toString()); str = new StringBuilder(); quoted = false; fieldQuoted = false; } else { // A field separator. result.add(str.toString()); str = new StringBuilder(); quoted = false; fieldQuoted = false; } continue; } if (ch == '\"') { if ((str.length() == 0) && (!fieldQuoted)) { // The opening quote to a quoted field. fieldQuoted = true; } else if (quoted) { // This is a double-quote. str.append('\"'); quoted = false; } else { // This is the beginning of a quote. quoted = true; } continue; } // Normal character, pass it on. str.append(ch); } // Include the final field. result.add(str.toString()); return result; } /** * Write a list of strings to on line of RFC4180 comma-separated values * (CSV). * * @param list the list of strings * @return the CSV line, without any line terminators */ public static String toCsv(final List list) { StringBuilder result = new StringBuilder(); int i = 0; for (String str: list) { if (!str.contains("\"") && !str.contains(",")) { // Just append the string with a comma. result.append(str); } else if (!str.contains("\"") && str.contains(",")) { // Contains commas, but no quotes. Just double-quote it. result.append("\""); result.append(str); result.append("\""); } else if (str.contains("\"")) { // Contains quotes and maybe commas. Double-quote it and // replace quotes inside. result.append("\""); for (int j = 0; j < str.length(); j++) { char ch = str.charAt(j); result.append(ch); if (ch == '\"') { result.append("\""); } } result.append("\""); } if (i < list.size() - 1) { result.append(","); } i++; } return result.toString(); } /** * Determine display width of a Unicode code point. * * @param ch the code point, can be char * @return the number of text cell columns required to display this code * point, one of 0, 1, or 2 */ public static int width(final int ch) { /* * This routine is a modified version of mk_wcwidth() available * at: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c * * The combining characters list has been omitted from this * implementation. Hopefully no users will be impacted. */ // 8-bit control characters: width 0 if (ch == 0) { return 0; } if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) { return 0; } // All others: either 1 or 2 if ((ch >= 0x1100) && ((ch <= 0x115f) // Hangul Jamo init. consonants || (ch == 0x2329) || (ch == 0x232a) // CJK ... Yi || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f)) // Hangul Syllables || ((ch >= 0xac00) && (ch <= 0xd7a3)) // CJK Compatibility Ideographs || ((ch >= 0xf900) && (ch <= 0xfaff)) // Vertical forms || ((ch >= 0xfe10) && (ch <= 0xfe19)) // CJK Compatibility Forms || ((ch >= 0xfe30) && (ch <= 0xfe6f)) // Fullwidth Forms || ((ch >= 0xff00) && (ch <= 0xff60)) || ((ch >= 0xffe0) && (ch <= 0xffe6)) || ((ch >= 0x20000) && (ch <= 0x2fffd)) || ((ch >= 0x30000) && (ch <= 0x3fffd)) // emoji || ((ch >= 0x1f004) && (ch <= 0x1fffd)) ) ) { return 2; } return 1; } /** * Determine display width of a string. This ASSUMES that no characters * are combining. Hopefully no users will be impacted. * * @param str the string * @return the number of text cell columns required to display this string */ public static int width(final String str) { if (str == null) { return 0; } int n = 0; for (int i = 0; i < str.length();) { int ch = str.codePointAt(i); n += width(ch); i += Character.charCount(ch); } return n; } /** * Check if character is in the CJK range. * * @param ch character to check * @return true if this character is in the CJK range */ public static boolean isCjk(final int ch) { return ((ch >= 0x2e80) && (ch <= 0x9fff)); } /** * Check if character is in the emoji range. * * @param ch character to check * @return true if this character is in the emoji range */ public static boolean isEmoji(final int ch) { return ((ch >= 0x1f004) && (ch <= 0x1fffd)); } // ------------------------------------------------------------------------ // Base64 ----------------------------------------------------------------- // ------------------------------------------------------------------------ /* * The Base64 encoder/decoder below is provided to support JDK 1.6 - JDK * 11. It was taken from https://sourceforge.net/projects/migbase64/ * * The following changes were made: * * - Code has been indented and long lines cut to fit within 80 columns. * * - Char, String, and "fast" byte functions removed. byte versions * retained and called toBase64()/fromBase64(). * * - Enclosing braces added to blocks. */ /** * A very fast and memory efficient class to encode and decode to and * from BASE64 in full accordance with RFC 2045.

On Windows XP * sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10 * times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast * on larger arrays (10000 - 1000000 bytes) compared to * sun.misc.Encoder()/Decoder().

* * On byte arrays the encoder is about 20% faster than Jakarta Commons * Base64 Codec for encode and about 50% faster for decoding large * arrays. This implementation is about twice as fast on very small * arrays (< 30 bytes). If source/destination is a String * this version is about three times as fast due to the fact that the * Commons Codec result has to be recoded to a String from * byte[], which is very expensive.

* * This encode/decode algorithm doesn't create any temporary arrays as * many other codecs do, it only allocates the resulting array. This * produces less garbage and it is possible to handle arrays twice as * large as algorithms that create a temporary array. (E.g. Jakarta * Commons Codec). It is unknown whether Sun's * sun.misc.Encoder()/Decoder() produce temporary arrays but * since performance is quite low it probably does.

* * The encoder produces the same output as the Sun one except that the * Sun's encoder appends a trailing line separator if the last character * isn't a pad. Unclear why but it only adds to the length and is * probably a side effect. Both are in conformance with RFC 2045 * though.
Commons codec seem to always att a trailing line * separator.

* * Note! The encode/decode method pairs (types) come in three * versions with the exact same algorithm and thus a lot of code * redundancy. This is to not create any temporary arrays for transcoding * to/from different format types. The methods not used can simply be * commented out.

* * There is also a "fast" version of all decode methods that works the * same way as the normal ones, but har a few demands on the decoded * input. Normally though, these fast verions should be used if the * source if the input is known and it hasn't bee tampered with.

* * If you find the code useful or you find a bug, please send me a note * at base64 @ miginfocom . com. * * Licence (BSD): * ============== * * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom * . com) All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * Neither the name of the MiG InfoCom AB nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * @version 2.2 * @author Mikael Grev * Date: 2004-aug-02 * Time: 11:31:11 */ private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray(); private static final int[] IA = new int[256]; static { Arrays.fill(IA, -1); for (int i = 0, iS = CA.length; i < iS; i++) { IA[CA[i]] = i; } IA['='] = 0; } /** * Encodes a raw byte array into a BASE64 byte[] * representation i accordance with RFC 2045. * @param sArr The bytes to convert. If null or length 0 * an empty array will be returned. * @return A BASE64 encoded array. Never null. */ public final static String toBase64(byte[] sArr) { // Check special case int sLen = sArr != null ? sArr.length : 0; if (sLen == 0) { return ""; } final boolean lineSep = true; int eLen = (sLen / 3) * 3; // Length of even 24-bits. int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array byte[] dArr = new byte[dLen]; // Encode even 24-bits for (int s = 0, d = 0, cc = 0; s < eLen;) { // Copy next three bytes into lower 24 bits of int, paying // attension to sign. int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff); // Encode the int into four chars dArr[d++] = (byte) CA[(i >>> 18) & 0x3f]; dArr[d++] = (byte) CA[(i >>> 12) & 0x3f]; dArr[d++] = (byte) CA[(i >>> 6) & 0x3f]; dArr[d++] = (byte) CA[i & 0x3f]; // Add optional line separator if (lineSep && ++cc == 19 && d < dLen - 2) { dArr[d++] = '\r'; dArr[d++] = '\n'; cc = 0; } } // Pad and encode last bits if source isn't an even 24 bits. int left = sLen - eLen; // 0 - 2. if (left > 0) { // Prepare the int int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0); // Set last four chars dArr[dLen - 4] = (byte) CA[i >> 12]; dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f]; dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '='; dArr[dLen - 1] = '='; } try { return new String(dArr, "UTF-8"); } catch (java.io.UnsupportedEncodingException e) { throw new IllegalArgumentException(e); } } /** * Decodes a BASE64 encoded byte array. All illegal characters will * be ignored and can handle both arrays with and without line * separators. * @param sArr The source array. Length 0 will return an empty * array. null will throw an exception. * @return The decoded array of bytes. May be of length 0. Will be * null if the legal characters (including '=') isn't * divideable by 4. (I.e. definitely corrupted). */ public final static byte[] fromBase64(byte[] sArr) { // Check special case int sLen = sArr.length; // Count illegal characters (including '\r', '\n') to know what // size the returned array will be, so we don't have to // reallocate & copy it later. int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...) for (int i = 0; i < sLen; i++) { // If input is "pure" (I.e. no line separators or illegal chars) // base64 this loop can be commented out. if (IA[sArr[i] & 0xff] < 0) { sepCnt++; } } // Check so that legal chars (including '=') are evenly // divideable by 4 as specified in RFC 2045. if ((sLen - sepCnt) % 4 != 0) { return null; } int pad = 0; for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;) { if (sArr[i] == '=') { pad++; } } int len = ((sLen - sepCnt) * 6 >> 3) - pad; byte[] dArr = new byte[len]; // Preallocate byte[] of exact length for (int s = 0, d = 0; d < len;) { // Assemble three bytes into an int from four "valid" characters. int i = 0; for (int j = 0; j < 4; j++) { // j only increased if a valid char was found. int c = IA[sArr[s++] & 0xff]; if (c >= 0) { i |= c << (18 - j * 6); } else { j--; } } // Add the bytes dArr[d++] = (byte) (i >> 16); if (d < len) { dArr[d++]= (byte) (i >> 8); if (d < len) { dArr[d++] = (byte) i; } } } return dArr; } }