X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fjexer%2Fbits%2FStringUtils.java;h=d33f71f4e0031710e52ed008fefa0f37f28883c4;hb=7a455971fed716123933d0f685a0d6eebcf3282b;hp=2a4fc1dc152b4cbb3dc1ba5fb5dc49b4d3ca940c;hpb=c334c9a224670c537a5369e8e4e3307001a03acb;p=nikiroo-utils.git diff --git a/src/jexer/bits/StringUtils.java b/src/jexer/bits/StringUtils.java deleted file mode 100644 index 2a4fc1d..0000000 --- a/src/jexer/bits/StringUtils.java +++ /dev/null @@ -1,736 +0,0 @@ -/* - * Jexer - Java Text User Interface - * - * The MIT License (MIT) - * - * Copyright (C) 2019 Kevin Lamonte - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * @author Kevin Lamonte [kevin.lamonte@gmail.com] - * @version 1 - */ -package jexer.bits; - -import java.util.List; -import java.util.ArrayList; -import java.util.Arrays; - -/** - * StringUtils contains methods to: - * - * - Convert one or more long lines of strings into justified text - * paragraphs. 
/**
 * StringUtils contains methods to:
 *
 *    - Convert one or more long lines of strings into justified text
 *      paragraphs.
 *
 *    - Unescape C0 control codes.
 *
 *    - Read/write a line of RFC4180 comma-separated values strings to/from a
 *      list of strings.
 *
 *    - Compute the display width (in text cells) of code points and
 *      strings.
 *
 *    - Encode/decode Base64.
 */
public class StringUtils {

    /**
     * Left-justify a string into a list of lines.
     *
     * General procedure:
     *
     *   1. Split on '\n' into paragraphs.
     *
     *   2. Scan each paragraph, collecting one word at a time.  A word
     *      carries the single whitespace character that preceded it; runs
     *      of whitespace are collapsed and leading whitespace is dropped.
     *
     *   3. When a completed word does not fit on the current line, emit the
     *      current line and start a new one with that word.
     *
     *   4. Return the lines.
     *
     * A word that is wider than n on its own is never split; it is placed
     * alone on a line that exceeds n.  An empty paragraph produces exactly
     * one empty line.  No empty line is emitted merely because a wrap was
     * triggered while the current line was still empty, or because trailing
     * whitespace did not fit.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> left(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        String [] rawLines = str.split("\n");
        for (int i = 0; i < rawLines.length; i++) {
            String rawLine = rawLines[i];
            // Index of the first output line of this paragraph, used below
            // to tell "paragraph produced nothing yet" from "paragraph
            // already wrapped".
            int firstLine = result.size();
            StringBuilder line = new StringBuilder();
            StringBuilder word = new StringBuilder();
            boolean inWord = false;

            for (int j = 0; j < rawLine.length(); j++) {
                char ch = rawLine.charAt(j);
                boolean whitespace = (ch == ' ') || (ch == '\t');
                if (!whitespace) {
                    // Either starting or continuing a word.
                    word.append(ch);
                    inWord = true;
                } else if (inWord) {
                    // Transition from a word to whitespace: place the
                    // finished word, then start the next word with this
                    // separator so its width is counted when fitting.
                    line = appendWord(result, line, word, n);
                    word = new StringBuilder();
                    word.append(ch);
                    inWord = false;
                }
                // Otherwise this is more whitespace before the next word:
                // collapse it.
            }

            // Place whatever is left over (the last word, a lone trailing
            // separator, or nothing at all).
            line = appendWord(result, line, word, n);
            if ((line.length() > 0) || (result.size() == firstLine)) {
                // Always emit non-empty text; emit an empty line only when
                // the paragraph itself was empty.
                result.add(line.toString());
            }
        }

        return result;
    }

    /**
     * Place a word on the current line, wrapping first if it does not fit.
     *
     * @param result the output lines; the current line is appended here
     * when a wrap occurs
     * @param line the current line being built
     * @param word the word to place, possibly beginning with its separator
     * @param n the maximum number of text cells in a line
     * @return the line to keep building: either line, or a fresh builder if
     * a wrap occurred
     */
    private static StringBuilder appendWord(final List<String> result,
        final StringBuilder line, final StringBuilder word, final int n) {

        StringBuilder target = line;
        boolean tooWide = width(word.toString()) + width(line.toString()) > n;
        if (tooWide && (line.length() > 0)) {
            // Wrap.  An empty line is never emitted here: if the line is
            // empty the word simply overflows it.
            result.add(line.toString());
            target = new StringBuilder();
        }

        boolean leadingSpace = (word.length() > 0) && (word.charAt(0) == ' ');
        if (leadingSpace && (width(target.toString()) == 0)) {
            // Do not begin a line with the separator space.
            target.append(word, 1, word.length());
        } else {
            target.append(word);
        }
        return target;
    }

    /**
     * Append a run of spaces to a builder.
     *
     * @param sb the builder to append to
     * @param count the number of spaces; zero or negative appends nothing
     */
    private static void appendSpaces(final StringBuilder sb, final int count) {
        for (int i = 0; i < count; i++) {
            sb.append(' ');
        }
    }

    /**
     * Right-justify a string into a list of lines.  Same as left(), but
     * each line is preceded with spaces to make it n cells wide.  Lines
     * already wider than n are returned unpadded.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> right(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        for (String line: left(str, n)) {
            StringBuilder sb = new StringBuilder();
            appendSpaces(sb, n - width(line));
            sb.append(line);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Center a string into a list of lines.  Same as left(), but each line
     * is surrounded with spaces to make it n cells wide.  When the padding
     * is odd, the extra space goes on the right.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> center(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        for (String line: left(str, n)) {
            int lineWidth = width(line);
            int before = (n - lineWidth) / 2;
            int after = n - lineWidth - before;

            StringBuilder sb = new StringBuilder();
            appendSpaces(sb, before);
            sb.append(line);
            appendSpaces(sb, after);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Fully-justify a string into a list of lines.  Same as left(), but
     * spaces are inserted between words to make each line n cells wide.
     * The "algorithm" here is pretty dumb: it splits each line on space and
     * distributes the missing cells between the words, giving the leftmost
     * gaps one extra space when the division is uneven.
     *
     * The very last line of the result is never stretched, and neither are
     * lines that contain a single word.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> full(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        List<String> lines = left(str, n);
        for (int lineI = 0; lineI < lines.size() - 1; lineI++) {
            String line = lines.get(lineI);
            String [] words = line.split(" ");
            if (words.length <= 1) {
                result.add(line);
                continue;
            }

            // Measure in text cells, not chars, so that the result agrees
            // with the wrapping done by left() for double-width characters.
            int wordCells = 0;
            for (String w: words) {
                wordCells += width(w);
            }
            int gaps = words.length - 1;
            int spaceCount = n - wordCells;
            int q = spaceCount / gaps;
            int r = spaceCount % gaps;

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < gaps; i++) {
                sb.append(words[i]);
                appendSpaces(sb, q);
                if (r > 0) {
                    // Hand out the remainder one space at a time, left to
                    // right.
                    sb.append(' ');
                    r--;
                }
            }
            sb.append(words[gaps]);
            result.add(sb.toString());
        }
        if (lines.size() > 0) {
            result.add(lines.get(lines.size() - 1));
        }

        return result;
    }

    /**
     * Convert a raw string into an escaped string that can be splatted on
     * the screen.  Backspace, form feed, newline, carriage return, and tab
     * become their two-character backslash escapes; DEL (0x7F) becomes
     * "^?"; every other character below 0x20 becomes a single space.
     *
     * @param str the string
     * @return a string that can be passed into Screen.putStringXY()
     */
    public static String unescape(final String str) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if ((ch >= 0x20) && (ch != 0x7F)) {
                // Printable: pass through untouched.
                sb.append(ch);
                continue;
            }
            switch (ch) {
            case '\b':
                sb.append("\\b");
                break;
            case '\f':
                sb.append("\\f");
                break;
            case '\n':
                sb.append("\\n");
                break;
            case '\r':
                sb.append("\\r");
                break;
            case '\t':
                sb.append("\\t");
                break;
            case 0x7f:
                sb.append("^?");
                break;
            default:
                sb.append(' ');
                break;
            }
        }
        return sb.toString();
    }

    /**
     * Read a line of RFC4180 comma-separated values (CSV) into a list of
     * strings.
     *
     * Trailing CR and LF characters are treated as the line terminator and
     * are not part of the last field.  Line breaks inside a quoted field
     * are preserved.
     *
     * @param line the CSV line, with or without line terminators
     * @return the list of strings; always contains at least one element
     */
    public static List<String> fromCsv(final String line) {
        List<String> result = new ArrayList<String>();

        // Exclude the line terminator, if any, from parsing.
        int end = line.length();
        while ((end > 0)
            && ((line.charAt(end - 1) == '\n')
                || (line.charAt(end - 1) == '\r'))
        ) {
            end--;
        }

        StringBuilder str = new StringBuilder();
        // quoted: the previous character was a double-quote that has not
        // yet been resolved as either "closing quote" or "first half of an
        // escaped quote".
        boolean quoted = false;
        // fieldQuoted: the current field began with an opening quote.
        boolean fieldQuoted = false;

        for (int i = 0; i < end; i++) {
            char ch = line.charAt(i);

            if (ch == ',') {
                if (fieldQuoted && !quoted) {
                    // Still waiting to see the terminating quote for this
                    // field: the comma is data.
                    str.append(ch);
                    continue;
                }
                if (!fieldQuoted && quoted) {
                    // An unmatched double-quote and comma.  This should be
                    // an invalid sequence.  We will treat it as a quote
                    // terminating the field.
                    str.append('\"');
                }
                // A field separator (possibly right after a closing
                // quote).
                result.add(str.toString());
                str = new StringBuilder();
                quoted = false;
                fieldQuoted = false;
                continue;
            }

            if (ch == '\"') {
                if ((str.length() == 0) && (!fieldQuoted)) {
                    // The opening quote to a quoted field.
                    fieldQuoted = true;
                } else if (quoted) {
                    // Second quote of a pair: an escaped double-quote.
                    str.append('\"');
                    quoted = false;
                } else {
                    // Either a closing quote or the first half of an
                    // escaped quote; the next character decides.
                    quoted = true;
                }
                continue;
            }

            // Normal character, pass it on.
            str.append(ch);
        }

        // Include the final field.
        result.add(str.toString());

        return result;
    }

    /**
     * Write a list of strings to one line of RFC4180 comma-separated values
     * (CSV).  Fields containing a double-quote, a comma, or a line break
     * are enclosed in double-quotes, with embedded double-quotes doubled.
     *
     * @param list the list of strings
     * @return the CSV line, without any line terminators
     */
    public static String toCsv(final List<String> list) {
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (String str: list) {
            if (!first) {
                result.append(',');
            }
            first = false;

            boolean needsQuotes = str.contains("\"")
                || str.contains(",")
                || str.contains("\n")
                || str.contains("\r");
            if (!needsQuotes) {
                result.append(str);
                continue;
            }

            result.append('\"');
            for (int j = 0; j < str.length(); j++) {
                char ch = str.charAt(j);
                result.append(ch);
                if (ch == '\"') {
                    // Escape a quote by doubling it.
                    result.append('\"');
                }
            }
            result.append('\"');
        }
        return result.toString();
    }

    /**
     * Determine display width of a Unicode code point.
     *
     * This routine is a modified version of mk_wcwidth() available at:
     * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
     *
     * The combining characters list has been omitted from this
     * implementation.  Hopefully no users will be impacted.
     *
     * @param ch the code point, can be char
     * @return the number of text cell columns required to display this code
     * point, one of 0, 1, or 2
     */
    public static int width(final int ch) {
        // NUL and the 8-bit control characters: width 0
        if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) {
            return 0;
        }

        // Nothing below Hangul Jamo is double-width.
        if (ch < 0x1100) {
            return 1;
        }

        boolean wide =
            // Hangul Jamo init. consonants
            (ch <= 0x115f)
            || (ch == 0x2329)
            || (ch == 0x232a)
            // CJK ... Yi
            || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f))
            // Hangul Syllables
            || ((ch >= 0xac00) && (ch <= 0xd7a3))
            // CJK Compatibility Ideographs
            || ((ch >= 0xf900) && (ch <= 0xfaff))
            // Vertical forms
            || ((ch >= 0xfe10) && (ch <= 0xfe19))
            // CJK Compatibility Forms
            || ((ch >= 0xfe30) && (ch <= 0xfe6f))
            // Fullwidth Forms
            || ((ch >= 0xff00) && (ch <= 0xff60))
            || ((ch >= 0xffe0) && (ch <= 0xffe6))
            || ((ch >= 0x20000) && (ch <= 0x2fffd))
            || ((ch >= 0x30000) && (ch <= 0x3fffd))
            // emoji
            || ((ch >= 0x1f004) && (ch <= 0x1fffd));

        return wide ? 2 : 1;
    }

    /**
     * Determine display width of a string.  This ASSUMES that no characters
     * are combining.  Hopefully no users will be impacted.
     *
     * @param str the string
     * @return the number of text cell columns required to display this
     * string
     */
    public static int width(final String str) {
        int cells = 0;
        int i = 0;
        while (i < str.length()) {
            // Step by code point so surrogate pairs are measured once.
            int ch = str.codePointAt(i);
            cells += width(ch);
            i += Character.charCount(ch);
        }
        return cells;
    }

    /**
     * Check if character is in the CJK range.
     *
     * @param ch character to check
     * @return true if this character is in the CJK range
     */
    public static boolean isCjk(final int ch) {
        return ((ch >= 0x2e80) && (ch <= 0x9fff));
    }

    /**
     * Check if character is in the emoji range.
     *
     * @param ch character to check
     * @return true if this character is in the emoji range
     */
    public static boolean isEmoji(final int ch) {
        return ((ch >= 0x1f004) && (ch <= 0x1fffd));
    }

    // ------------------------------------------------------------------------
    // Base64 -----------------------------------------------------------------
    // ------------------------------------------------------------------------

    /*
     * The Base64 encoder/decoder below is provided to support JDK 1.6 - JDK
     * 11.  It was taken from https://sourceforge.net/projects/migbase64/
     *
     * The following changes were made:
     *
     * - Code has been indented and long lines cut to fit within 80 columns.
     *
     * - Char, String, and "fast" byte functions removed.  byte versions
     *   retained and called toBase64()/fromBase64().
     *
     * - Enclosing braces added to blocks.
     */

    /*
     * A very fast and memory efficient class to encode and decode to and
     * from BASE64 in full accordance with RFC 2045.
     *
     * On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder
     * is about 10 times faster on small arrays (10 - 1000 bytes) and 2-3
     * times as fast on larger arrays (10000 - 1000000 bytes) compared to
     * sun.misc.Encoder()/Decoder().
     *
     * On byte arrays the encoder is about 20% faster than Jakarta Commons
     * Base64 Codec for encode and about 50% faster for decoding large
     * arrays.  This implementation is about twice as fast on very small
     * arrays (< 30 bytes).  If source/destination is a String this version
     * is about three times as fast due to the fact that the Commons Codec
     * result has to be recoded to a String from byte[], which is very
     * expensive.
     *
     * This encode/decode algorithm doesn't create any temporary arrays as
     * many other codecs do, it only allocates the resulting array.  This
     * produces less garbage and it is possible to handle arrays twice as
     * large as algorithms that create a temporary array.  (E.g. Jakarta
     * Commons Codec).  It is unknown whether Sun's
     * sun.misc.Encoder()/Decoder() produce temporary arrays but since
     * performance is quite low it probably does.
     *
     * The encoder produces the same output as the Sun one except that the
     * Sun's encoder appends a trailing line separator if the last character
     * isn't a pad.  Unclear why but it only adds to the length and is
     * probably a side effect.  Both are in conformance with RFC 2045
     * though.  Commons codec seems to always add a trailing line separator.
     *
     * Note!  The encode/decode method pairs (types) come in three versions
     * with the exact same algorithm and thus a lot of code redundancy.
     * This is to not create any temporary arrays for transcoding to/from
     * different format types.  The methods not used can simply be commented
     * out.
     *
     * There is also a "fast" version of all decode methods that works the
     * same way as the normal ones, but has a few demands on the decoded
     * input.  Normally though, these fast versions should be used if the
     * source of the input is known and it hasn't been tampered with.
     *
     * If you find the code useful or you find a bug, please send me a note
     * at base64 @ miginfocom . com.
     *
     * Licence (BSD):
     * ==============
     *
     * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom
     * . com) All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are
     * met: Redistributions of source code must retain the above copyright
     * notice, this list of conditions and the following disclaimer.
     * Redistributions in binary form must reproduce the above copyright
     * notice, this list of conditions and the following disclaimer in the
     * documentation and/or other materials provided with the distribution.
     * Neither the name of the MiG InfoCom AB nor the names of its
     * contributors may be used to endorse or promote products derived from
     * this software without specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
     * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     *
     * @version 2.2
     * @author Mikael Grev
     *         Date: 2004-aug-02
     *         Time: 11:31:11
     */

    /**
     * The Base64 alphabet: index is the 6-bit value, element is its
     * character.
     */
    private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /**
     * Reverse lookup: index is a byte value (0-255), element is its 6-bit
     * value, or -1 if the byte is not part of the Base64 alphabet.  The pad
     * character '=' maps to 0.
     */
    private static final int[] IA = new int[256];
    static {
        Arrays.fill(IA, -1);
        for (int i = 0, iS = CA.length; i < iS; i++) {
            IA[CA[i]] = i;
        }
        IA['='] = 0;
    }

    /**
     * Encodes a raw byte array into a BASE64 String representation in
     * accordance with RFC 2045.  A CRLF line separator is inserted after
     * every 76 output characters, but never at the very end.
     *
     * @param sArr The bytes to convert.  If null or length 0 an empty
     * string will be returned.
     * @return A BASE64 encoded string.  Never null.
     */
    public static final String toBase64(byte[] sArr) {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0) {
            return "";
        }

        final boolean lineSep = true;

        int eLen = (sLen / 3) * 3;            // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
        // Length of returned array
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0);
        byte[] dArr = new byte[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16
                | (sArr[s++] & 0xff) << 8
                | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[d++] = (byte) CA[i & 0x3f];

            // Add optional line separator: 19 groups of 4 chars is 76
            // chars.
            if (lineSep && ++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't an even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Prepare the int
            int i = ((sArr[eLen] & 0xff) << 10)
                | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = (byte) CA[i >> 12];
            dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
            dArr[dLen - 1] = '=';
        }
        try {
            return new String(dArr, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Decodes a BASE64 encoded byte array.  All illegal characters will be
     * ignored and can handle both arrays with and without line separators.
     *
     * @param sArr The source array.  Length 0 will return an empty array.
     * null will throw an exception.
     * @return The decoded array of bytes.  May be of length 0.  Will be
     * null if the number of legal characters (including '=') isn't
     * divisible by 4.  (I.e. definitely corrupted).
     * @throws NullPointerException if sArr is null
     */
    public static final byte[] fromBase64(byte[] sArr) {
        // Check special case
        int sLen = sArr.length;

        // Count illegal characters (including '\r', '\n') to know what
        // size the returned array will be, so we don't have to
        // reallocate & copy it later.
        int sepCnt = 0; // Number of separator (actually: illegal) characters.
        for (int i = 0; i < sLen; i++) {
            if (IA[sArr[i] & 0xff] < 0) {
                sepCnt++;
            }
        }

        // Check so that legal chars (including '=') are evenly
        // divisible by 4 as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0) {
            return null;
        }

        // Walk backwards over trailing characters whose value is 0 or
        // illegal, counting the '=' pads among them.
        int pad = 0;
        for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;) {
            if (sArr[i] == '=') {
                pad++;
            }
        }

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            // j only increased if a valid char was found.
            for (int j = 0; j < 4; j++) {
                int c = IA[sArr[s++] & 0xff];
                if (c >= 0) {
                    i |= c << (18 - j * 6);
                } else {
                    j--;
                }
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len) {
                    dArr[d++] = (byte) i;
                }
            }
        }

        return dArr;
    }

}