+++ /dev/null
-/*
- * Jexer - Java Text User Interface
- *
- * The MIT License (MIT)
- *
- * Copyright (C) 2019 Kevin Lamonte
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * @author Kevin Lamonte [kevin.lamonte@gmail.com]
- * @version 1
- */
-package jexer.bits;
-
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Arrays;
-
/**
 * StringUtils contains methods to:
 *
 *    - Convert one or more long lines of strings into justified text
 *      paragraphs.
 *
 *    - Escape C0 control codes so that a string can be safely displayed.
 *
 *    - Read/write a line of RFC4180 comma-separated values strings to/from a
 *      list of strings.
 *
 *    - Determine the display width (in text cells) of code points and
 *      strings.
 *
 *    - Encode/decode Base64.
 */
public class StringUtils {

    // ------------------------------------------------------------------------
    // Justification ----------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Left-justify a string into a list of lines.
     *
     * The string is split on '\n' into paragraphs, and each paragraph is
     * word-wrapped so that a line is broken before the word that would
     * make its display width exceed n.  Runs of spaces/tabs between words
     * collapse to their first character, and whitespace at the start of a
     * paragraph is dropped.  A single word wider than n is never split: it
     * is emitted on its own (over-long) line.
     *
     * @param str the string
     * @param n the maximum number of text cells (display columns) in a line
     * @return the list of lines
     */
    public static List<String> left(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        String [] rawLines = str.split("\n");
        for (int i = 0; i < rawLines.length; i++) {
            String rawLine = rawLines[i];
            StringBuilder line = new StringBuilder();
            // The pending word, INCLUDING the single whitespace character
            // that preceded it (if any).
            StringBuilder word = new StringBuilder();
            boolean inWord = false;

            for (int j = 0; j < rawLine.length(); j++) {
                char ch = rawLine.charAt(j);
                if ((ch == ' ') || (ch == '\t')) {
                    if (inWord) {
                        // We have just transitioned from a word to
                        // whitespace: commit the word, then start the next
                        // pending word with this separator.
                        line = appendWord(result, line, word, n);
                        word = new StringBuilder();
                        word.append(ch);
                        inWord = false;
                    }
                    // Otherwise we are in the whitespace before another
                    // word: only the first separator is retained.
                } else {
                    word.append(ch);
                    inWord = true;
                }
            }

            // Commit whatever is left, then close out the paragraph.
            line = appendWord(result, line, word, n);
            result.add(line.toString());
        }

        return result;
    }

    /**
     * Add a pending word to the line being built, wrapping first if the
     * word does not fit.
     *
     * @param result the list of finished lines; the current line is added
     * to it when a wrap occurs
     * @param line the line being built
     * @param word the pending word, possibly starting with one separator
     * character
     * @param n the maximum number of text cells in a line
     * @return the line to continue building: either line itself, or a new
     * builder if a wrap occurred
     */
    private static StringBuilder appendWord(final List<String> result,
        final StringBuilder line, final StringBuilder word, final int n) {

        StringBuilder current = line;
        String wordText = word.toString();

        if (width(wordText) + width(current.toString()) > n) {
            // This word will exceed the line length.  Wrap at it instead.
            result.add(current.toString());
            current = new StringBuilder();
        }
        if (wordText.startsWith(" ") && (width(current.toString()) == 0)) {
            // Do not begin a line with the separating space.
            current.append(wordText.substring(1));
        } else {
            current.append(wordText);
        }
        return current;
    }

    /**
     * Append a number of space characters to a builder.
     *
     * @param sb the builder to append to
     * @param count the number of spaces; zero or negative appends nothing
     */
    private static void appendSpaces(final StringBuilder sb, final int count) {
        for (int i = 0; i < count; i++) {
            sb.append(' ');
        }
    }

    /**
     * Right-justify a string into a list of lines.
     *
     * This wraps exactly like left(), then precedes each line with spaces
     * to make it n text cells wide.  Lines already wider than n are left
     * as they are.
     *
     * @param str the string
     * @param n the maximum number of text cells (display columns) in a line
     * @return the list of lines
     */
    public static List<String> right(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        for (String line: left(str, n)) {
            StringBuilder sb = new StringBuilder();
            appendSpaces(sb, n - width(line));
            sb.append(line);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Center a string into a list of lines.
     *
     * This wraps exactly like left(), then surrounds each line with spaces
     * to make it n text cells wide.  When the padding is odd, the extra
     * space goes on the right.
     *
     * @param str the string
     * @param n the maximum number of text cells (display columns) in a line
     * @return the list of lines
     */
    public static List<String> center(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        for (String line: left(str, n)) {
            int lineWidth = width(line);
            int leftPad = (n - lineWidth) / 2;
            int rightPad = n - lineWidth - leftPad;

            StringBuilder sb = new StringBuilder();
            appendSpaces(sb, leftPad);
            sb.append(line);
            appendSpaces(sb, rightPad);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Fully-justify a string into a list of lines.
     *
     * This wraps exactly like left(), then inserts spaces between words to
     * make every line except the last one n text cells wide.  The
     * "algorithm" here is pretty dumb: it splits on space and distributes
     * the required spaces evenly over the gaps, with the leftmost gaps
     * receiving any remainder.  Lines containing a single word, and the
     * final line, are not stretched.
     *
     * @param str the string
     * @param n the maximum number of text cells (display columns) in a line
     * @return the list of lines
     */
    public static List<String> full(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        List<String> lines = left(str, n);
        for (int lineI = 0; lineI < lines.size() - 1; lineI++) {
            String line = lines.get(lineI);
            String [] words = line.split(" ");
            if (words.length <= 1) {
                result.add(line);
                continue;
            }

            // Measure in text cells, not chars, so that double-width
            // characters are padded to the same width as everything else.
            int cellCount = 0;
            for (int i = 0; i < words.length; i++) {
                cellCount += width(words[i]);
            }
            int gaps = words.length - 1;
            int spaceCount = n - cellCount;
            int q = spaceCount / gaps;
            int r = spaceCount % gaps;

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < gaps; i++) {
                sb.append(words[i]);
                appendSpaces(sb, q);
                if (r > 0) {
                    // Hand out the remainder one space per gap.  r is
                    // always less than gaps, so it is used up here.
                    sb.append(' ');
                    r--;
                }
            }
            sb.append(words[gaps]);
            result.add(sb.toString());
        }
        if (lines.size() > 0) {
            result.add(lines.get(lines.size() - 1));
        }

        return result;
    }

    // ------------------------------------------------------------------------
    // Control codes ----------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Convert a raw string into a string that can be splatted on the
     * screen, by replacing control characters with printable text.
     *
     * Backspace, form feed, newline, carriage return, and tab become the
     * two-character sequences \b, \f, \n, \r, and \t; DEL (0x7F) becomes
     * ^?; every other character below 0x20 becomes a single space.  All
     * other characters are passed through unchanged.
     *
     * @param str the string
     * @return a string that can be passed into Screen.putStringXY()
     */
    public static String unescape(final String str) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            switch (ch) {
            case '\b':
                sb.append("\\b");
                break;
            case '\f':
                sb.append("\\f");
                break;
            case '\n':
                sb.append("\\n");
                break;
            case '\r':
                sb.append("\\r");
                break;
            case '\t':
                sb.append("\\t");
                break;
            case 0x7f:
                sb.append("^?");
                break;
            default:
                if (ch < 0x20) {
                    // Any other C0 control code.
                    sb.append(' ');
                } else {
                    sb.append(ch);
                }
                break;
            }
        }
        return sb.toString();
    }

    // ------------------------------------------------------------------------
    // CSV --------------------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Read a line of RFC4180 comma-separated values (CSV) into a list of
     * strings.
     *
     * Trailing line terminators (any run of CR/LF at the very end of the
     * line) are ignored.  Line breaks inside a quoted field are kept as
     * part of that field.
     *
     * @param line the CSV line, with or without line terminators
     * @return the list of strings; always contains at least one element
     */
    public static List<String> fromCsv(final String line) {
        List<String> result = new ArrayList<String>();

        // Ignore the record's own line terminator, if present.
        int end = line.length();
        while ((end > 0)
            && ((line.charAt(end - 1) == '\n')
                || (line.charAt(end - 1) == '\r'))
        ) {
            end--;
        }

        StringBuilder str = new StringBuilder();
        // True when the previous character was a double-quote that has not
        // yet been resolved as either an escaped quote or a closing quote.
        boolean quoted = false;
        // True when the current field began with a double-quote.
        boolean fieldQuoted = false;

        for (int i = 0; i < end; i++) {
            char ch = line.charAt(i);

            if (ch == ',') {
                if (fieldQuoted && !quoted) {
                    // Still waiting to see the terminating quote for this
                    // field: the comma is data.
                    str.append(ch);
                    continue;
                }
                if (!fieldQuoted && quoted) {
                    // An unmatched double-quote and comma.  This should be
                    // an invalid sequence.  We keep the quote and treat the
                    // comma as terminating the field.
                    str.append('\"');
                }
                // A field separator (or the end of a quoted field).
                result.add(str.toString());
                str = new StringBuilder();
                quoted = false;
                fieldQuoted = false;
                continue;
            }

            if (ch == '\"') {
                if ((str.length() == 0) && (!fieldQuoted)) {
                    // The opening quote to a quoted field.
                    fieldQuoted = true;
                } else if (quoted) {
                    // Second quote of a pair: an escaped double-quote.
                    str.append('\"');
                    quoted = false;
                } else {
                    // Either the closing quote or the first of a pair.
                    quoted = true;
                }
                continue;
            }

            // Normal character, pass it on.
            str.append(ch);
        }

        // Include the final field.
        result.add(str.toString());

        return result;
    }

    /**
     * Write a list of strings to one line of RFC4180 comma-separated values
     * (CSV).
     *
     * A field is enclosed in double-quotes when it contains a comma, a
     * double-quote, or a line break (CR or LF); double-quotes inside such a
     * field are doubled.  All other fields are written as-is.
     *
     * @param list the list of strings
     * @return the CSV line, without a trailing line terminator
     */
    public static String toCsv(final List<String> list) {
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (String str: list) {
            if (!first) {
                result.append(',');
            }
            first = false;

            boolean needsQuotes = str.contains("\"")
                || str.contains(",")
                || str.contains("\n")
                || str.contains("\r");
            if (!needsQuotes) {
                result.append(str);
                continue;
            }

            // Double-quote it and double any quotes inside.
            result.append('\"');
            for (int j = 0; j < str.length(); j++) {
                char ch = str.charAt(j);
                result.append(ch);
                if (ch == '\"') {
                    result.append('\"');
                }
            }
            result.append('\"');
        }
        return result.toString();
    }

    // ------------------------------------------------------------------------
    // Display width ----------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Determine display width of a Unicode code point.
     *
     * @param ch the code point, can be char
     * @return the number of text cell columns required to display this code
     * point, one of 0, 1, or 2
     */
    public static int width(final int ch) {
        /*
         * This routine is a modified version of mk_wcwidth() available
         * at: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
         *
         * The combining characters list has been omitted from this
         * implementation.  Hopefully no users will be impacted.
         */

        // NUL and the 8-bit control characters: width 0
        if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) {
            return 0;
        }

        // All others: either 1 or 2
        if ((ch >= 0x1100)
            && ((ch <= 0x115f)
                // Hangul Jamo init. consonants
                || (ch == 0x2329)
                || (ch == 0x232a)
                // CJK ... Yi
                || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f))
                // Hangul Syllables
                || ((ch >= 0xac00) && (ch <= 0xd7a3))
                // CJK Compatibility Ideographs
                || ((ch >= 0xf900) && (ch <= 0xfaff))
                // Vertical forms
                || ((ch >= 0xfe10) && (ch <= 0xfe19))
                // CJK Compatibility Forms
                || ((ch >= 0xfe30) && (ch <= 0xfe6f))
                // Fullwidth Forms
                || ((ch >= 0xff00) && (ch <= 0xff60))
                || ((ch >= 0xffe0) && (ch <= 0xffe6))
                || ((ch >= 0x20000) && (ch <= 0x2fffd))
                || ((ch >= 0x30000) && (ch <= 0x3fffd))
                // emoji
                || isEmoji(ch)
            )
        ) {
            return 2;
        }
        return 1;
    }

    /**
     * Determine display width of a string.  This ASSUMES that no characters
     * are combining.  Hopefully no users will be impacted.
     *
     * @param str the string
     * @return the number of text cell columns required to display this string
     */
    public static int width(final String str) {
        int cells = 0;
        // Walk by code point so that surrogate pairs are measured once.
        for (int i = 0; i < str.length();) {
            int ch = str.codePointAt(i);
            cells += width(ch);
            i += Character.charCount(ch);
        }
        return cells;
    }

    /**
     * Check if character is in the CJK range.
     *
     * @param ch character to check
     * @return true if this character is in the CJK range (0x2e80 - 0x9fff)
     */
    public static boolean isCjk(final int ch) {
        return ((ch >= 0x2e80) && (ch <= 0x9fff));
    }

    /**
     * Check if character is in the emoji range.
     *
     * @param ch character to check
     * @return true if this character is in the emoji range (0x1f004 -
     * 0x1fffd)
     */
    public static boolean isEmoji(final int ch) {
        return ((ch >= 0x1f004) && (ch <= 0x1fffd));
    }

    // ------------------------------------------------------------------------
    // Base64 -----------------------------------------------------------------
    // ------------------------------------------------------------------------

    /*
     * The Base64 encoder/decoder below is provided to support JDK 1.6 - JDK
     * 11.  It was taken from https://sourceforge.net/projects/migbase64/
     *
     * The following changes were made:
     *
     * - Code has been indented and long lines cut to fit within 80 columns.
     *
     * - Char, String, and "fast" byte functions removed.  byte versions
     *   retained and called toBase64()/fromBase64().
     *
     * - Enclosing braces added to blocks.
     *
     * - toBase64() always inserts line separators and returns a String.
     *
     * - fromBase64() returns null rather than failing when the input
     *   contains more padding than data.
     */

    /**
     * A very fast and memory efficient class to encode and decode to and
     * from BASE64 in full accordance with RFC 2045.<br><br> On Windows XP
     * sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10
     * times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast
     * on larger arrays (10000 - 1000000 bytes) compared to
     * <code>sun.misc.Encoder()/Decoder()</code>.<br><br>
     *
     * On byte arrays the encoder is about 20% faster than Jakarta Commons
     * Base64 Codec for encode and about 50% faster for decoding large
     * arrays. This implementation is about twice as fast on very small
     * arrays (&lt; 30 bytes). If source/destination is a <code>String</code>
     * this version is about three times as fast due to the fact that the
     * Commons Codec result has to be recoded to a <code>String</code> from
     * <code>byte[]</code>, which is very expensive.<br><br>
     *
     * This encode/decode algorithm doesn't create any temporary arrays as
     * many other codecs do, it only allocates the resulting array. This
     * produces less garbage and it is possible to handle arrays twice as
     * large as algorithms that create a temporary array. (E.g. Jakarta
     * Commons Codec). It is unknown whether Sun's
     * <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays but
     * since performance is quite low it probably does.<br><br>
     *
     * The encoder produces the same output as the Sun one except that the
     * Sun's encoder appends a trailing line separator if the last character
     * isn't a pad. Unclear why but it only adds to the length and is
     * probably a side effect. Both are in conformance with RFC 2045
     * though.<br> Commons codec seem to always add a trailing line
     * separator.<br><br>
     *
     * <b>Note!</b> The encode/decode method pairs (types) come in three
     * versions with the <b>exact</b> same algorithm and thus a lot of code
     * redundancy. This is to not create any temporary arrays for transcoding
     * to/from different format types. The methods not used can simply be
     * commented out.<br><br>
     *
     * There is also a "fast" version of all decode methods that works the
     * same way as the normal ones, but has a few demands on the decoded
     * input. Normally though, these fast versions should be used if the
     * source of the input is known and it hasn't been tampered with.<br><br>
     *
     * If you find the code useful or you find a bug, please send me a note
     * at base64 @ miginfocom . com.
     *
     * Licence (BSD):
     * ==============
     *
     * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom
     * . com) All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are
     * met: Redistributions of source code must retain the above copyright
     * notice, this list of conditions and the following disclaimer.
     * Redistributions in binary form must reproduce the above copyright
     * notice, this list of conditions and the following disclaimer in the
     * documentation and/or other materials provided with the distribution.
     * Neither the name of the MiG InfoCom AB nor the names of its
     * contributors may be used to endorse or promote products derived from
     * this software without specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
     * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     *
     * @version 2.2
     * @author Mikael Grev
     *         Date: 2004-aug-02
     *         Time: 11:31:11
     */

    /**
     * The Base64 alphabet: index is the 6-bit value, element is its
     * character.
     */
    private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /**
     * Reverse lookup: index is a byte value (0-255), element is its 6-bit
     * value, or -1 for bytes outside the alphabet.  The pad character '='
     * maps to 0 so that it counts as legal and contributes no bits.
     */
    private static final int[] IA = new int[256];
    static {
        Arrays.fill(IA, -1);
        for (int i = 0, iS = CA.length; i < iS; i++) {
            IA[CA[i]] = i;
        }
        IA['='] = 0;
    }

    /**
     * Encodes a raw byte array into a BASE64 <code>String</code>
     * representation in accordance with RFC 2045.  A "\r\n" line separator
     * is inserted after every 76 characters of output, except at the very
     * end.
     *
     * @param sArr The bytes to convert. If <code>null</code> or length 0
     * an empty string will be returned.
     * @return A BASE64 encoded string. Never <code>null</code>.
     */
    public static final String toBase64(byte[] sArr) {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0) {
            return "";
        }

        // RFC 2045 limits encoded lines to 76 characters.
        final boolean lineSep = true;

        int eLen = (sLen / 3) * 3;              // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2;   // Returned character count
        // Length of returned array
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0);
        byte[] dArr = new byte[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16
                | (sArr[s++] & 0xff) << 8
                | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[d++] = (byte) CA[i & 0x3f];

            // Add line separator after 19 groups (76 chars), but never as
            // the last thing in the output.
            if (lineSep && ++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't an even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Prepare the int
            int i = ((sArr[eLen] & 0xff) << 10)
                | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = (byte) CA[i >> 12];
            dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
            dArr[dLen - 1] = '=';
        }

        try {
            // The output is pure ASCII; the charset name form is used to
            // stay compatible with JDK 1.6.
            return new String(dArr, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Decodes a BASE64 encoded byte array. All illegal characters will
     * be ignored and can handle both arrays with and without line
     * separators.
     *
     * @param sArr The source array. Length 0 will return an empty
     * array. <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0. Will be
     * <code>null</code> if the number of legal characters (including '=')
     * isn't divisible by 4, or if there is more padding than data
     * (i.e. definitely corrupted).
     */
    public static final byte[] fromBase64(byte[] sArr) {
        int sLen = sArr.length;

        // Count illegal characters (including '\r', '\n') to know what
        // size the returned array will be, so we don't have to
        // reallocate & copy it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++) {
            if (IA[sArr[i] & 0xff] < 0) {
                sepCnt++;
            }
        }

        // Check so that legal chars (including '=') are evenly
        // divisible by 4 as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0) {
            return null;
        }

        // Count the '=' pad characters at the end, skipping over any
        // trailing illegal characters.
        int pad = 0;
        for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;) {
            if (sArr[i] == '=') {
                pad++;
            }
        }

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;
        if (len < 0) {
            // More padding than data: corrupted input.
            return null;
        }

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            // j only increased if a valid char was found.
            for (int j = 0; j < 4; j++) {
                int c = IA[sArr[s++] & 0xff];
                if (c >= 0) {
                    i |= c << (18 - j * 6);
                } else {
                    j--;
                }
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len) {
                    dArr[d++] = (byte) i;
                }
            }
        }

        return dArr;
    }

}