X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fjexer%2Fbits%2FStringUtils.java;h=fffce206875cf663480d2041aac121f88b58d01a;hb=12b90437b5f22c2ae6e9b9b14c3b62b60f6143e5;hp=535720da7f82d235eb52465500ab802e2c84f97c;hpb=d36057dfab8def933a64be042b039d76708ac5ba;p=nikiroo-utils.git diff --git a/src/jexer/bits/StringUtils.java b/src/jexer/bits/StringUtils.java index 535720d..fffce20 100644 --- a/src/jexer/bits/StringUtils.java +++ b/src/jexer/bits/StringUtils.java @@ -3,7 +3,7 @@ * * The MIT License (MIT) * - * Copyright (C) 2017 Kevin Lamonte + * Copyright (C) 2019 Kevin Lamonte * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,7 +29,7 @@ package jexer.bits; import java.util.List; -import java.util.LinkedList; +import java.util.ArrayList; /** * StringUtils contains methods to: @@ -39,8 +39,10 @@ import java.util.LinkedList; * * - Unescape C0 control codes. * + * - Read/write a line of RFC4180 comma-separated values strings to/from a + * list of strings. */ -public final class StringUtils { +public class StringUtils { /** * Left-justify a string into a list of lines. @@ -50,7 +52,7 @@ public final class StringUtils { * @return the list of lines */ public static List left(final String str, final int n) { - List result = new LinkedList(); + List result = new ArrayList(); /* * General procedure: @@ -78,14 +80,14 @@ public final class StringUtils { // We have just transitioned from a word to // whitespace. See if we have enough space to add // the word to the line. - if (word.length() + line.length() > n) { + if (width(word.toString()) + width(line.toString()) > n) { // This word will exceed the line length. Wrap // at it instead. result.add(line.toString()); line = new StringBuilder(); } if ((word.toString().startsWith(" ")) - && (line.length() == 0) + && (width(line.toString()) == 0) ) { line.append(word.substring(1)); } else { @@ -110,14 +112,14 @@ public final class StringUtils { } } // for (int j = 0; j < rawLines[i].length(); j++) - if (word.length() + line.length() > n) { + if (width(word.toString()) + width(line.toString()) > n) { // This word will exceed the line length. Wrap at it // instead. result.add(line.toString()); line = new StringBuilder(); } if ((word.toString().startsWith(" ")) - && (line.length() == 0) + && (width(line.toString()) == 0) ) { line.append(word.substring(1)); } else { @@ -137,7 +139,7 @@ public final class StringUtils { * @return the list of lines */ public static List right(final String str, final int n) { - List result = new LinkedList(); + List result = new ArrayList(); /* * Same as left(), but preceed each line with spaces to make it n @@ -146,7 +148,7 @@ public final class StringUtils { List lines = left(str, n); for (String line: lines) { StringBuilder sb = new StringBuilder(); - for (int i = 0; i < n - line.length(); i++) { + for (int i = 0; i < n - width(line); i++) { sb.append(' '); } sb.append(line); @@ -164,7 +166,7 @@ public final class StringUtils { * @return the list of lines */ public static List center(final String str, final int n) { - List result = new LinkedList(); + List result = new ArrayList(); /* * Same as left(), but preceed/succeed each line with spaces to make @@ -173,8 +175,8 @@ public final class StringUtils { List lines = left(str, n); for (String line: lines) { StringBuilder sb = new StringBuilder(); - int l = (n - line.length()) / 2; - int r = n - line.length() - l; + int l = (n - width(line)) / 2; + int r = n - width(line) - l; for (int i = 0; i < l; i++) { sb.append(' '); } @@ -196,7 +198,7 @@ public final class StringUtils { * @return the list of lines */ public static List full(final String str, final int n) { - List result = new LinkedList(); + List result = new ArrayList(); /* * Same as left(), but insert spaces between words to make each line @@ -283,4 +285,214 @@ public final class StringUtils { return sb.toString(); } + /** + * Read a line of RFC4180 comma-separated values (CSV) into a list of + * strings. + * + * @param line the CSV line, with or without without line terminators + * @return the list of strings + */ + public static List fromCsv(final String line) { + List result = new ArrayList(); + + StringBuilder str = new StringBuilder(); + boolean quoted = false; + boolean fieldQuoted = false; + + for (int i = 0; i < line.length(); i++) { + char ch = line.charAt(i); + + /* + System.err.println("ch '" + ch + "' str '" + str + "' " + + " fieldQuoted " + fieldQuoted + " quoted " + quoted); + */ + + if (ch == ',') { + if (fieldQuoted && quoted) { + // Terminating a quoted field. + result.add(str.toString()); + str = new StringBuilder(); + quoted = false; + fieldQuoted = false; + } else if (fieldQuoted) { + // Still waiting to see the terminating quote for this + // field. + str.append(ch); + } else if (quoted) { + // An unmatched double-quote and comma. This should be + // an invalid sequence. We will treat it as a quote + // terminating the field. + str.append('\"'); + result.add(str.toString()); + str = new StringBuilder(); + quoted = false; + fieldQuoted = false; + } else { + // A field separator. + result.add(str.toString()); + str = new StringBuilder(); + quoted = false; + fieldQuoted = false; + } + continue; + } + + if (ch == '\"') { + if ((str.length() == 0) && (!fieldQuoted)) { + // The opening quote to a quoted field. + fieldQuoted = true; + } else if (quoted) { + // This is a double-quote. + str.append('\"'); + quoted = false; + } else { + // This is the beginning of a quote. + quoted = true; + } + continue; + } + + // Normal character, pass it on. + str.append(ch); + } + + // Include the final field. + result.add(str.toString()); + + return result; + } + + /** + * Write a list of strings to on line of RFC4180 comma-separated values + * (CSV). + * + * @param list the list of strings + * @return the CSV line, without any line terminators + */ + public static String toCsv(final List list) { + StringBuilder result = new StringBuilder(); + int i = 0; + for (String str: list) { + + if (!str.contains("\"") && !str.contains(",")) { + // Just append the string with a comma. + result.append(str); + } else if (!str.contains("\"") && str.contains(",")) { + // Contains commas, but no quotes. Just double-quote it. + result.append("\""); + result.append(str); + result.append("\""); + } else if (str.contains("\"")) { + // Contains quotes and maybe commas. Double-quote it and + // replace quotes inside. + result.append("\""); + for (int j = 0; j < str.length(); j++) { + char ch = str.charAt(j); + result.append(ch); + if (ch == '\"') { + result.append("\""); + } + } + result.append("\""); + } + + if (i < list.size() - 1) { + result.append(","); + } + i++; + } + return result.toString(); + } + + /** + * Determine display width of a Unicode code point. + * + * @param ch the code point, can be char + * @return the number of text cell columns required to display this code + * point, one of 0, 1, or 2 + */ + public static int width(final int ch) { + /* + * This routine is a modified version of mk_wcwidth() available + * at: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + * + * The combining characters list has been omitted from this + * implementation. Hopefully no users will be impacted. + */ + + // 8-bit control characters: width 0 + if (ch == 0) { + return 0; + } + if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) { + return 0; + } + + // All others: either 1 or 2 + if ((ch >= 0x1100) + && ((ch <= 0x115f) + // Hangul Jamo init. consonants + || (ch == 0x2329) + || (ch == 0x232a) + // CJK ... Yi + || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f)) + // Hangul Syllables + || ((ch >= 0xac00) && (ch <= 0xd7a3)) + // CJK Compatibility Ideographs + || ((ch >= 0xf900) && (ch <= 0xfaff)) + // Vertical forms + || ((ch >= 0xfe10) && (ch <= 0xfe19)) + // CJK Compatibility Forms + || ((ch >= 0xfe30) && (ch <= 0xfe6f)) + // Fullwidth Forms + || ((ch >= 0xff00) && (ch <= 0xff60)) + || ((ch >= 0xffe0) && (ch <= 0xffe6)) + || ((ch >= 0x20000) && (ch <= 0x2fffd)) + || ((ch >= 0x30000) && (ch <= 0x3fffd)) + // emoji + || ((ch >= 0x1f004) && (ch <= 0x1fffd)) + ) + ) { + return 2; + } + return 1; + } + + /** + * Determine display width of a string. This ASSUMES that no characters + * are combining. Hopefully no users will be impacted. + * + * @param str the string + * @return the number of text cell columns required to display this string + */ + public static int width(final String str) { + int n = 0; + for (int i = 0; i < str.length();) { + int ch = str.codePointAt(i); + n += width(ch); + i += Character.charCount(ch); + } + return n; + } + + /** + * Check if character is in the CJK range. + * + * @param ch character to check + * @return true if this character is in the CJK range + */ + public static boolean isCjk(final int ch) { + return ((ch >= 0x2e80) && (ch <= 0x9fff)); + } + + /** + * Check if character is in the emoji range. + * + * @param ch character to check + * @return true if this character is in the emoji range + */ + public static boolean isEmoji(final int ch) { + return ((ch >= 0x1f004) && (ch <= 0x1fffd)); + } + }