fffce206875cf663480d2041aac121f88b58d01a
[nikiroo-utils.git] / bits / StringUtils.java
1 /*
2 * Jexer - Java Text User Interface
3 *
4 * The MIT License (MIT)
5 *
6 * Copyright (C) 2019 Kevin Lamonte
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 *
26 * @author Kevin Lamonte [kevin.lamonte@gmail.com]
27 * @version 1
28 */
29 package jexer.bits;
30
31 import java.util.List;
32 import java.util.ArrayList;
33
/**
 * StringUtils contains methods to:
 *
 *    - Convert one or more long lines of strings into justified text
 *      paragraphs.
 *
 *    - Escape C0 control codes so that a string can be drawn on screen
 *      (see {@link #unescape(String)}; the method name is historical).
 *
 *    - Read/write a line of RFC4180 comma-separated values strings to/from a
 *      list of strings.
 *
 *    - Compute the display width (in text cells) of code points and strings.
 */
public class StringUtils {

    /**
     * Left-justify a string into a list of lines.
     *
     * The string is split on '\n' into paragraphs, and each paragraph is
     * word-wrapped so that no line is wider than n text cells, unless a
     * single word is itself wider than n (such a word is placed alone on
     * its own line, it is never broken).  Runs of spaces and tabs between
     * words collapse to the first whitespace character of the run, and
     * leading whitespace of a paragraph is dropped.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> left(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        String [] rawLines = str.split("\n");
        for (int i = 0; i < rawLines.length; i++) {
            String rawLine = rawLines[i];
            StringBuilder line = new StringBuilder();
            // The pending word, INCLUDING the single whitespace character
            // that separates it from the previous word (if any).
            StringBuilder word = new StringBuilder();
            boolean inWord = false;

            for (int j = 0; j < rawLine.length(); j++) {
                char ch = rawLine.charAt(j);
                if ((ch == ' ') || (ch == '\t')) {
                    if (inWord) {
                        // We have just transitioned from a word to
                        // whitespace: commit the finished word, then start
                        // the next pending word with this separator.
                        line = appendWord(result, line, word, n);
                        word = new StringBuilder();
                        word.append(ch);
                        inWord = false;
                    }
                    // Otherwise we are in the whitespace before another
                    // word: the extra whitespace is dropped.
                } else {
                    word.append(ch);
                    inWord = true;
                }
            }

            // Commit whatever is still pending.  When the paragraph ends in
            // whitespace the pending "word" is only a separator: keep it if
            // it fits, but never wrap it onto an empty line of its own.
            boolean fits = (width(word.toString())
                + width(line.toString()) <= n);
            if (inWord || fits) {
                line = appendWord(result, line, word, n);
            }
            result.add(line.toString());
        }

        return result;
    }

    /**
     * Append a pending word to the current line, wrapping first if the word
     * does not fit.
     *
     * @param result the list of finished lines, receives the current line
     * if a wrap occurs
     * @param line the line being built
     * @param word the pending word, possibly starting with its separator
     * @param n the maximum number of text cells in a line
     * @return the line to keep building: either line itself, or a fresh
     * builder if a wrap occurred
     */
    private static StringBuilder appendWord(final List<String> result,
        final StringBuilder line, final StringBuilder word, final int n) {

        StringBuilder current = line;

        // Wrap only when there is something to flush: an over-long word at
        // the start of a paragraph must not emit a spurious empty line.
        if ((current.length() > 0)
            && (width(word.toString()) + width(current.toString()) > n)
        ) {
            result.add(current.toString());
            current = new StringBuilder();
        }

        // A line must not begin with the separator of its first word.
        boolean startsWithSeparator = (word.length() > 0)
            && ((word.charAt(0) == ' ') || (word.charAt(0) == '\t'));
        if (startsWithSeparator && (width(current.toString()) == 0)) {
            current.append(word.substring(1));
        } else {
            current.append(word);
        }
        return current;
    }

    /**
     * Append a number of spaces to a builder.
     *
     * @param sb the builder to append to
     * @param count the number of spaces, nothing is appended if it is zero
     * or negative
     */
    private static void pad(final StringBuilder sb, final int count) {
        for (int i = 0; i < count; i++) {
            sb.append(' ');
        }
    }

    /**
     * Right-justify a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> right(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        // Same as left(), but precede each line with spaces to make it n
        // cells wide.  Lines already wider than n are left alone.
        for (String line: left(str, n)) {
            StringBuilder sb = new StringBuilder();
            pad(sb, n - width(line));
            sb.append(line);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Center a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> center(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        // Same as left(), but precede/succeed each line with spaces to make
        // it n cells wide.  An odd leftover cell goes to the right side.
        for (String line: left(str, n)) {
            int lineWidth = width(line);
            int l = (n - lineWidth) / 2;
            int r = n - lineWidth - l;
            StringBuilder sb = new StringBuilder();
            pad(sb, l);
            sb.append(line);
            pad(sb, r);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Fully-justify a string into a list of lines.
     *
     * Every line produced by left() except the very last one is stretched
     * to n text cells by widening the gaps between its words.  Lines made
     * of a single word are left untouched.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> full(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        List<String> lines = left(str, n);
        for (int lineI = 0; lineI < lines.size() - 1; lineI++) {
            result.add(justify(lines.get(lineI), n));
        }
        if (lines.size() > 0) {
            // The last line is never stretched.
            result.add(lines.get(lines.size() - 1));
        }

        return result;
    }

    /**
     * Stretch one line to n text cells by distributing spaces between its
     * words.  The "algorithm" here is pretty dumb: it splits on space and
     * re-inserts an equal number of spaces in every gap, handing out the
     * remainder one extra space at a time starting from the leftmost gap.
     *
     * @param line the line to stretch
     * @param n the desired width in text cells
     * @return the stretched line, or line itself if it has fewer than two
     * words or cannot be stretched
     */
    private static String justify(final String line, final int n) {
        String [] words = line.split(" ");
        if (words.length < 2) {
            return line;
        }

        // Measure in display cells, not chars, so that the result agrees
        // with the wrapping performed by left() for double-width text.
        int textWidth = 0;
        for (String w: words) {
            textWidth += width(w);
        }

        int gaps = words.length - 1;
        int spaceCount = n - textWidth;
        if (spaceCount < gaps) {
            // Not even one space per gap is available: do not glue the
            // words together, keep the line as it is.
            return line;
        }
        int q = spaceCount / gaps;
        int r = spaceCount % gaps;

        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < gaps; i++) {
            sb.append(words[i]);
            pad(sb, q);
            if (r > 0) {
                sb.append(' ');
                r--;
            }
        }
        sb.append(words[gaps]);
        return sb.toString();
    }

    /**
     * Convert a raw string into a string that can be splatted on the
     * screen: C0 control characters and DEL are replaced by printable
     * text.  Backspace, form feed, newline, carriage return and tab become
     * their two-character backslash notation, DEL becomes "^?", and every
     * other character below 0x20 becomes a single space.
     *
     * @param str the string
     * @return a string that can be passed into Screen.putStringXY()
     */
    public static String unescape(final String str) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            switch (ch) {
            case '\b':
                sb.append("\\b");
                break;
            case '\f':
                sb.append("\\f");
                break;
            case '\n':
                sb.append("\\n");
                break;
            case '\r':
                sb.append("\\r");
                break;
            case '\t':
                sb.append("\\t");
                break;
            case 0x7f:
                sb.append("^?");
                break;
            default:
                if (ch < 0x20) {
                    // Any other C0 control code.
                    sb.append(' ');
                } else {
                    sb.append(ch);
                }
                break;
            }
        }
        return sb.toString();
    }

    /**
     * Read a line of RFC4180 comma-separated values (CSV) into a list of
     * strings.
     *
     * A single trailing line terminator (CR LF, LF or CR) is ignored.  Line
     * breaks inside a double-quoted field are kept as part of the field.
     *
     * @param line the CSV line, with or without line terminators
     * @return the list of strings, always containing at least one element
     */
    public static List<String> fromCsv(final String line) {
        List<String> result = new ArrayList<String>();

        // Ignore one trailing line terminator, as documented.
        int end = line.length();
        if ((end > 0) && (line.charAt(end - 1) == '\n')) {
            end--;
        }
        if ((end > 0) && (line.charAt(end - 1) == '\r')) {
            end--;
        }

        StringBuilder str = new StringBuilder();
        // True right after a double-quote that may be either the first
        // half of an escaped "" pair or the quote closing the field.
        boolean quoted = false;
        // True when the current field started with a double-quote.
        boolean fieldQuoted = false;

        for (int i = 0; i < end; i++) {
            char ch = line.charAt(i);

            if (ch == ',') {
                if (fieldQuoted && !quoted) {
                    // Still waiting to see the terminating quote for this
                    // field: the comma is data.
                    str.append(ch);
                    continue;
                }
                if (!fieldQuoted && quoted) {
                    // An unmatched double-quote and comma in an unquoted
                    // field.  This should be an invalid sequence.  We keep
                    // the quote and terminate the field.
                    str.append('"');
                }
                // A field separator.
                result.add(str.toString());
                str = new StringBuilder();
                quoted = false;
                fieldQuoted = false;
                continue;
            }

            if (ch == '"') {
                if ((str.length() == 0) && (!fieldQuoted)) {
                    // The opening quote to a quoted field.
                    fieldQuoted = true;
                } else if (quoted) {
                    // Second half of an escaped pair: one literal quote.
                    str.append('"');
                    quoted = false;
                } else {
                    // Either a closing quote or the first half of a pair.
                    quoted = true;
                }
                continue;
            }

            // Normal character, pass it on.
            str.append(ch);
        }

        // Include the final field.
        result.add(str.toString());

        return result;
    }

    /**
     * Write a list of strings to one line of RFC4180 comma-separated values
     * (CSV).
     *
     * Fields containing a double-quote, a comma, or a line break are
     * enclosed in double-quotes, and double-quotes inside such a field are
     * doubled.  All other fields are written as they are.
     *
     * @param list the list of strings
     * @return the CSV line, without a line terminator at the end
     */
    public static String toCsv(final List<String> list) {
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (String str: list) {
            if (!first) {
                result.append(',');
            }
            first = false;

            boolean needsQuoting = (str.indexOf('"') >= 0)
                || (str.indexOf(',') >= 0)
                || (str.indexOf('\n') >= 0)
                || (str.indexOf('\r') >= 0);

            if (!needsQuoting) {
                result.append(str);
                continue;
            }

            result.append('"');
            for (int j = 0; j < str.length(); j++) {
                char ch = str.charAt(j);
                result.append(ch);
                if (ch == '"') {
                    // Escape a quote by doubling it.
                    result.append('"');
                }
            }
            result.append('"');
        }
        return result.toString();
    }

    /**
     * Determine display width of a Unicode code point.
     *
     * @param ch the code point, can be char
     * @return the number of text cell columns required to display this code
     * point, one of 0, 1, or 2
     */
    public static int width(final int ch) {
        /*
         * This routine is a modified version of mk_wcwidth() available
         * at: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
         *
         * The combining characters list has been omitted from this
         * implementation, so combining characters report a width of 1.
         */

        // 8-bit control characters (C0, DEL and C1): width 0
        if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) {
            return 0;
        }

        // All others: either 1 or 2.  Every double-width range starts at or
        // above U+1100, so that test is hoisted out of the alternatives.
        if ((ch >= 0x1100)
            && ((ch <= 0x115f)
                // Hangul Jamo init. consonants
                || (ch == 0x2329)
                || (ch == 0x232a)
                // CJK ... Yi
                || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f))
                // Hangul Syllables
                || ((ch >= 0xac00) && (ch <= 0xd7a3))
                // CJK Compatibility Ideographs
                || ((ch >= 0xf900) && (ch <= 0xfaff))
                // Vertical forms
                || ((ch >= 0xfe10) && (ch <= 0xfe19))
                // CJK Compatibility Forms
                || ((ch >= 0xfe30) && (ch <= 0xfe6f))
                // Fullwidth Forms
                || ((ch >= 0xff00) && (ch <= 0xff60))
                || ((ch >= 0xffe0) && (ch <= 0xffe6))
                || ((ch >= 0x20000) && (ch <= 0x2fffd))
                || ((ch >= 0x30000) && (ch <= 0x3fffd))
                // emoji
                || ((ch >= 0x1f004) && (ch <= 0x1fffd))
            )
        ) {
            return 2;
        }
        return 1;
    }

    /**
     * Determine display width of a string.  This ASSUMES that no characters
     * are combining.  Hopefully no users will be impacted.
     *
     * The string is walked by code point, so a surrogate pair counts as one
     * character.
     *
     * @param str the string
     * @return the number of text cell columns required to display this string
     */
    public static int width(final String str) {
        int n = 0;
        int i = 0;
        while (i < str.length()) {
            int ch = str.codePointAt(i);
            n += width(ch);
            i += Character.charCount(ch);
        }
        return n;
    }

    /**
     * Check if character is in the CJK range.
     *
     * @param ch character to check
     * @return true if this character is in the CJK range
     */
    public static boolean isCjk(final int ch) {
        return ((ch >= 0x2e80) && (ch <= 0x9fff));
    }

    /**
     * Check if character is in the emoji range.
     *
     * @param ch character to check
     * @return true if this character is in the emoji range
     */
    public static boolean isEmoji(final int ch) {
        return ((ch >= 0x1f004) && (ch <= 0x1fffd));
    }

}
497
498 }