Commit | Line | Data |
---|---|---|
7657ad8c KL |
1 | /* |
2 | * Jexer - Java Text User Interface | |
3 | * | |
4 | * The MIT License (MIT) | |
5 | * | |
a69ed767 | 6 | * Copyright (C) 2019 Kevin Lamonte |
7657ad8c KL |
7 | * |
8 | * Permission is hereby granted, free of charge, to any person obtaining a | |
9 | * copy of this software and associated documentation files (the "Software"), | |
10 | * to deal in the Software without restriction, including without limitation | |
11 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
12 | * and/or sell copies of the Software, and to permit persons to whom the | |
13 | * Software is furnished to do so, subject to the following conditions: | |
14 | * | |
15 | * The above copyright notice and this permission notice shall be included in | |
16 | * all copies or substantial portions of the Software. | |
17 | * | |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
21 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
24 | * DEALINGS IN THE SOFTWARE. | |
25 | * | |
26 | * @author Kevin Lamonte [kevin.lamonte@gmail.com] | |
27 | * @version 1 | |
28 | */ | |
29 | package jexer.bits; | |
30 | ||
31 | import java.util.List; | |
656c0ddd | 32 | import java.util.ArrayList; |
7657ad8c KL |
33 | |
34 | /** | |
d36057df KL |
35 | * StringUtils contains methods to: |
36 | * | |
37 | * - Convert one or more long lines of strings into justified text | |
38 | * paragraphs. | |
39 | * | |
40 | * - Unescape C0 control codes. | |
41 | * | |
656c0ddd KL |
42 | * - Read/write a line of RFC4180 comma-separated values strings to/from a |
43 | * list of strings. | |
7657ad8c | 44 | */ |
public class StringUtils {

    /**
     * Left-justify a string into a list of lines.
     *
     * <p>The string is split on '\n' into paragraphs, and each paragraph is
     * word-wrapped so that no line is wider than n text cells, unless a
     * single word is itself wider than n, in which case that word gets a
     * line of its own.  Runs of spaces/tabs between words collapse to the
     * first whitespace character of the run, and whitespace before the first
     * word of a paragraph is dropped.</p>
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> left(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * General procedure:
         *
         *   1. Split on '\n' into paragraphs.
         *
         *   2. Scan each paragraph, accumulating one word (with its single
         *      leading whitespace character) at a time.
         *
         *   3. When a word ends, wrap before it if adding it would make the
         *      current line wider than n.
         *
         *   4. Return the lines.
         */
        String [] rawLines = str.split("\n");
        for (int i = 0; i < rawLines.length; i++) {
            StringBuilder line = new StringBuilder();
            StringBuilder word = new StringBuilder();
            boolean inWord = false;
            for (int j = 0; j < rawLines[i].length(); j++) {
                char ch = rawLines[i].charAt(j);
                if ((ch == ' ') || (ch == '\t')) {
                    if (inWord) {
                        // We have just transitioned from a word to
                        // whitespace.  Commit the finished word, then start
                        // the next one with this whitespace character.
                        appendWord(result, line, word, n);
                        word.setLength(0);
                        word.append(ch);
                        inWord = false;
                    }
                    // Otherwise we are in the whitespace before another
                    // word: additional whitespace is dropped.
                } else {
                    // Either continuing a word or starting a new one.
                    word.append(ch);
                    inWord = true;
                }
            }

            // Commit whatever is left over, then close out the paragraph.
            appendWord(result, line, word, n);
            result.add(line.toString());
        }

        return result;
    }

    /**
     * Append a finished word to the line being built, first wrapping to a
     * new line if the word does not fit.
     *
     * @param result the list of completed lines; the current line is added
     * to it when a wrap occurs
     * @param line the line being built; emptied when a wrap occurs
     * @param word the word to add, possibly starting with one whitespace
     * character
     * @param n the maximum number of text cells in a line
     */
    private static void appendWord(final List<String> result,
        final StringBuilder line, final StringBuilder word, final int n) {

        // Only wrap when there is something on the line: flushing an empty
        // line would emit a spurious blank line ahead of a word that is
        // wider than n all by itself.
        if ((line.length() > 0)
            && (width(word.toString()) + width(line.toString()) > n)
        ) {
            result.add(line.toString());
            line.setLength(0);
        }

        if ((word.toString().startsWith(" "))
            && (width(line.toString()) == 0)
        ) {
            // Do not begin a line with the separating space.
            line.append(word.substring(1));
        } else {
            line.append(word);
        }
    }

    /**
     * Right-justify a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> right(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but precede each line with spaces to make it n
         * cells wide.  Lines already wider than n are left untouched.
         */
        for (String line: left(str, n)) {
            StringBuilder sb = new StringBuilder();
            int pad = n - width(line);
            for (int i = 0; i < pad; i++) {
                sb.append(' ');
            }
            sb.append(line);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Center a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> center(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but precede/succeed each line with spaces to make
         * it n cells wide.  When the padding is odd, the extra space goes on
         * the right.
         */
        for (String line: left(str, n)) {
            StringBuilder sb = new StringBuilder();
            int l = (n - width(line)) / 2;
            int r = n - width(line) - l;
            for (int i = 0; i < l; i++) {
                sb.append(' ');
            }
            sb.append(line);
            for (int i = 0; i < r; i++) {
                sb.append(' ');
            }
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Fully-justify a string into a list of lines.
     *
     * <p>Every line produced by left() except the very last one is padded
     * between words so that it is n text cells wide; the last line is
     * returned as-is.  Lines consisting of a single word are not padded.</p>
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> full(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but insert spaces between words to make each line
         * n cells wide.  The "algorithm" here is pretty dumb: it performs a
         * split on space and then distributes the missing spaces evenly
         * over the gaps, giving the leftmost gaps one extra when the
         * division is not exact.
         */
        List<String> lines = left(str, n);
        for (int lineI = 0; lineI < lines.size() - 1; lineI++) {
            String line = lines.get(lineI);
            String [] words = line.split(" ");
            if (words.length <= 1) {
                result.add(line);
                continue;
            }

            int gaps = words.length - 1;

            // Measure in text cells, not chars, so that double-width
            // characters are padded to the same width as left(), right(),
            // and center() would use.
            int cellCount = 0;
            for (int i = 0; i < words.length; i++) {
                cellCount += width(words[i]);
            }
            int spaceCount = n - cellCount;
            int q = spaceCount / gaps;
            int r = spaceCount % gaps;

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < gaps; i++) {
                sb.append(words[i]);
                int pad = q;
                if (i < r) {
                    pad++;
                }
                for (int j = 0; j < pad; j++) {
                    sb.append(' ');
                }
            }
            sb.append(words[gaps]);
            result.add(sb.toString());
        }
        if (lines.size() > 0) {
            result.add(lines.get(lines.size() - 1));
        }

        return result;
    }

    /**
     * Convert raw strings into escaped strings that can be splatted on the
     * screen.
     *
     * <p>Backspace, form feed, newline, carriage return, and tab become
     * their two-character backslash forms; DEL (0x7F) becomes "^?"; every
     * other character below 0x20 becomes a space.  All other characters are
     * passed through unchanged.</p>
     *
     * @param str the string
     * @return a string that can be passed into Screen.putStringXY()
     */
    public static String unescape(final String str) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            switch (ch) {
            case '\b':
                sb.append("\\b");
                break;
            case '\f':
                sb.append("\\f");
                break;
            case '\n':
                sb.append("\\n");
                break;
            case '\r':
                sb.append("\\r");
                break;
            case '\t':
                sb.append("\\t");
                break;
            case 0x7f:
                sb.append("^?");
                break;
            default:
                if (ch < 0x20) {
                    // Any other C0 control code is blanked out.
                    sb.append(' ');
                } else {
                    sb.append(ch);
                }
                break;
            }
        }
        return sb.toString();
    }

    /**
     * Read a line of RFC4180 comma-separated values (CSV) into a list of
     * strings.
     *
     * <p>Trailing carriage return / line feed characters are removed before
     * parsing, so they never end up in the last field.  Line breaks inside
     * a properly closed quoted field are preserved.</p>
     *
     * @param line the CSV line, with or without line terminators
     * @return the list of strings
     */
    public static List<String> fromCsv(final String line) {
        List<String> result = new ArrayList<String>();

        // Ignore the line terminator(s), if any.
        int end = line.length();
        while ((end > 0)
            && ((line.charAt(end - 1) == '\n')
                || (line.charAt(end - 1) == '\r'))
        ) {
            end--;
        }

        StringBuilder str = new StringBuilder();
        // True when the previous character was a double-quote whose meaning
        // is not yet known (closing quote or first half of an escaped "").
        boolean quoted = false;
        // True when the current field began with a double-quote.
        boolean fieldQuoted = false;

        for (int i = 0; i < end; i++) {
            char ch = line.charAt(i);

            if (ch == ',') {
                if (fieldQuoted && !quoted) {
                    // Still waiting to see the terminating quote for this
                    // field: the comma is data.
                    str.append(ch);
                    continue;
                }
                if (!fieldQuoted && quoted) {
                    // An unmatched double-quote and comma.  This should be
                    // an invalid sequence.  We will treat it as a quote
                    // terminating the field.
                    str.append('\"');
                }
                // Either the end of a quoted field, or a plain field
                // separator.
                result.add(str.toString());
                str = new StringBuilder();
                quoted = false;
                fieldQuoted = false;
                continue;
            }

            if (ch == '\"') {
                if ((str.length() == 0) && (!fieldQuoted)) {
                    // The opening quote to a quoted field.
                    fieldQuoted = true;
                } else if (quoted) {
                    // Second half of an escaped double-quote.
                    str.append('\"');
                    quoted = false;
                } else {
                    // Either a closing quote or the first half of an
                    // escaped double-quote; the next character decides.
                    quoted = true;
                }
                continue;
            }

            // Normal character, pass it on.
            str.append(ch);
        }

        // Include the final field.
        result.add(str.toString());

        return result;
    }

    /**
     * Write a list of strings to one line of RFC4180 comma-separated values
     * (CSV).
     *
     * <p>A field is enclosed in double-quotes when it contains a comma, a
     * double-quote, or a line break; double-quotes inside such a field are
     * doubled.</p>
     *
     * @param list the list of strings
     * @return the CSV line, without any line terminators
     */
    public static String toCsv(final List<String> list) {
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (String str: list) {
            if (!first) {
                result.append(',');
            }
            first = false;

            boolean needsQuotes = (str.indexOf('\"') >= 0)
                || (str.indexOf(',') >= 0)
                || (str.indexOf('\n') >= 0)
                || (str.indexOf('\r') >= 0);

            if (!needsQuotes) {
                // Plain field, emit verbatim.
                result.append(str);
                continue;
            }

            // Double-quote the field and double up any quotes inside it.
            result.append('\"');
            result.append(str.replace("\"", "\"\""));
            result.append('\"');
        }
        return result.toString();
    }

    /**
     * Determine display width of a Unicode code point.
     *
     * @param ch the code point, can be char
     * @return the number of text cell columns required to display this code
     * point, one of 0, 1, or 2
     */
    public static int width(final int ch) {
        /*
         * This routine is a modified version of mk_wcwidth() available
         * at: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
         *
         * The combining characters list has been omitted from this
         * implementation.  Hopefully no users will be impacted.
         */

        // 8-bit control characters (including NUL): width 0
        if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) {
            return 0;
        }

        // All others: either 1 or 2
        if ((ch >= 0x1100)
            && ((ch <= 0x115f)
                // Hangul Jamo init. consonants
                || (ch == 0x2329)
                || (ch == 0x232a)
                // CJK ... Yi
                || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f))
                // Hangul Syllables
                || ((ch >= 0xac00) && (ch <= 0xd7a3))
                // CJK Compatibility Ideographs
                || ((ch >= 0xf900) && (ch <= 0xfaff))
                // Vertical forms
                || ((ch >= 0xfe10) && (ch <= 0xfe19))
                // CJK Compatibility Forms
                || ((ch >= 0xfe30) && (ch <= 0xfe6f))
                // Fullwidth Forms
                || ((ch >= 0xff00) && (ch <= 0xff60))
                || ((ch >= 0xffe0) && (ch <= 0xffe6))
                || ((ch >= 0x20000) && (ch <= 0x2fffd))
                || ((ch >= 0x30000) && (ch <= 0x3fffd))
                // emoji
                || ((ch >= 0x1f004) && (ch <= 0x1fffd))
            )
        ) {
            return 2;
        }
        return 1;
    }

    /**
     * Determine display width of a string.  This ASSUMES that no characters
     * are combining.  Hopefully no users will be impacted.
     *
     * @param str the string
     * @return the number of text cell columns required to display this string
     */
    public static int width(final String str) {
        int n = 0;
        // Walk by code point so that surrogate pairs are measured once.
        for (int i = 0; i < str.length();) {
            int ch = str.codePointAt(i);
            n += width(ch);
            i += Character.charCount(ch);
        }
        return n;
    }

    /**
     * Check if character is in the CJK range.
     *
     * @param ch character to check
     * @return true if this character is in the CJK range (0x2e80 through
     * 0x9fff inclusive)
     */
    public static boolean isCjk(final int ch) {
        return ((ch >= 0x2e80) && (ch <= 0x9fff));
    }

    /**
     * Check if character is in the emoji range.
     *
     * @param ch character to check
     * @return true if this character is in the emoji range (0x1f004 through
     * 0x1fffd inclusive)
     */
    public static boolean isEmoji(final int ch) {
        return ((ch >= 0x1f004) && (ch <= 0x1fffd));
    }

}