Commit | Line | Data |
---|---|---|
7657ad8c KL |
1 | /* |
2 | * Jexer - Java Text User Interface | |
3 | * | |
4 | * The MIT License (MIT) | |
5 | * | |
a69ed767 | 6 | * Copyright (C) 2019 Kevin Lamonte |
7657ad8c KL |
7 | * |
8 | * Permission is hereby granted, free of charge, to any person obtaining a | |
9 | * copy of this software and associated documentation files (the "Software"), | |
10 | * to deal in the Software without restriction, including without limitation | |
11 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
12 | * and/or sell copies of the Software, and to permit persons to whom the | |
13 | * Software is furnished to do so, subject to the following conditions: | |
14 | * | |
15 | * The above copyright notice and this permission notice shall be included in | |
16 | * all copies or substantial portions of the Software. | |
17 | * | |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
21 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
24 | * DEALINGS IN THE SOFTWARE. | |
25 | * | |
26 | * @author Kevin Lamonte [kevin.lamonte@gmail.com] | |
27 | * @version 1 | |
28 | */ | |
29 | package jexer.bits; | |
30 | ||
31 | import java.util.List; | |
656c0ddd | 32 | import java.util.ArrayList; |
34bb6e52 | 33 | import java.util.Arrays; |
7657ad8c KL |
34 | |
35 | /** | |
d36057df KL |
36 | * StringUtils contains methods to: |
37 | * | |
38 | * - Convert one or more long lines of strings into justified text | |
39 | * paragraphs. | |
40 | * | |
41 | * - Unescape C0 control codes. | |
42 | * | |
656c0ddd KL |
43 | * - Read/write a line of RFC4180 comma-separated values strings to/from a |
44 | * list of strings. | |
7657ad8c | 45 | */ |
051e2913 | 46 | public class StringUtils { |
7657ad8c KL |
47 | |
48 | /** | |
49 | * Left-justify a string into a list of lines. | |
50 | * | |
51 | * @param str the string | |
52 | * @param n the maximum number of characters in a line | |
53 | * @return the list of lines | |
54 | */ | |
55 | public static List<String> left(final String str, final int n) { | |
656c0ddd | 56 | List<String> result = new ArrayList<String>(); |
7657ad8c KL |
57 | |
58 | /* | |
59 | * General procedure: | |
60 | * | |
61 | * 1. Split on '\n' into paragraphs. | |
62 | * | |
63 | * 2. Scan each line, noting the position of the last | |
64 | * beginning-of-a-word. | |
65 | * | |
66 | * 3. Chop at the last #2 if the next beginning-of-a-word exceeds | |
67 | * n. | |
68 | * | |
69 | * 4. Return the lines. | |
70 | */ | |
71 | ||
72 | String [] rawLines = str.split("\n"); | |
73 | for (int i = 0; i < rawLines.length; i++) { | |
74 | StringBuilder line = new StringBuilder(); | |
75 | StringBuilder word = new StringBuilder(); | |
76 | boolean inWord = false; | |
77 | for (int j = 0; j < rawLines[i].length(); j++) { | |
78 | char ch = rawLines[i].charAt(j); | |
79 | if ((ch == ' ') || (ch == '\t')) { | |
80 | if (inWord == true) { | |
81 | // We have just transitioned from a word to | |
82 | // whitespace. See if we have enough space to add | |
83 | // the word to the line. | |
e820d5dd | 84 | if (width(word.toString()) + width(line.toString()) > n) { |
7657ad8c KL |
85 | // This word will exceed the line length. Wrap |
86 | // at it instead. | |
87 | result.add(line.toString()); | |
88 | line = new StringBuilder(); | |
89 | } | |
90 | if ((word.toString().startsWith(" ")) | |
e820d5dd | 91 | && (width(line.toString()) == 0) |
7657ad8c KL |
92 | ) { |
93 | line.append(word.substring(1)); | |
94 | } else { | |
95 | line.append(word); | |
96 | } | |
97 | word = new StringBuilder(); | |
98 | word.append(ch); | |
99 | inWord = false; | |
100 | } else { | |
101 | // We are in the whitespace before another word. Do | |
102 | // nothing. | |
103 | } | |
104 | } else { | |
105 | if (inWord == true) { | |
106 | // We are appending to a word. | |
107 | word.append(ch); | |
108 | } else { | |
109 | // We have transitioned from whitespace to a word. | |
110 | word.append(ch); | |
111 | inWord = true; | |
112 | } | |
113 | } | |
114 | } // for (int j = 0; j < rawLines[i].length(); j++) | |
115 | ||
e820d5dd | 116 | if (width(word.toString()) + width(line.toString()) > n) { |
7657ad8c KL |
117 | // This word will exceed the line length. Wrap at it |
118 | // instead. | |
119 | result.add(line.toString()); | |
120 | line = new StringBuilder(); | |
121 | } | |
122 | if ((word.toString().startsWith(" ")) | |
e820d5dd | 123 | && (width(line.toString()) == 0) |
7657ad8c KL |
124 | ) { |
125 | line.append(word.substring(1)); | |
126 | } else { | |
127 | line.append(word); | |
128 | } | |
129 | result.add(line.toString()); | |
130 | } // for (int i = 0; i < rawLines.length; i++) { | |
131 | ||
132 | return result; | |
133 | } | |
134 | ||
135 | /** | |
136 | * Right-justify a string into a list of lines. | |
137 | * | |
138 | * @param str the string | |
139 | * @param n the maximum number of characters in a line | |
140 | * @return the list of lines | |
141 | */ | |
142 | public static List<String> right(final String str, final int n) { | |
656c0ddd | 143 | List<String> result = new ArrayList<String>(); |
7657ad8c KL |
144 | |
145 | /* | |
146 | * Same as left(), but preceed each line with spaces to make it n | |
147 | * chars long. | |
148 | */ | |
149 | List<String> lines = left(str, n); | |
150 | for (String line: lines) { | |
151 | StringBuilder sb = new StringBuilder(); | |
e820d5dd | 152 | for (int i = 0; i < n - width(line); i++) { |
7657ad8c KL |
153 | sb.append(' '); |
154 | } | |
155 | sb.append(line); | |
156 | result.add(sb.toString()); | |
157 | } | |
158 | ||
159 | return result; | |
160 | } | |
161 | ||
162 | /** | |
163 | * Center a string into a list of lines. | |
164 | * | |
165 | * @param str the string | |
166 | * @param n the maximum number of characters in a line | |
167 | * @return the list of lines | |
168 | */ | |
169 | public static List<String> center(final String str, final int n) { | |
656c0ddd | 170 | List<String> result = new ArrayList<String>(); |
7657ad8c KL |
171 | |
172 | /* | |
173 | * Same as left(), but preceed/succeed each line with spaces to make | |
174 | * it n chars long. | |
175 | */ | |
176 | List<String> lines = left(str, n); | |
177 | for (String line: lines) { | |
178 | StringBuilder sb = new StringBuilder(); | |
e820d5dd KL |
179 | int l = (n - width(line)) / 2; |
180 | int r = n - width(line) - l; | |
7657ad8c KL |
181 | for (int i = 0; i < l; i++) { |
182 | sb.append(' '); | |
183 | } | |
184 | sb.append(line); | |
185 | for (int i = 0; i < r; i++) { | |
186 | sb.append(' '); | |
187 | } | |
188 | result.add(sb.toString()); | |
189 | } | |
190 | ||
191 | return result; | |
192 | } | |
193 | ||
194 | /** | |
195 | * Fully-justify a string into a list of lines. | |
196 | * | |
197 | * @param str the string | |
198 | * @param n the maximum number of characters in a line | |
199 | * @return the list of lines | |
200 | */ | |
201 | public static List<String> full(final String str, final int n) { | |
656c0ddd | 202 | List<String> result = new ArrayList<String>(); |
7657ad8c KL |
203 | |
204 | /* | |
205 | * Same as left(), but insert spaces between words to make each line | |
206 | * n chars long. The "algorithm" here is pretty dumb: it performs a | |
207 | * split on space and then re-inserts multiples of n between words. | |
208 | */ | |
209 | List<String> lines = left(str, n); | |
210 | for (int lineI = 0; lineI < lines.size() - 1; lineI++) { | |
211 | String line = lines.get(lineI); | |
212 | String [] words = line.split(" "); | |
213 | if (words.length > 1) { | |
214 | int charCount = 0; | |
215 | for (int i = 0; i < words.length; i++) { | |
216 | charCount += words[i].length(); | |
217 | } | |
218 | int spaceCount = n - charCount; | |
219 | int q = spaceCount / (words.length - 1); | |
220 | int r = spaceCount % (words.length - 1); | |
221 | StringBuilder sb = new StringBuilder(); | |
222 | for (int i = 0; i < words.length - 1; i++) { | |
223 | sb.append(words[i]); | |
224 | for (int j = 0; j < q; j++) { | |
225 | sb.append(' '); | |
226 | } | |
227 | if (r > 0) { | |
228 | sb.append(' '); | |
229 | r--; | |
230 | } | |
231 | } | |
232 | for (int j = 0; j < r; j++) { | |
233 | sb.append(' '); | |
234 | } | |
235 | sb.append(words[words.length - 1]); | |
236 | result.add(sb.toString()); | |
237 | } else { | |
238 | result.add(line); | |
239 | } | |
240 | } | |
241 | if (lines.size() > 0) { | |
242 | result.add(lines.get(lines.size() - 1)); | |
243 | } | |
244 | ||
245 | return result; | |
246 | } | |
247 | ||
d36057df KL |
248 | /** |
249 | * Convert raw strings into escaped strings that be splatted on the | |
250 | * screen. | |
251 | * | |
252 | * @param str the string | |
253 | * @return a string that can be passed into Screen.putStringXY() | |
254 | */ | |
255 | public static String unescape(final String str) { | |
256 | StringBuilder sb = new StringBuilder(); | |
257 | for (int i = 0; i < str.length(); i++) { | |
258 | char ch = str.charAt(i); | |
259 | if ((ch < 0x20) || (ch == 0x7F)) { | |
260 | switch (ch) { | |
261 | case '\b': | |
262 | sb.append("\\b"); | |
263 | continue; | |
264 | case '\f': | |
265 | sb.append("\\f"); | |
266 | continue; | |
267 | case '\n': | |
268 | sb.append("\\n"); | |
269 | continue; | |
270 | case '\r': | |
271 | sb.append("\\r"); | |
272 | continue; | |
273 | case '\t': | |
274 | sb.append("\\t"); | |
275 | continue; | |
276 | case 0x7f: | |
277 | sb.append("^?"); | |
278 | continue; | |
279 | default: | |
280 | sb.append(' '); | |
281 | continue; | |
282 | } | |
283 | } | |
284 | sb.append(ch); | |
285 | } | |
286 | return sb.toString(); | |
287 | } | |
288 | ||
656c0ddd KL |
289 | /** |
290 | * Read a line of RFC4180 comma-separated values (CSV) into a list of | |
291 | * strings. | |
292 | * | |
293 | * @param line the CSV line, with or without without line terminators | |
294 | * @return the list of strings | |
295 | */ | |
296 | public static List<String> fromCsv(final String line) { | |
297 | List<String> result = new ArrayList<String>(); | |
298 | ||
299 | StringBuilder str = new StringBuilder(); | |
300 | boolean quoted = false; | |
301 | boolean fieldQuoted = false; | |
302 | ||
303 | for (int i = 0; i < line.length(); i++) { | |
304 | char ch = line.charAt(i); | |
305 | ||
306 | /* | |
307 | System.err.println("ch '" + ch + "' str '" + str + "' " + | |
308 | " fieldQuoted " + fieldQuoted + " quoted " + quoted); | |
309 | */ | |
310 | ||
311 | if (ch == ',') { | |
312 | if (fieldQuoted && quoted) { | |
313 | // Terminating a quoted field. | |
314 | result.add(str.toString()); | |
315 | str = new StringBuilder(); | |
316 | quoted = false; | |
317 | fieldQuoted = false; | |
318 | } else if (fieldQuoted) { | |
319 | // Still waiting to see the terminating quote for this | |
320 | // field. | |
321 | str.append(ch); | |
322 | } else if (quoted) { | |
323 | // An unmatched double-quote and comma. This should be | |
324 | // an invalid sequence. We will treat it as a quote | |
325 | // terminating the field. | |
326 | str.append('\"'); | |
327 | result.add(str.toString()); | |
328 | str = new StringBuilder(); | |
329 | quoted = false; | |
330 | fieldQuoted = false; | |
331 | } else { | |
332 | // A field separator. | |
333 | result.add(str.toString()); | |
334 | str = new StringBuilder(); | |
335 | quoted = false; | |
336 | fieldQuoted = false; | |
337 | } | |
338 | continue; | |
339 | } | |
340 | ||
341 | if (ch == '\"') { | |
342 | if ((str.length() == 0) && (!fieldQuoted)) { | |
343 | // The opening quote to a quoted field. | |
344 | fieldQuoted = true; | |
345 | } else if (quoted) { | |
346 | // This is a double-quote. | |
347 | str.append('\"'); | |
348 | quoted = false; | |
349 | } else { | |
350 | // This is the beginning of a quote. | |
351 | quoted = true; | |
352 | } | |
353 | continue; | |
354 | } | |
355 | ||
356 | // Normal character, pass it on. | |
357 | str.append(ch); | |
358 | } | |
359 | ||
360 | // Include the final field. | |
361 | result.add(str.toString()); | |
362 | ||
363 | return result; | |
364 | } | |
365 | ||
366 | /** | |
367 | * Write a list of strings to on line of RFC4180 comma-separated values | |
368 | * (CSV). | |
369 | * | |
370 | * @param list the list of strings | |
371 | * @return the CSV line, without any line terminators | |
372 | */ | |
373 | public static String toCsv(final List<String> list) { | |
374 | StringBuilder result = new StringBuilder(); | |
375 | int i = 0; | |
376 | for (String str: list) { | |
377 | ||
378 | if (!str.contains("\"") && !str.contains(",")) { | |
379 | // Just append the string with a comma. | |
380 | result.append(str); | |
381 | } else if (!str.contains("\"") && str.contains(",")) { | |
382 | // Contains commas, but no quotes. Just double-quote it. | |
383 | result.append("\""); | |
384 | result.append(str); | |
385 | result.append("\""); | |
386 | } else if (str.contains("\"")) { | |
387 | // Contains quotes and maybe commas. Double-quote it and | |
388 | // replace quotes inside. | |
389 | result.append("\""); | |
390 | for (int j = 0; j < str.length(); j++) { | |
391 | char ch = str.charAt(j); | |
392 | result.append(ch); | |
393 | if (ch == '\"') { | |
394 | result.append("\""); | |
395 | } | |
396 | } | |
397 | result.append("\""); | |
398 | } | |
399 | ||
400 | if (i < list.size() - 1) { | |
401 | result.append(","); | |
402 | } | |
403 | i++; | |
404 | } | |
405 | return result.toString(); | |
406 | } | |
407 | ||
9588c713 KL |
408 | /** |
409 | * Determine display width of a Unicode code point. | |
410 | * | |
411 | * @param ch the code point, can be char | |
412 | * @return the number of text cell columns required to display this code | |
413 | * point, one of 0, 1, or 2 | |
414 | */ | |
415 | public static int width(final int ch) { | |
416 | /* | |
417 | * This routine is a modified version of mk_wcwidth() available | |
418 | * at: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c | |
419 | * | |
420 | * The combining characters list has been omitted from this | |
421 | * implementation. Hopefully no users will be impacted. | |
422 | */ | |
423 | ||
424 | // 8-bit control characters: width 0 | |
425 | if (ch == 0) { | |
426 | return 0; | |
427 | } | |
428 | if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) { | |
429 | return 0; | |
430 | } | |
431 | ||
432 | // All others: either 1 or 2 | |
433 | if ((ch >= 0x1100) | |
434 | && ((ch <= 0x115f) | |
435 | // Hangul Jamo init. consonants | |
436 | || (ch == 0x2329) | |
437 | || (ch == 0x232a) | |
438 | // CJK ... Yi | |
439 | || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f)) | |
440 | // Hangul Syllables | |
441 | || ((ch >= 0xac00) && (ch <= 0xd7a3)) | |
442 | // CJK Compatibility Ideographs | |
443 | || ((ch >= 0xf900) && (ch <= 0xfaff)) | |
444 | // Vertical forms | |
445 | || ((ch >= 0xfe10) && (ch <= 0xfe19)) | |
446 | // CJK Compatibility Forms | |
447 | || ((ch >= 0xfe30) && (ch <= 0xfe6f)) | |
448 | // Fullwidth Forms | |
449 | || ((ch >= 0xff00) && (ch <= 0xff60)) | |
450 | || ((ch >= 0xffe0) && (ch <= 0xffe6)) | |
451 | || ((ch >= 0x20000) && (ch <= 0x2fffd)) | |
452 | || ((ch >= 0x30000) && (ch <= 0x3fffd)) | |
218d18db | 453 | // emoji |
afdec5e9 | 454 | || ((ch >= 0x1f004) && (ch <= 0x1fffd)) |
9588c713 KL |
455 | ) |
456 | ) { | |
457 | return 2; | |
458 | } | |
459 | return 1; | |
460 | } | |
461 | ||
462 | /** | |
463 | * Determine display width of a string. This ASSUMES that no characters | |
464 | * are combining. Hopefully no users will be impacted. | |
465 | * | |
466 | * @param str the string | |
467 | * @return the number of text cell columns required to display this string | |
468 | */ | |
469 | public static int width(final String str) { | |
470 | int n = 0; | |
218d18db KL |
471 | for (int i = 0; i < str.length();) { |
472 | int ch = str.codePointAt(i); | |
473 | n += width(ch); | |
474 | i += Character.charCount(ch); | |
9588c713 KL |
475 | } |
476 | return n; | |
477 | } | |
478 | ||
66edb445 KL |
479 | /** |
480 | * Check if character is in the CJK range. | |
481 | * | |
482 | * @param ch character to check | |
483 | * @return true if this character is in the CJK range | |
484 | */ | |
485 | public static boolean isCjk(final int ch) { | |
486 | return ((ch >= 0x2e80) && (ch <= 0x9fff)); | |
487 | } | |
488 | ||
489 | /** | |
490 | * Check if character is in the emoji range. | |
491 | * | |
492 | * @param ch character to check | |
493 | * @return true if this character is in the emoji range | |
494 | */ | |
495 | public static boolean isEmoji(final int ch) { | |
496 | return ((ch >= 0x1f004) && (ch <= 0x1fffd)); | |
497 | } | |
498 | ||
34bb6e52 KL |
499 | // ------------------------------------------------------------------------ |
500 | // Base64 ----------------------------------------------------------------- | |
501 | // ------------------------------------------------------------------------ | |
502 | ||
503 | /* | |
504 | * The Base64 encoder/decoder below is provided to support JDK 1.6 - JDK | |
505 | * 11. It was taken from https://sourceforge.net/projects/migbase64/ | |
506 | * | |
507 | * The following changes were made: | |
508 | * | |
509 | * - Code has been indented and long lines cut to fit within 80 columns. | |
510 | * | |
511 | * - Char, String, and "fast" byte functions removed. byte versions | |
512 | * retained and called toBase64()/fromBase64(). | |
513 | * | |
514 | * - Enclosing braces added to blocks. | |
515 | */ | |
516 | ||
517 | /** | |
518 | * A very fast and memory efficient class to encode and decode to and | |
519 | * from BASE64 in full accordance with RFC 2045.<br><br> On Windows XP | |
520 | * sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10 | |
521 | * times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast | |
522 | * on larger arrays (10000 - 1000000 bytes) compared to | |
523 | * <code>sun.misc.Encoder()/Decoder()</code>.<br><br> | |
524 | * | |
525 | * On byte arrays the encoder is about 20% faster than Jakarta Commons | |
526 | * Base64 Codec for encode and about 50% faster for decoding large | |
527 | * arrays. This implementation is about twice as fast on very small | |
528 | * arrays (< 30 bytes). If source/destination is a <code>String</code> | |
529 | * this version is about three times as fast due to the fact that the | |
530 | * Commons Codec result has to be recoded to a <code>String</code> from | |
531 | * <code>byte[]</code>, which is very expensive.<br><br> | |
532 | * | |
533 | * This encode/decode algorithm doesn't create any temporary arrays as | |
534 | * many other codecs do, it only allocates the resulting array. This | |
535 | * produces less garbage and it is possible to handle arrays twice as | |
536 | * large as algorithms that create a temporary array. (E.g. Jakarta | |
537 | * Commons Codec). It is unknown whether Sun's | |
538 | * <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays but | |
539 | * since performance is quite low it probably does.<br><br> | |
540 | * | |
541 | * The encoder produces the same output as the Sun one except that the | |
542 | * Sun's encoder appends a trailing line separator if the last character | |
543 | * isn't a pad. Unclear why but it only adds to the length and is | |
544 | * probably a side effect. Both are in conformance with RFC 2045 | |
545 | * though.<br> Commons codec seem to always att a trailing line | |
546 | * separator.<br><br> | |
547 | * | |
548 | * <b>Note!</b> The encode/decode method pairs (types) come in three | |
549 | * versions with the <b>exact</b> same algorithm and thus a lot of code | |
550 | * redundancy. This is to not create any temporary arrays for transcoding | |
551 | * to/from different format types. The methods not used can simply be | |
552 | * commented out.<br><br> | |
553 | * | |
554 | * There is also a "fast" version of all decode methods that works the | |
555 | * same way as the normal ones, but har a few demands on the decoded | |
556 | * input. Normally though, these fast verions should be used if the | |
557 | * source if the input is known and it hasn't bee tampered with.<br><br> | |
558 | * | |
559 | * If you find the code useful or you find a bug, please send me a note | |
560 | * at base64 @ miginfocom . com. | |
561 | * | |
562 | * Licence (BSD): | |
563 | * ============== | |
564 | * | |
565 | * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom | |
566 | * . com) All rights reserved. | |
567 | * | |
568 | * Redistribution and use in source and binary forms, with or without | |
569 | * modification, are permitted provided that the following conditions are | |
570 | * met: Redistributions of source code must retain the above copyright | |
571 | * notice, this list of conditions and the following disclaimer. | |
572 | * Redistributions in binary form must reproduce the above copyright | |
573 | * notice, this list of conditions and the following disclaimer in the | |
574 | * documentation and/or other materials provided with the distribution. | |
575 | * Neither the name of the MiG InfoCom AB nor the names of its | |
576 | * contributors may be used to endorse or promote products derived from | |
577 | * this software without specific prior written permission. | |
578 | * | |
579 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
580 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
581 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
582 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
583 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
584 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
585 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
586 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
587 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
588 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
589 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
590 | * | |
591 | * @version 2.2 | |
592 | * @author Mikael Grev | |
593 | * Date: 2004-aug-02 | |
594 | * Time: 11:31:11 | |
595 | */ | |
596 | ||
/**
 * The Base64 alphabet: index is the 6-bit value, element is the
 * character that encodes it.
 */
private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

/**
 * Reverse lookup of CA, indexed by unsigned byte value: the 6-bit value
 * for a legal character, 0 for the pad character '=', and -1 for every
 * other (illegal) byte.
 */
private static final int[] IA = new int[256];
static {
    Arrays.fill(IA, -1);
    for (int i = 0, iS = CA.length; i < iS; i++) {
        IA[CA[i]] = i;
    }
    IA['='] = 0;
}

/**
 * Encodes a raw byte array into a BASE64 <code>String</code>
 * representation in accordance with RFC 2045.  A "\r\n" line separator
 * is always inserted after every 76 output characters, except at the
 * very end of the output.
 *
 * @param sArr The bytes to convert.  If <code>null</code> or length 0
 * an empty string will be returned.
 * @return A BASE64 encoded string.  Never <code>null</code>.
 */
public static final String toBase64(byte[] sArr) {
    // Check special case
    int sLen = sArr != null ? sArr.length : 0;
    if (sLen == 0) {
        return "";
    }

    // Line separators are always emitted, see RFC 2045's 76 character
    // line limit.
    final boolean lineSep = true;

    int eLen = (sLen / 3) * 3;                              // Length of even 24-bits.
    int cCnt = ((sLen - 1) / 3 + 1) << 2;                   // Returned character count
    int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array
    byte[] dArr = new byte[dLen];

    // Encode even 24-bits
    for (int s = 0, d = 0, cc = 0; s < eLen;) {
        // Copy next three bytes into lower 24 bits of int, paying
        // attention to sign.
        int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);

        // Encode the int into four chars
        dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
        dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
        dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
        dArr[d++] = (byte) CA[i & 0x3f];

        // Add optional line separator: 19 groups of 4 chars is 76
        // chars.  Never add one at the very end of the output.
        if (lineSep && ++cc == 19 && d < dLen - 2) {
            dArr[d++] = '\r';
            dArr[d++] = '\n';
            cc = 0;
        }
    }

    // Pad and encode last bits if source isn't an even 24 bits.
    int left = sLen - eLen; // 0 - 2.
    if (left > 0) {
        // Prepare the int
        int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

        // Set last four chars
        dArr[dLen - 4] = (byte) CA[i >> 12];
        dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
        dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
        dArr[dLen - 1] = '=';
    }
    try {
        // The charset is named by string rather than by constant so
        // that this still builds on the oldest JDK this codec supports.
        return new String(dArr, "UTF-8");
    } catch (java.io.UnsupportedEncodingException e) {
        throw new IllegalArgumentException(e);
    }

}

/**
 * Decodes a BASE64 encoded byte array.  All illegal characters will
 * be ignored and can handle both arrays with and without line
 * separators.
 *
 * @param sArr The source array.  Length 0 will return an empty
 * array.  <code>null</code> will throw an exception.
 * @return The decoded array of bytes.  May be of length 0.  Will be
 * <code>null</code> if the number of legal characters (including '=')
 * isn't divisible by 4, or if there is more padding than data (i.e.
 * the input is definitely corrupted).
 */
public static final byte[] fromBase64(byte[] sArr) {
    int sLen = sArr.length;

    // Count illegal characters (including '\r', '\n') to know what
    // size the returned array will be, so we don't have to
    // reallocate & copy it later.
    int sepCnt = 0; // Number of separator characters.  (Actually illegal characters, but that's a bonus...)
    for (int i = 0; i < sLen; i++) {
        if (IA[sArr[i] & 0xff] < 0) {
            sepCnt++;
        }
    }

    // Check so that legal chars (including '=') are evenly
    // divisible by 4 as specified in RFC 2045.
    if ((sLen - sepCnt) % 4 != 0) {
        return null;
    }

    // Count '=' at the end, scanning backwards over anything that
    // decodes to zero or is illegal.
    int pad = 0;
    for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;) {
        if (sArr[i] == '=') {
            pad++;
        }
    }

    int len = ((sLen - sepCnt) * 6 >> 3) - pad;
    if (len < 0) {
        // More pad characters than payload (e.g. a run of '=' only):
        // report corruption rather than fail on the allocation below.
        return null;
    }

    byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

    for (int s = 0, d = 0; d < len;) {
        // Assemble three bytes into an int from four "valid" characters.
        int i = 0;
        for (int j = 0; j < 4; j++) { // j only increased if a valid char was found.
            int c = IA[sArr[s++] & 0xff];
            if (c >= 0) {
                i |= c << (18 - j * 6);
            } else {
                j--;
            }
        }

        // Add the bytes
        dArr[d++] = (byte) (i >> 16);
        if (d < len) {
            dArr[d++]= (byte) (i >> 8);
            if (d < len) {
                dArr[d++] = (byte) i;
            }
        }
    }

    return dArr;
}
738 | ||
7657ad8c | 739 | } |