Commit | Line | Data |
---|---|---|
7657ad8c KL |
1 | /* |
2 | * Jexer - Java Text User Interface | |
3 | * | |
4 | * The MIT License (MIT) | |
5 | * | |
a69ed767 | 6 | * Copyright (C) 2019 Kevin Lamonte |
7657ad8c KL |
7 | * |
8 | * Permission is hereby granted, free of charge, to any person obtaining a | |
9 | * copy of this software and associated documentation files (the "Software"), | |
10 | * to deal in the Software without restriction, including without limitation | |
11 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
12 | * and/or sell copies of the Software, and to permit persons to whom the | |
13 | * Software is furnished to do so, subject to the following conditions: | |
14 | * | |
15 | * The above copyright notice and this permission notice shall be included in | |
16 | * all copies or substantial portions of the Software. | |
17 | * | |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
21 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
24 | * DEALINGS IN THE SOFTWARE. | |
25 | * | |
26 | * @author Kevin Lamonte [kevin.lamonte@gmail.com] | |
27 | * @version 1 | |
28 | */ | |
29 | package jexer.bits; | |
30 | ||
31 | import java.util.List; | |
656c0ddd | 32 | import java.util.ArrayList; |
34bb6e52 | 33 | import java.util.Arrays; |
7657ad8c KL |
34 | |
35 | /** | |
d36057df KL |
36 | * StringUtils contains methods to: |
37 | * | |
38 | * - Convert one or more long lines of strings into justified text | |
39 | * paragraphs. | |
40 | * | |
41 | * - Unescape C0 control codes. | |
42 | * | |
656c0ddd KL |
43 | * - Read/write a line of RFC4180 comma-separated values strings to/from a |
44 | * list of strings. | |
54eaded0 KL |
45 | * |
46 | * - Compute number of visible text cells for a given Unicode codepoint or | |
47 | * string. | |
48 | * | |
49 | * - Convert bytes to and from base-64 encoding. | |
7657ad8c | 50 | */ |
public class StringUtils {

    // ------------------------------------------------------------------------
    // Justification ----------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Left-justify a string into a list of lines.
     *
     * The string is split on '\n' into paragraphs, and each paragraph is
     * word-wrapped so that no line is wider than n text cells, unless a
     * single word is itself wider than n, in which case that word gets a
     * line of its own.  Runs of spaces/tabs between words are collapsed to
     * the first whitespace character of the run, and leading whitespace of
     * a paragraph is dropped.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> left(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        for (String paragraph: str.split("\n")) {
            StringBuilder line = new StringBuilder();
            // The pending word, including the single whitespace character
            // that separated it from the previous word (if any).
            StringBuilder word = new StringBuilder();
            boolean inWord = false;

            for (int j = 0; j < paragraph.length(); j++) {
                char ch = paragraph.charAt(j);
                boolean whitespace = (ch == ' ') || (ch == '\t');

                if (!whitespace) {
                    // Either continuing a word or starting a new one.
                    word.append(ch);
                    inWord = true;
                } else if (inWord) {
                    // We have just transitioned from a word to whitespace:
                    // commit the word, then start the next one with this
                    // separator.
                    line = appendWord(result, line, word, n);
                    word = new StringBuilder();
                    word.append(ch);
                    inWord = false;
                }
                // Otherwise we are in the whitespace before another word.
                // Do nothing.
            }

            // Commit whatever is left, and finish the paragraph.
            line = appendWord(result, line, word, n);
            result.add(line.toString());
        }

        return result;
    }

    /**
     * Append a word to the line being built by left(), wrapping first if
     * the word does not fit.
     *
     * @param result the list of finished lines; the current line is added
     * to it when a wrap occurs
     * @param line the line being built
     * @param word the word to add, possibly beginning with its separator
     * @param n the maximum number of text cells in a line
     * @return the line to continue building: either line itself, or a new
     * builder if a wrap occurred
     */
    private static StringBuilder appendWord(final List<String> result,
        final StringBuilder line, final StringBuilder word, final int n) {

        StringBuilder current = line;
        if (width(word.toString()) + width(current.toString()) > n) {
            // This word will exceed the line length.  Wrap at it instead.
            // Do not emit the line if nothing is on it yet: that only
            // happens when the word alone is wider than n, and would
            // produce a spurious blank line in front of it.
            if (current.length() > 0) {
                result.add(current.toString());
            }
            current = new StringBuilder();
        }

        boolean leadingSpace = (word.length() > 0) && (word.charAt(0) == ' ');
        if (leadingSpace && (width(current.toString()) == 0)) {
            // Do not begin a line with the separator space.
            current.append(word, 1, word.length());
        } else {
            current.append(word);
        }
        return current;
    }

    /**
     * Append a number of space characters to a builder.
     *
     * @param sb the builder to append to
     * @param count the number of spaces; zero or negative appends nothing
     */
    private static void appendSpaces(final StringBuilder sb, final int count) {
        for (int i = 0; i < count; i++) {
            sb.append(' ');
        }
    }

    /**
     * Right-justify a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> right(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but precede each line with spaces to make it n
         * cells wide.
         */
        for (String line: left(str, n)) {
            StringBuilder sb = new StringBuilder();
            appendSpaces(sb, n - width(line));
            sb.append(line);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Center a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> center(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but precede/succeed each line with spaces to make
         * it n cells wide.  Any odd leftover cell goes on the right.
         */
        for (String line: left(str, n)) {
            int slack = n - width(line);
            int leftPad = slack / 2;
            int rightPad = slack - leftPad;

            StringBuilder sb = new StringBuilder();
            appendSpaces(sb, leftPad);
            sb.append(line);
            appendSpaces(sb, rightPad);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Fully-justify a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> full(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but insert spaces between words to make each line
         * n cells wide.  The "algorithm" here is pretty dumb: it performs a
         * split on space and then distributes the missing cells evenly
         * between the words, handing any remainder to the leftmost gaps.
         * The final line is never stretched.
         */
        List<String> lines = left(str, n);
        int lastIndex = lines.size() - 1;

        for (int lineI = 0; lineI < lastIndex; lineI++) {
            String line = lines.get(lineI);
            String [] words = line.split(" ");
            int gaps = words.length - 1;

            // Measure in text cells, not chars, so that double-width
            // glyphs are accounted for just like in left().
            int cellCount = 0;
            for (String w: words) {
                cellCount += width(w);
            }
            int spaceCount = n - cellCount;

            if ((gaps < 1) || (spaceCount < gaps)) {
                // Nothing to stretch, or not even one space per gap is
                // available: leave the line as left() produced it.
                result.add(line);
                continue;
            }

            int q = spaceCount / gaps;
            int r = spaceCount % gaps;
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < gaps; i++) {
                sb.append(words[i]);
                appendSpaces(sb, q);
                if (r > 0) {
                    sb.append(' ');
                    r--;
                }
            }
            sb.append(words[gaps]);
            result.add(sb.toString());
        }
        if (lastIndex >= 0) {
            result.add(lines.get(lastIndex));
        }

        return result;
    }

    // ------------------------------------------------------------------------
    // Control codes ----------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Convert raw strings into escaped strings that can be splatted on the
     * screen.  Backspace, form feed, newline, carriage return, and tab are
     * shown as their two-character backslash escapes; DEL is shown as
     * "^?"; every other C0 control code is shown as a space.
     *
     * @param str the string
     * @return a string that can be passed into Screen.putStringXY()
     */
    public static String unescape(final String str) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            switch (ch) {
            case '\b':
                sb.append("\\b");
                break;
            case '\f':
                sb.append("\\f");
                break;
            case '\n':
                sb.append("\\n");
                break;
            case '\r':
                sb.append("\\r");
                break;
            case '\t':
                sb.append("\\t");
                break;
            case 0x7f:
                sb.append("^?");
                break;
            default:
                // Remaining C0 codes become blanks, everything else is
                // passed through untouched.
                sb.append((ch < 0x20) ? ' ' : ch);
                break;
            }
        }
        return sb.toString();
    }

    // ------------------------------------------------------------------------
    // CSV --------------------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Read a line of RFC4180 comma-separated values (CSV) into a list of
     * strings.
     *
     * @param line the CSV line, with or without line terminators.  Any
     * trailing CR/LF characters are ignored.
     * @return the list of strings; always contains at least one (possibly
     * empty) field
     */
    public static List<String> fromCsv(final String line) {
        List<String> result = new ArrayList<String>();

        // Ignore the line terminator(s), if present.  A CR/LF that is the
        // content of a quoted field is always followed by the closing
        // quote, so it is never removed here.
        int end = line.length();
        while ((end > 0)
            && ((line.charAt(end - 1) == '\n')
                || (line.charAt(end - 1) == '\r'))
        ) {
            end--;
        }

        StringBuilder str = new StringBuilder();
        // True when the previous quote might be a closing quote or the
        // first half of an escaped ("") quote.
        boolean quoted = false;
        // True when the current field began with a double-quote.
        boolean fieldQuoted = false;

        for (int i = 0; i < end; i++) {
            char ch = line.charAt(i);

            if (ch == ',') {
                if (fieldQuoted && !quoted) {
                    // Still waiting to see the terminating quote for this
                    // field: the comma is data.
                    str.append(ch);
                    continue;
                }
                if (!fieldQuoted && quoted) {
                    // An unmatched double-quote and comma.  This should be
                    // an invalid sequence.  We will treat it as a quote
                    // terminating the field.
                    str.append('\"');
                }
                // End of field: either a plain separator, or the comma
                // following the closing quote of a quoted field.
                result.add(str.toString());
                str = new StringBuilder();
                quoted = false;
                fieldQuoted = false;
                continue;
            }

            if (ch == '\"') {
                if ((str.length() == 0) && (!fieldQuoted)) {
                    // The opening quote to a quoted field.
                    fieldQuoted = true;
                } else if (quoted) {
                    // Second half of an escaped double-quote.
                    str.append('\"');
                    quoted = false;
                } else {
                    // Either a closing quote or the first half of an
                    // escaped double-quote; the next character decides.
                    quoted = true;
                }
                continue;
            }

            // Normal character, pass it on.
            str.append(ch);
        }

        // Include the final field.
        result.add(str.toString());

        return result;
    }

    /**
     * Write a list of strings to one line of RFC4180 comma-separated values
     * (CSV).  Fields containing a comma, a double-quote, or a line break
     * are enclosed in double-quotes, with embedded double-quotes doubled.
     *
     * @param list the list of strings
     * @return the CSV line, without any line terminators
     */
    public static String toCsv(final List<String> list) {
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (String str: list) {
            if (!first) {
                result.append(',');
            }
            first = false;

            boolean needsQuotes = (str.indexOf('\"') >= 0)
                || (str.indexOf(',') >= 0)
                || (str.indexOf('\n') >= 0)
                || (str.indexOf('\r') >= 0);

            if (!needsQuotes) {
                result.append(str);
            } else {
                result.append('\"');
                result.append(str.replace("\"", "\"\""));
                result.append('\"');
            }
        }
        return result.toString();
    }

    // ------------------------------------------------------------------------
    // Display width ----------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Determine display width of a Unicode code point.
     *
     * @param ch the code point, can be char
     * @return the number of text cell columns required to display this code
     * point, one of 0, 1, or 2
     */
    public static int width(final int ch) {
        /*
         * This routine is a modified version of mk_wcwidth() available
         * at: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
         *
         * The combining characters list has been omitted from this
         * implementation.  Hopefully no users will be impacted.
         */

        // NUL and the 8-bit control characters: width 0
        if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) {
            return 0;
        }

        // All others: either 1 or 2.  Nothing below Hangul Jamo is wide.
        if (ch < 0x1100) {
            return 1;
        }

        boolean wide =
            // Hangul Jamo init. consonants
            (ch <= 0x115f)
            || (ch == 0x2329)
            || (ch == 0x232a)
            // CJK ... Yi
            || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f))
            // Hangul Syllables
            || ((ch >= 0xac00) && (ch <= 0xd7a3))
            // CJK Compatibility Ideographs
            || ((ch >= 0xf900) && (ch <= 0xfaff))
            // Vertical forms
            || ((ch >= 0xfe10) && (ch <= 0xfe19))
            // CJK Compatibility Forms
            || ((ch >= 0xfe30) && (ch <= 0xfe6f))
            // Fullwidth Forms
            || ((ch >= 0xff00) && (ch <= 0xff60))
            || ((ch >= 0xffe0) && (ch <= 0xffe6))
            || ((ch >= 0x20000) && (ch <= 0x2fffd))
            || ((ch >= 0x30000) && (ch <= 0x3fffd))
            // emoji
            || isEmoji(ch);

        return wide ? 2 : 1;
    }

    /**
     * Determine display width of a string.  This ASSUMES that no characters
     * are combining.  Hopefully no users will be impacted.
     *
     * @param str the string, may be null
     * @return the number of text cell columns required to display this
     * string; 0 for null
     */
    public static int width(final String str) {
        if (str == null) {
            return 0;
        }

        int cells = 0;
        int i = 0;
        while (i < str.length()) {
            // Step by code point so that surrogate pairs count once.
            int ch = str.codePointAt(i);
            cells += width(ch);
            i += Character.charCount(ch);
        }
        return cells;
    }

    /**
     * Check if character is in the CJK range.
     *
     * @param ch character to check
     * @return true if this character is in the CJK range
     */
    public static boolean isCjk(final int ch) {
        return ((ch >= 0x2e80) && (ch <= 0x9fff));
    }

    /**
     * Check if character is in the emoji range.
     *
     * @param ch character to check
     * @return true if this character is in the emoji range
     */
    public static boolean isEmoji(final int ch) {
        return ((ch >= 0x1f004) && (ch <= 0x1fffd));
    }

    // ------------------------------------------------------------------------
    // Base64 -----------------------------------------------------------------
    // ------------------------------------------------------------------------

    /*
     * The Base64 encoder/decoder below is provided to support JDK 1.6 - JDK
     * 11.  It was taken from https://sourceforge.net/projects/migbase64/
     *
     * The following changes were made:
     *
     * - Code has been indented and long lines cut to fit within 80 columns.
     *
     * - Char, String, and "fast" byte functions removed.  byte versions
     *   retained and called toBase64()/fromBase64().
     *
     * - Enclosing braces added to blocks.
     */

    /*
     * A very fast and memory efficient class to encode and decode to and
     * from BASE64 in full accordance with RFC 2045.<br><br> On Windows XP
     * sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10
     * times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast
     * on larger arrays (10000 - 1000000 bytes) compared to
     * <code>sun.misc.Encoder()/Decoder()</code>.<br><br>
     *
     * On byte arrays the encoder is about 20% faster than Jakarta Commons
     * Base64 Codec for encode and about 50% faster for decoding large
     * arrays. This implementation is about twice as fast on very small
     * arrays (< 30 bytes). If source/destination is a <code>String</code>
     * this version is about three times as fast due to the fact that the
     * Commons Codec result has to be recoded to a <code>String</code> from
     * <code>byte[]</code>, which is very expensive.<br><br>
     *
     * This encode/decode algorithm doesn't create any temporary arrays as
     * many other codecs do, it only allocates the resulting array. This
     * produces less garbage and it is possible to handle arrays twice as
     * large as algorithms that create a temporary array. (E.g. Jakarta
     * Commons Codec). It is unknown whether Sun's
     * <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays but
     * since performance is quite low it probably does.<br><br>
     *
     * The encoder produces the same output as the Sun one except that the
     * Sun's encoder appends a trailing line separator if the last character
     * isn't a pad. Unclear why but it only adds to the length and is
     * probably a side effect. Both are in conformance with RFC 2045
     * though.<br> Commons codec seem to always add a trailing line
     * separator.<br><br>
     *
     * <b>Note!</b> The encode/decode method pairs (types) come in three
     * versions with the <b>exact</b> same algorithm and thus a lot of code
     * redundancy. This is to not create any temporary arrays for transcoding
     * to/from different format types. The methods not used can simply be
     * commented out.<br><br>
     *
     * There is also a "fast" version of all decode methods that works the
     * same way as the normal ones, but has a few demands on the decoded
     * input. Normally though, these fast versions should be used if the
     * source of the input is known and it hasn't been tampered with.<br><br>
     *
     * If you find the code useful or you find a bug, please send me a note
     * at base64 @ miginfocom . com.
     *
     * Licence (BSD):
     * ==============
     *
     * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom
     * . com) All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are
     * met: Redistributions of source code must retain the above copyright
     * notice, this list of conditions and the following disclaimer.
     * Redistributions in binary form must reproduce the above copyright
     * notice, this list of conditions and the following disclaimer in the
     * documentation and/or other materials provided with the distribution.
     * Neither the name of the MiG InfoCom AB nor the names of its
     * contributors may be used to endorse or promote products derived from
     * this software without specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
     * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     *
     * @version 2.2
     * @author Mikael Grev
     *         Date: 2004-aug-02
     *         Time: 11:31:11
     */

    /**
     * The base-64 alphabet: index is the 6-bit value, element is the
     * character that encodes it.
     */
    private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /**
     * Reverse lookup of CA: index is a byte value (0 - 255), element is the
     * 6-bit value it encodes, or -1 if the byte is not part of the
     * alphabet.  The pad character '=' maps to 0.
     */
    private static final int[] IA = new int[256];
    static {
        Arrays.fill(IA, -1);
        for (int i = 0, iS = CA.length; i < iS; i++) {
            IA[CA[i]] = i;
        }
        IA['='] = 0;
    }

    /**
     * Encodes a raw byte array into a BASE64 <code>String</code>
     * representation in accordance with RFC 2045.  A CR/LF line separator
     * is inserted after every 76 output characters, except at the very end.
     *
     * @param sArr The bytes to convert.  If <code>null</code> or length 0
     * an empty string will be returned.
     * @return A BASE64 encoded string.  Never <code>null</code>.
     */
    public final static String toBase64(byte[] sArr) {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0) {
            return "";
        }

        final boolean lineSep = true;

        int eLen = (sLen / 3) * 3;            // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
        // Length of returned array
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0);
        byte[] dArr = new byte[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16
                | (sArr[s++] & 0xff) << 8
                | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[d++] = (byte) CA[i & 0x3f];

            // Add optional line separator: 19 groups of 4 chars = 76 chars.
            if (lineSep && ++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't an even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Prepare the int
            int i = ((sArr[eLen] & 0xff) << 10)
                | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = (byte) CA[i >> 12];
            dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
            dArr[dLen - 1] = '=';
        }

        try {
            // The output is pure ASCII, so it is identical in UTF-8.
            return new String(dArr, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Decodes a BASE64 encoded byte array.  All illegal characters will
     * be ignored and can handle both arrays with and without line
     * separators.
     *
     * @param sArr The source array.  Length 0 will return an empty
     * array.  <code>null</code> will throw an exception.
     * @return The decoded array of bytes.  May be of length 0.  Will be
     * <code>null</code> if the input is definitely corrupted: the number
     * of legal characters (including '=') isn't divisible by 4, or there
     * is more trailing padding than encoded data.
     */
    public final static byte[] fromBase64(byte[] sArr) {
        int sLen = sArr.length;

        // Count illegal characters (including '\r', '\n') to know what
        // size the returned array will be, so we don't have to
        // reallocate & copy it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++) {
            if (IA[sArr[i] & 0xff] < 0) {
                sepCnt++;
            }
        }

        // Check so that legal chars (including '=') are evenly
        // divisible by 4 as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0) {
            return null;
        }

        // Count the pad characters at the end, skipping over any trailing
        // illegal characters.
        int pad = 0;
        for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;) {
            if (sArr[i] == '=') {
                pad++;
            }
        }

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;
        if (len < 0) {
            // More padding than data: corrupted input.
            return null;
        }

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid"
            // characters.  j is only increased if a valid char was found.
            int i = 0;
            for (int j = 0; j < 4; j++) {
                int c = IA[sArr[s++] & 0xff];
                if (c >= 0) {
                    i |= c << (18 - j * 6);
                } else {
                    j--;
                }
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len) {
                    dArr[d++] = (byte) i;
                }
            }
        }

        return dArr;
    }

}