| 1 | /* |
| 2 | * Jexer - Java Text User Interface |
| 3 | * |
| 4 | * The MIT License (MIT) |
| 5 | * |
| 6 | * Copyright (C) 2019 Kevin Lamonte |
| 7 | * |
| 8 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 9 | * copy of this software and associated documentation files (the "Software"), |
| 10 | * to deal in the Software without restriction, including without limitation |
| 11 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 12 | * and/or sell copies of the Software, and to permit persons to whom the |
| 13 | * Software is furnished to do so, subject to the following conditions: |
| 14 | * |
| 15 | * The above copyright notice and this permission notice shall be included in |
| 16 | * all copies or substantial portions of the Software. |
| 17 | * |
| 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 21 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| 24 | * DEALINGS IN THE SOFTWARE. |
| 25 | * |
| 26 | * @author Kevin Lamonte [kevin.lamonte@gmail.com] |
| 27 | * @version 1 |
| 28 | */ |
| 29 | package jexer.bits; |
| 30 | |
| 31 | import java.util.List; |
| 32 | import java.util.ArrayList; |
| 33 | import java.util.Arrays; |
| 34 | |
/**
 * StringUtils contains methods to:
 *
 *    - Convert one or more long lines of strings into justified text
 *      paragraphs.
 *
 *    - Escape C0 control codes (see {@link #unescape(String)}).
 *
 *    - Read/write a line of RFC4180 comma-separated values strings to/from a
 *      list of strings.
 *
 *    - Compute number of visible text cells for a given Unicode codepoint or
 *      string.
 *
 *    - Convert bytes to and from base-64 encoding.
 */
public class StringUtils {

    // ------------------------------------------------------------------------
    // Justification ----------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Left-justify a string into a list of lines.
     *
     * Runs of spaces/tabs between words collapse to their first character,
     * and whitespace at the start of a paragraph is dropped.  A single word
     * wider than n is never split: it is emitted on a line of its own, so a
     * returned line can be wider than n.
     *
     * @param str the string; each '\n' starts a new paragraph
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> left(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * General procedure:
         *
         *   1. Split on '\n' into paragraphs.
         *
         *   2. Scan each paragraph, collecting one word at a time.  A word
         *      carries the single whitespace character that preceded it.
         *
         *   3. When a word is complete, wrap first if adding it to the
         *      current line would exceed n.
         *
         *   4. Return the lines.
         */
        for (String paragraph: str.split("\n")) {
            StringBuilder line = new StringBuilder();
            StringBuilder word = new StringBuilder();
            boolean inWord = false;

            for (int j = 0; j < paragraph.length(); j++) {
                char ch = paragraph.charAt(j);
                boolean whitespace = (ch == ' ') || (ch == '\t');

                if (!whitespace) {
                    // Either continuing a word or starting a new one.
                    word.append(ch);
                    inWord = true;
                } else if (inWord) {
                    // Transition from a word to whitespace: the word is
                    // complete.  The next word starts with this separator.
                    appendWord(result, line, word, n);
                    word.setLength(0);
                    word.append(ch);
                    inWord = false;
                }
                // Otherwise this is more whitespace before the next word:
                // skip it.
            }

            // Flush whatever is pending at the end of the paragraph.
            appendWord(result, line, word, n);
            result.add(line.toString());
        }

        return result;
    }

    /**
     * Add a completed word to the line being built, wrapping first when the
     * word does not fit.
     *
     * @param lines the finished lines; receives the current line on a wrap
     * @param line the line being built, modified in place
     * @param word the word, possibly with one leading whitespace character
     * @param n the maximum number of text cells in a line
     */
    private static void appendWord(final List<String> lines,
        final StringBuilder line, final CharSequence word, final int n) {

        String wordText = word.toString();

        // Only wrap when the line already has something on it.  Flushing an
        // empty line would emit a spurious blank line in front of a word
        // that is wider than n all by itself.
        if ((line.length() > 0)
            && (width(wordText) + width(line.toString()) > n)
        ) {
            lines.add(line.toString());
            line.setLength(0);
        }

        if (wordText.startsWith(" ") && (width(line.toString()) == 0)) {
            // Do not begin a line with the separator space.
            line.append(wordText.substring(1));
        } else {
            line.append(wordText);
        }
    }

    /**
     * Right-justify a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> right(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but precede each line with spaces to make it n
         * cells wide.  Lines already wider than n are left alone.
         */
        for (String line: left(str, n)) {
            int padding = n - width(line);
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < padding; i++) {
                sb.append(' ');
            }
            sb.append(line);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Center a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> center(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but precede/succeed each line with spaces to make
         * it n cells wide.  When the padding is odd the extra space goes on
         * the right.
         */
        for (String line: left(str, n)) {
            int lineWidth = width(line);
            int l = (n - lineWidth) / 2;
            int r = n - lineWidth - l;

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < l; i++) {
                sb.append(' ');
            }
            sb.append(line);
            for (int i = 0; i < r; i++) {
                sb.append(' ');
            }
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Fully-justify a string into a list of lines.
     *
     * Every line except the very last one returned by left() is stretched
     * to n cells; the last line is passed through unchanged.
     *
     * @param str the string
     * @param n the maximum number of text cells in a line
     * @return the list of lines
     */
    public static List<String> full(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but insert spaces between words to make each line
         * n cells wide.  The "algorithm" here is pretty dumb: it performs a
         * split on space and then distributes the missing spaces between
         * the words, giving the leftmost gaps one extra when the spaces do
         * not divide evenly.
         */
        List<String> lines = left(str, n);
        for (int lineI = 0; lineI < lines.size() - 1; lineI++) {
            String line = lines.get(lineI);
            String [] words = line.split(" ");
            if (words.length <= 1) {
                // Nothing to spread out.
                result.add(line);
                continue;
            }

            // Measure in text cells, not chars, so that the budget agrees
            // with the wrapping done by left() for double-width glyphs.
            int cellCount = 0;
            for (String word: words) {
                cellCount += width(word);
            }

            int gaps = words.length - 1;
            int spaceCount = n - cellCount;
            int q = spaceCount / gaps;
            int r = spaceCount % gaps;

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < gaps; i++) {
                sb.append(words[i]);
                for (int j = 0; j < q; j++) {
                    sb.append(' ');
                }
                if (r > 0) {
                    sb.append(' ');
                    r--;
                }
            }
            sb.append(words[gaps]);
            result.add(sb.toString());
        }
        if (lines.size() > 0) {
            result.add(lines.get(lines.size() - 1));
        }

        return result;
    }

    // ------------------------------------------------------------------------
    // Control codes ----------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Convert a raw string into a printable string that can be splatted on
     * the screen.  Despite the name, this method <i>escapes</i>: backspace,
     * form feed, newline, carriage return, and tab become the two-character
     * sequences \b, \f, \n, \r, and \t; DEL (0x7F) becomes "^?"; every
     * other character below 0x20 becomes a space.
     *
     * @param str the string
     * @return a string that can be passed into Screen.putStringXY()
     */
    public static String unescape(final String str) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            switch (ch) {
            case '\b':
                sb.append("\\b");
                break;
            case '\f':
                sb.append("\\f");
                break;
            case '\n':
                sb.append("\\n");
                break;
            case '\r':
                sb.append("\\r");
                break;
            case '\t':
                sb.append("\\t");
                break;
            case 0x7f:
                sb.append("^?");
                break;
            default:
                // Remaining C0 controls are blanked; everything else is
                // passed through.
                sb.append(ch < 0x20 ? ' ' : ch);
                break;
            }
        }
        return sb.toString();
    }

    // ------------------------------------------------------------------------
    // CSV --------------------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Read a line of RFC4180 comma-separated values (CSV) into a list of
     * strings.
     *
     * @param line the CSV line, with or without line terminators.  Trailing
     * CR and LF characters are ignored; CR/LF inside a quoted field are
     * kept.
     * @return the list of strings; always contains at least one element
     */
    public static List<String> fromCsv(final String line) {
        List<String> result = new ArrayList<String>();

        // Drop the record terminator, if any, so that it does not end up in
        // the last field.
        int end = line.length();
        while ((end > 0)
            && ((line.charAt(end - 1) == '\n')
                || (line.charAt(end - 1) == '\r'))
        ) {
            end--;
        }

        StringBuilder str = new StringBuilder();
        // True after a double-quote that might be either the first half of
        // an escaped quote ("") or the closing quote of the field.
        boolean quoted = false;
        // True when the current field started with a double-quote.
        boolean fieldQuoted = false;

        for (int i = 0; i < end; i++) {
            char ch = line.charAt(i);

            if (ch == ',') {
                if (fieldQuoted && !quoted) {
                    // Still waiting to see the terminating quote for this
                    // field: the comma is data.
                    str.append(ch);
                    continue;
                }
                if (quoted && !fieldQuoted) {
                    // An unmatched double-quote and comma.  This should be
                    // an invalid sequence.  We will treat it as a quote
                    // terminating the field.
                    str.append('\"');
                }
                // A field separator, or the end of a quoted field.
                result.add(str.toString());
                str.setLength(0);
                quoted = false;
                fieldQuoted = false;
                continue;
            }

            if (ch == '\"') {
                if ((str.length() == 0) && (!fieldQuoted)) {
                    // The opening quote to a quoted field.
                    fieldQuoted = true;
                } else if (quoted) {
                    // Second quote in a row: an escaped double-quote.
                    str.append('\"');
                    quoted = false;
                } else {
                    // Either the closing quote or the first half of an
                    // escaped quote; the next character decides.
                    quoted = true;
                }
                continue;
            }

            // Normal character, pass it on.
            str.append(ch);
        }

        // Include the final field.
        result.add(str.toString());

        return result;
    }

    /**
     * Write a list of strings to one line of RFC4180 comma-separated values
     * (CSV).  Fields containing a comma, a double-quote, CR, or LF are
     * enclosed in double-quotes, with embedded double-quotes doubled.
     *
     * @param list the list of strings
     * @return the CSV line, without any line terminators
     */
    public static String toCsv(final List<String> list) {
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (String str: list) {
            if (!first) {
                result.append(',');
            }
            first = false;

            boolean needsQuotes = (str.indexOf('\"') >= 0)
                || (str.indexOf(',') >= 0)
                || (str.indexOf('\r') >= 0)
                || (str.indexOf('\n') >= 0);

            if (!needsQuotes) {
                result.append(str);
                continue;
            }

            result.append('\"');
            for (int j = 0; j < str.length(); j++) {
                char ch = str.charAt(j);
                result.append(ch);
                if (ch == '\"') {
                    // Escape a quote by doubling it.
                    result.append('\"');
                }
            }
            result.append('\"');
        }
        return result.toString();
    }

    // ------------------------------------------------------------------------
    // Display width ----------------------------------------------------------
    // ------------------------------------------------------------------------

    /**
     * Determine display width of a Unicode code point.
     *
     * @param ch the code point, can be char
     * @return the number of text cell columns required to display this code
     * point, one of 0, 1, or 2
     */
    public static int width(final int ch) {
        /*
         * This routine is a modified version of mk_wcwidth() available
         * at: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
         *
         * The combining characters list has been omitted from this
         * implementation.  Hopefully no users will be impacted.
         */

        // 8-bit control characters (including NUL): width 0
        if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) {
            return 0;
        }

        // All others: either 1 or 2.  Nothing below Hangul Jamo is wide.
        if (ch < 0x1100) {
            return 1;
        }

        boolean wide =
            // Hangul Jamo init. consonants
            (ch <= 0x115f)
            || (ch == 0x2329)
            || (ch == 0x232a)
            // CJK ... Yi
            || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f))
            // Hangul Syllables
            || ((ch >= 0xac00) && (ch <= 0xd7a3))
            // CJK Compatibility Ideographs
            || ((ch >= 0xf900) && (ch <= 0xfaff))
            // Vertical forms
            || ((ch >= 0xfe10) && (ch <= 0xfe19))
            // CJK Compatibility Forms
            || ((ch >= 0xfe30) && (ch <= 0xfe6f))
            // Fullwidth Forms
            || ((ch >= 0xff00) && (ch <= 0xff60))
            || ((ch >= 0xffe0) && (ch <= 0xffe6))
            || ((ch >= 0x20000) && (ch <= 0x2fffd))
            || ((ch >= 0x30000) && (ch <= 0x3fffd))
            // emoji
            || ((ch >= 0x1f004) && (ch <= 0x1fffd));

        return wide ? 2 : 1;
    }

    /**
     * Determine display width of a string.  This ASSUMES that no characters
     * are combining.  Hopefully no users will be impacted.
     *
     * @param str the string, may be null
     * @return the number of text cell columns required to display this
     * string; 0 for null
     */
    public static int width(final String str) {
        if (str == null) {
            return 0;
        }

        int cells = 0;
        int i = 0;
        while (i < str.length()) {
            // Walk by code point so that surrogate pairs count once.
            int ch = str.codePointAt(i);
            cells += width(ch);
            i += Character.charCount(ch);
        }
        return cells;
    }

    /**
     * Check if character is in the CJK range.
     *
     * @param ch character to check
     * @return true if this character is in the CJK range
     */
    public static boolean isCjk(final int ch) {
        return ((ch >= 0x2e80) && (ch <= 0x9fff));
    }

    /**
     * Check if character is in the emoji range.
     *
     * @param ch character to check
     * @return true if this character is in the emoji range
     */
    public static boolean isEmoji(final int ch) {
        return ((ch >= 0x1f004) && (ch <= 0x1fffd));
    }

    // ------------------------------------------------------------------------
    // Base64 -----------------------------------------------------------------
    // ------------------------------------------------------------------------

    /*
     * The Base64 encoder/decoder below is provided to support JDK 1.6 - JDK
     * 11.  It was taken from https://sourceforge.net/projects/migbase64/
     *
     * The following changes were made:
     *
     * - Code has been indented and long lines cut to fit within 80 columns.
     *
     * - Char, String, and "fast" byte functions removed.  byte versions
     *   retained and called toBase64()/fromBase64().
     *
     * - Enclosing braces added to blocks.
     */

    /**
     * A very fast and memory efficient class to encode and decode to and
     * from BASE64 in full accordance with RFC 2045.<br><br> On Windows XP
     * sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10
     * times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast
     * on larger arrays (10000 - 1000000 bytes) compared to
     * <code>sun.misc.Encoder()/Decoder()</code>.<br><br>
     *
     * On byte arrays the encoder is about 20% faster than Jakarta Commons
     * Base64 Codec for encode and about 50% faster for decoding large
     * arrays. This implementation is about twice as fast on very small
     * arrays (&lt; 30 bytes). If source/destination is a <code>String</code>
     * this version is about three times as fast due to the fact that the
     * Commons Codec result has to be recoded to a <code>String</code> from
     * <code>byte[]</code>, which is very expensive.<br><br>
     *
     * This encode/decode algorithm doesn't create any temporary arrays as
     * many other codecs do, it only allocates the resulting array. This
     * produces less garbage and it is possible to handle arrays twice as
     * large as algorithms that create a temporary array. (E.g. Jakarta
     * Commons Codec). It is unknown whether Sun's
     * <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays but
     * since performance is quite low it probably does.<br><br>
     *
     * The encoder produces the same output as the Sun one except that the
     * Sun's encoder appends a trailing line separator if the last character
     * isn't a pad. Unclear why but it only adds to the length and is
     * probably a side effect. Both are in conformance with RFC 2045
     * though.<br> Commons codec seem to always add a trailing line
     * separator.<br><br>
     *
     * <b>Note!</b> The encode/decode method pairs (types) come in three
     * versions with the <b>exact</b> same algorithm and thus a lot of code
     * redundancy. This is to not create any temporary arrays for transcoding
     * to/from different format types. The methods not used can simply be
     * commented out.<br><br>
     *
     * There is also a "fast" version of all decode methods that works the
     * same way as the normal ones, but has a few demands on the decoded
     * input. Normally though, these fast versions should be used if the
     * source of the input is known and it hasn't been tampered with.<br><br>
     *
     * If you find the code useful or you find a bug, please send me a note
     * at base64 @ miginfocom . com.
     *
     * Licence (BSD):
     * ==============
     *
     * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom
     * . com) All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are
     * met: Redistributions of source code must retain the above copyright
     * notice, this list of conditions and the following disclaimer.
     * Redistributions in binary form must reproduce the above copyright
     * notice, this list of conditions and the following disclaimer in the
     * documentation and/or other materials provided with the distribution.
     * Neither the name of the MiG InfoCom AB nor the names of its
     * contributors may be used to endorse or promote products derived from
     * this software without specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
     * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     *
     * @version 2.2
     * @author Mikael Grev
     *         Date: 2004-aug-02
     *         Time: 11:31:11
     */

    /**
     * The base-64 alphabet: index is the 6-bit value, element is its
     * character.
     */
    private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /**
     * Reverse lookup: index is an unsigned byte, element is its 6-bit value
     * or -1 when the byte is not part of the alphabet.  '=' maps to 0 so
     * that padding counts as a legal character.
     */
    private static final int[] IA = new int[256];
    static {
        Arrays.fill(IA, -1);
        for (int i = 0, iS = CA.length; i < iS; i++) {
            IA[CA[i]] = i;
        }
        IA['='] = 0;
    }

    /**
     * Encodes a raw byte array into a BASE64 <code>String</code>
     * representation in accordance with RFC 2045.  A CR LF pair is inserted
     * after every 76 output characters, but never at the very end.
     *
     * @param sArr The bytes to convert. If <code>null</code> or length 0
     * an empty string will be returned.
     * @return A BASE64 encoded string. Never <code>null</code>.
     */
    public final static String toBase64(byte[] sArr) {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0) {
            return "";
        }

        final boolean lineSep = true;

        int eLen = (sLen / 3) * 3;              // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2;   // Returned character count
        // Length of returned array, including the line separators
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0);
        byte[] dArr = new byte[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16
                | (sArr[s++] & 0xff) << 8
                | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[d++] = (byte) CA[i & 0x3f];

            // Add optional line separator: 19 groups of 4 chars = 76 chars
            if (lineSep && ++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't an even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Prepare the int
            int i = ((sArr[eLen] & 0xff) << 10)
                | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = (byte) CA[i >> 12];
            dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
            dArr[dLen - 1] = '=';
        }

        // The charset is named by string (rather than StandardCharsets) to
        // stay compatible with JDK 1.6.
        try {
            return new String(dArr, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Decodes a BASE64 encoded byte array. All illegal characters will
     * be ignored and can handle both arrays with and without line
     * separators.
     *
     * @param sArr The source array. Length 0 will return an empty
     * array. <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0. Will be
     * <code>null</code> if the legal characters (including '=') isn't
     * divisible by 4, or if there is more padding than data (I.e.
     * definitely corrupted).
     */
    public final static byte[] fromBase64(byte[] sArr) {
        int sLen = sArr.length;

        // Count illegal characters (including '\r', '\n') to know what
        // size the returned array will be, so we don't have to
        // reallocate & copy it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++) {
            if (IA[sArr[i] & 0xff] < 0) {
                sepCnt++;
            }
        }

        // Check so that legal chars (including '=') are evenly
        // divisible by 4 as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0) {
            return null;
        }

        // Count the '=' found while scanning backwards over padding,
        // illegal characters, and zero-valued characters.
        int pad = 0;
        for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;) {
            if (sArr[i] == '=') {
                pad++;
            }
        }

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;
        if (len < 0) {
            // More padding than payload: the input cannot be valid.
            return null;
        }

        byte[] dArr = new byte[len];    // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            int valid = 0;
            while (valid < 4) {
                int c = IA[sArr[s++] & 0xff];
                if (c >= 0) {
                    i |= c << (18 - valid * 6);
                    valid++;
                }
                // Illegal characters are skipped without counting.
            }

            // Add the bytes; the last group may hold fewer than three.
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len) {
                    dArr[d++] = (byte) i;
                }
            }
        }

        return dArr;
    }

}