| 1 | /* |
| 2 | * Jexer - Java Text User Interface |
| 3 | * |
| 4 | * The MIT License (MIT) |
| 5 | * |
| 6 | * Copyright (C) 2019 Kevin Lamonte |
| 7 | * |
| 8 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 9 | * copy of this software and associated documentation files (the "Software"), |
| 10 | * to deal in the Software without restriction, including without limitation |
| 11 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 12 | * and/or sell copies of the Software, and to permit persons to whom the |
| 13 | * Software is furnished to do so, subject to the following conditions: |
| 14 | * |
| 15 | * The above copyright notice and this permission notice shall be included in |
| 16 | * all copies or substantial portions of the Software. |
| 17 | * |
| 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 21 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| 24 | * DEALINGS IN THE SOFTWARE. |
| 25 | * |
| 26 | * @author Kevin Lamonte [kevin.lamonte@gmail.com] |
| 27 | * @version 1 |
| 28 | */ |
| 29 | package jexer.bits; |
| 30 | |
| 31 | import java.util.List; |
| 32 | import java.util.ArrayList; |
| 33 | import java.util.Arrays; |
| 34 | |
/**
 * StringUtils contains methods to:
 *
 *    - Convert one or more long lines of strings into justified text
 *      paragraphs.
 *
 *    - Unescape C0 control codes.
 *
 *    - Read/write a line of RFC4180 comma-separated values strings to/from a
 *      list of strings.
 *
 *    - Compute the display width, in text cells, of code points and
 *      strings.
 *
 *    - Encode/decode Base64 (RFC 2045).
 */
public class StringUtils {

    /**
     * Left-justify a string into a list of lines.
     *
     * The string is split on '\n' into paragraphs, and each paragraph is
     * word-wrapped so that no line is wider than n text cells, unless a
     * single word is itself wider than n, in which case that word gets a
     * line of its own.  Runs of spaces/tabs between words collapse to the
     * first whitespace character of the run, and whitespace before the first
     * word of a paragraph is dropped.
     *
     * @param str the string
     * @param n the maximum number of characters in a line
     * @return the list of lines
     */
    public static List<String> left(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * General procedure:
         *
         *   1. Split on '\n' into paragraphs.
         *
         *   2. Scan each line, noting the position of the last
         *      beginning-of-a-word.
         *
         *   3. Chop at the last #2 if the next beginning-of-a-word exceeds
         *      n.
         *
         *   4. Return the lines.
         */

        String [] rawLines = str.split("\n");
        for (int i = 0; i < rawLines.length; i++) {
            String rawLine = rawLines[i];
            StringBuilder line = new StringBuilder();
            StringBuilder word = new StringBuilder();
            boolean inWord = false;
            for (int j = 0; j < rawLine.length(); j++) {
                char ch = rawLine.charAt(j);
                boolean whitespace = (ch == ' ') || (ch == '\t');
                if (!whitespace) {
                    // Either starting a new word or continuing one.
                    word.append(ch);
                    inWord = true;
                } else if (inWord) {
                    // We have just transitioned from a word to whitespace.
                    // Commit the word, then start the next word with this
                    // whitespace character as its separator.
                    placeWord(result, line, word, n);
                    word.setLength(0);
                    word.append(ch);
                    inWord = false;
                }
                // Otherwise we are in the whitespace before another word:
                // do nothing.
            }

            // Commit whatever is left, then emit the paragraph's last line.
            placeWord(result, line, word, n);
            result.add(line.toString());
        }

        return result;
    }

    /**
     * Append a word to the line being built by left(), wrapping first if the
     * word does not fit.
     *
     * @param result the finished lines; the current line is moved here when
     * it has to be wrapped
     * @param line the line under construction, modified in place
     * @param word the word to add, possibly preceded by one whitespace
     * character
     * @param n the maximum number of characters in a line
     */
    private static void placeWord(final List<String> result,
        final StringBuilder line, final StringBuilder word, final int n) {

        // Only wrap when there is something on the line: flushing an empty
        // line would emit a bogus blank line in front of a word that is
        // wider than n.
        if ((line.length() > 0)
            && (width(word.toString()) + width(line.toString()) > n)
        ) {
            result.add(line.toString());
            line.setLength(0);
        }

        // A line must not begin with the space that separated this word
        // from the previous one.
        if ((word.length() > 0)
            && (word.charAt(0) == ' ')
            && (width(line.toString()) == 0)
        ) {
            line.append(word.substring(1));
        } else {
            line.append(word);
        }
    }

    /**
     * Append a run of spaces to a buffer.
     *
     * @param sb the buffer to append to
     * @param count the number of spaces; zero or negative appends nothing
     */
    private static void appendSpaces(final StringBuilder sb, final int count) {
        for (int i = 0; i < count; i++) {
            sb.append(' ');
        }
    }

    /**
     * Right-justify a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of characters in a line
     * @return the list of lines
     */
    public static List<String> right(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but precede each line with spaces to make it n
         * cells wide.  Lines already wider than n are left alone.
         */
        for (String line: left(str, n)) {
            StringBuilder sb = new StringBuilder();
            appendSpaces(sb, n - width(line));
            sb.append(line);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Center a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of characters in a line
     * @return the list of lines
     */
    public static List<String> center(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but precede/succeed each line with spaces to make
         * it n cells wide.  When the padding is odd, the extra space goes on
         * the right.
         */
        for (String line: left(str, n)) {
            int padding = n - width(line);
            int l = padding / 2;
            int r = padding - l;
            StringBuilder sb = new StringBuilder();
            appendSpaces(sb, l);
            sb.append(line);
            appendSpaces(sb, r);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Fully-justify a string into a list of lines.
     *
     * Every line produced by left() except the very last one is stretched to
     * n cells by widening the gaps between its space-separated words.  Lines
     * that contain a single word are returned as-is.
     *
     * @param str the string
     * @param n the maximum number of characters in a line
     * @return the list of lines
     */
    public static List<String> full(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but insert spaces between words to make each line
         * n cells wide.  The "algorithm" here is pretty dumb: it performs a
         * split on space and then distributes the spare cells between
         * words, leftmost gaps first.
         */
        List<String> lines = left(str, n);
        for (int lineI = 0; lineI < lines.size() - 1; lineI++) {
            String line = lines.get(lineI);
            String [] words = line.split(" ");
            if (words.length <= 1) {
                result.add(line);
                continue;
            }

            // Measure in display cells, not UTF-16 units, so that the
            // result agrees with how left() decided where to wrap.
            int cellCount = 0;
            for (int i = 0; i < words.length; i++) {
                cellCount += width(words[i]);
            }
            int gaps = words.length - 1;
            int spaceCount = n - cellCount;
            int q = spaceCount / gaps;
            int r = spaceCount % gaps;

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < gaps; i++) {
                sb.append(words[i]);
                appendSpaces(sb, q);
                if (r > 0) {
                    // Hand out the remainder one cell at a time.
                    sb.append(' ');
                    r--;
                }
            }
            sb.append(words[gaps]);
            result.add(sb.toString());
        }
        if (lines.size() > 0) {
            // The final line is never stretched.
            result.add(lines.get(lines.size() - 1));
        }

        return result;
    }

    /**
     * Convert raw strings into escaped strings that can be splatted on the
     * screen.
     *
     * Backspace, form feed, newline, carriage return, and tab become their
     * two-character backslash forms; DEL (0x7F) becomes "^?"; every other
     * character below 0x20 becomes a single space.  All other characters are
     * passed through unchanged.
     *
     * @param str the string
     * @return a string that can be passed into Screen.putStringXY()
     */
    public static String unescape(final String str) {
        StringBuilder sb = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            switch (ch) {
            case '\b':
                sb.append("\\b");
                break;
            case '\f':
                sb.append("\\f");
                break;
            case '\n':
                sb.append("\\n");
                break;
            case '\r':
                sb.append("\\r");
                break;
            case '\t':
                sb.append("\\t");
                break;
            case 0x7f:
                sb.append("^?");
                break;
            default:
                // Remaining C0 controls have no printable form.
                sb.append(ch < 0x20 ? ' ' : ch);
                break;
            }
        }
        return sb.toString();
    }

    /**
     * Read a line of RFC4180 comma-separated values (CSV) into a list of
     * strings.
     *
     * One trailing line terminator ("\r\n", "\n", or "\r") is ignored if
     * present.  A terminator that is inside a properly closed quoted field
     * is followed by the closing quote, so it is never mistaken for the end
     * of the line.
     *
     * @param line the CSV line, with or without line terminators
     * @return the list of strings; always contains at least one element
     */
    public static List<String> fromCsv(final String line) {
        List<String> result = new ArrayList<String>();

        // Drop a single trailing line terminator.
        int end = line.length();
        if ((end > 0) && (line.charAt(end - 1) == '\n')) {
            end--;
        }
        if ((end > 0) && (line.charAt(end - 1) == '\r')) {
            end--;
        }

        StringBuilder str = new StringBuilder();
        // True when the previous character was a double-quote that has not
        // yet been resolved as either an escape or a field terminator.
        boolean quoted = false;
        // True when the current field began with a double-quote.
        boolean fieldQuoted = false;

        for (int i = 0; i < end; i++) {
            char ch = line.charAt(i);

            if (ch == ',') {
                if (fieldQuoted && !quoted) {
                    // Still waiting to see the terminating quote for this
                    // field: the comma is data.
                    str.append(ch);
                    continue;
                }
                if (!fieldQuoted && quoted) {
                    // An unmatched double-quote and comma.  This should be
                    // an invalid sequence.  We will treat it as a quote
                    // terminating the field.
                    str.append('\"');
                }
                // Either the end of a quoted field or a plain separator.
                result.add(str.toString());
                str = new StringBuilder();
                quoted = false;
                fieldQuoted = false;
                continue;
            }

            if (ch == '\"') {
                if ((str.length() == 0) && (!fieldQuoted)) {
                    // The opening quote to a quoted field.
                    fieldQuoted = true;
                } else if (quoted) {
                    // This is a double-quote.
                    str.append('\"');
                    quoted = false;
                } else {
                    // This is the beginning of a quote.
                    quoted = true;
                }
                continue;
            }

            // Normal character, pass it on.
            str.append(ch);
        }

        // Include the final field.
        result.add(str.toString());

        return result;
    }

    /**
     * Write a list of strings to one line of RFC4180 comma-separated values
     * (CSV).
     *
     * A field is enclosed in double-quotes when it contains a comma, a
     * double-quote, a carriage return, or a line feed; embedded
     * double-quotes are doubled.
     *
     * @param list the list of strings
     * @return the CSV line, without any line terminators
     */
    public static String toCsv(final List<String> list) {
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (String str: list) {
            if (!first) {
                result.append(',');
            }
            first = false;

            boolean hasQuote = str.indexOf('\"') >= 0;
            boolean needsQuoting = hasQuote
                || (str.indexOf(',') >= 0)
                || (str.indexOf('\r') >= 0)
                || (str.indexOf('\n') >= 0);

            if (!needsQuoting) {
                // Just append the string.
                result.append(str);
                continue;
            }

            result.append('\"');
            if (hasQuote) {
                // Replace every quote inside with two quotes.
                for (int j = 0; j < str.length(); j++) {
                    char ch = str.charAt(j);
                    result.append(ch);
                    if (ch == '\"') {
                        result.append('\"');
                    }
                }
            } else {
                result.append(str);
            }
            result.append('\"');
        }
        return result.toString();
    }

    /**
     * Determine display width of a Unicode code point.
     *
     * @param ch the code point, can be char
     * @return the number of text cell columns required to display this code
     * point, one of 0, 1, or 2
     */
    public static int width(final int ch) {
        /*
         * This routine is a modified version of mk_wcwidth() available
         * at: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
         *
         * The combining characters list has been omitted from this
         * implementation.  Hopefully no users will be impacted.
         */

        // 8-bit control characters (including NUL): width 0
        if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) {
            return 0;
        }

        // All others: either 1 or 2
        if ((ch >= 0x1100)
            && ((ch <= 0x115f)
                // Hangul Jamo init. consonants
                || (ch == 0x2329)
                || (ch == 0x232a)
                // CJK ... Yi
                || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f))
                // Hangul Syllables
                || ((ch >= 0xac00) && (ch <= 0xd7a3))
                // CJK Compatibility Ideographs
                || ((ch >= 0xf900) && (ch <= 0xfaff))
                // Vertical forms
                || ((ch >= 0xfe10) && (ch <= 0xfe19))
                // CJK Compatibility Forms
                || ((ch >= 0xfe30) && (ch <= 0xfe6f))
                // Fullwidth Forms
                || ((ch >= 0xff00) && (ch <= 0xff60))
                || ((ch >= 0xffe0) && (ch <= 0xffe6))
                || ((ch >= 0x20000) && (ch <= 0x2fffd))
                || ((ch >= 0x30000) && (ch <= 0x3fffd))
                // emoji
                || ((ch >= 0x1f004) && (ch <= 0x1fffd))
            )
        ) {
            return 2;
        }
        return 1;
    }

    /**
     * Determine display width of a string.  This ASSUMES that no characters
     * are combining.  Hopefully no users will be impacted.
     *
     * @param str the string
     * @return the number of text cell columns required to display this string
     */
    public static int width(final String str) {
        int n = 0;
        // Step by code point so that surrogate pairs are measured once.
        for (int i = 0; i < str.length();) {
            int ch = str.codePointAt(i);
            n += width(ch);
            i += Character.charCount(ch);
        }
        return n;
    }

    /**
     * Check if character is in the CJK range.
     *
     * @param ch character to check
     * @return true if this character is in the CJK range (0x2e80 - 0x9fff)
     */
    public static boolean isCjk(final int ch) {
        return ((ch >= 0x2e80) && (ch <= 0x9fff));
    }

    /**
     * Check if character is in the emoji range.
     *
     * @param ch character to check
     * @return true if this character is in the emoji range (0x1f004 -
     * 0x1fffd)
     */
    public static boolean isEmoji(final int ch) {
        return ((ch >= 0x1f004) && (ch <= 0x1fffd));
    }

    // ------------------------------------------------------------------------
    // Base64 -----------------------------------------------------------------
    // ------------------------------------------------------------------------

    /*
     * The Base64 encoder/decoder below is provided to support JDK 1.6 - JDK
     * 11.  It was taken from https://sourceforge.net/projects/migbase64/
     *
     * The following changes were made:
     *
     * - Code has been indented and long lines cut to fit within 80 columns.
     *
     * - Char, String, and "fast" byte functions removed.  byte versions
     *   retained and called toBase64()/fromBase64().
     *
     * - Enclosing braces added to blocks.
     *
     * - toBase64() always inserts line separators and returns a String.
     *
     * - fromBase64() returns null rather than failing when the input
     *   contains more padding than data.
     */

    /**
     * A very fast and memory efficient class to encode and decode to and
     * from BASE64 in full accordance with RFC 2045.<br><br> On Windows XP
     * sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10
     * times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast
     * on larger arrays (10000 - 1000000 bytes) compared to
     * <code>sun.misc.Encoder()/Decoder()</code>.<br><br>
     *
     * On byte arrays the encoder is about 20% faster than Jakarta Commons
     * Base64 Codec for encode and about 50% faster for decoding large
     * arrays. This implementation is about twice as fast on very small
     * arrays (< 30 bytes). If source/destination is a <code>String</code>
     * this version is about three times as fast due to the fact that the
     * Commons Codec result has to be recoded to a <code>String</code> from
     * <code>byte[]</code>, which is very expensive.<br><br>
     *
     * This encode/decode algorithm doesn't create any temporary arrays as
     * many other codecs do, it only allocates the resulting array. This
     * produces less garbage and it is possible to handle arrays twice as
     * large as algorithms that create a temporary array. (E.g. Jakarta
     * Commons Codec). It is unknown whether Sun's
     * <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays but
     * since performance is quite low it probably does.<br><br>
     *
     * The encoder produces the same output as the Sun one except that the
     * Sun's encoder appends a trailing line separator if the last character
     * isn't a pad. Unclear why but it only adds to the length and is
     * probably a side effect. Both are in conformance with RFC 2045
     * though.<br> Commons codec seem to always att a trailing line
     * separator.<br><br>
     *
     * <b>Note!</b> The encode/decode method pairs (types) come in three
     * versions with the <b>exact</b> same algorithm and thus a lot of code
     * redundancy. This is to not create any temporary arrays for transcoding
     * to/from different format types. The methods not used can simply be
     * commented out.<br><br>
     *
     * There is also a "fast" version of all decode methods that works the
     * same way as the normal ones, but har a few demands on the decoded
     * input. Normally though, these fast verions should be used if the
     * source if the input is known and it hasn't bee tampered with.<br><br>
     *
     * If you find the code useful or you find a bug, please send me a note
     * at base64 @ miginfocom . com.
     *
     * Licence (BSD):
     * ==============
     *
     * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom
     * . com) All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are
     * met: Redistributions of source code must retain the above copyright
     * notice, this list of conditions and the following disclaimer.
     * Redistributions in binary form must reproduce the above copyright
     * notice, this list of conditions and the following disclaimer in the
     * documentation and/or other materials provided with the distribution.
     * Neither the name of the MiG InfoCom AB nor the names of its
     * contributors may be used to endorse or promote products derived from
     * this software without specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
     * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     *
     * @version 2.2
     * @author Mikael Grev
     *         Date: 2004-aug-02
     *         Time: 11:31:11
     */

    /**
     * The Base64 alphabet, indexed by 6-bit value.
     */
    private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /**
     * Reverse lookup from an 8-bit character to its 6-bit value.  Characters
     * outside the alphabet map to -1; the pad character '=' maps to 0.
     */
    private static final int[] IA = new int[256];
    static {
        Arrays.fill(IA, -1);
        for (int i = 0, iS = CA.length; i < iS; i++) {
            IA[CA[i]] = i;
        }
        IA['='] = 0;
    }

    /**
     * Encodes a raw byte array into a BASE64 <code>String</code>
     * representation in accordance with RFC 2045.  A "\r\n" line separator
     * is inserted after every 76 output characters, except at the very end
     * of the output.
     *
     * @param sArr The bytes to convert. If <code>null</code> or length 0
     * an empty string will be returned.
     * @return A BASE64 encoded string. Never <code>null</code>.
     */
    public final static String toBase64(byte[] sArr) {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0) {
            return "";
        }

        int eLen = (sLen / 3) * 3;              // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2;   // Returned character count
        // Length of returned array: the characters plus two for every
        // completed 76-character line that is followed by more output.
        int dLen = cCnt + (((cCnt - 1) / 76) << 1);

        // The output is pure ASCII, so it is assembled directly as chars
        // and no byte-to-String charset decoding is needed.
        char[] dArr = new char[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16
                | (sArr[s++] & 0xff) << 8
                | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = CA[(i >>> 18) & 0x3f];
            dArr[d++] = CA[(i >>> 12) & 0x3f];
            dArr[d++] = CA[(i >>> 6) & 0x3f];
            dArr[d++] = CA[i & 0x3f];

            // Add the line separator after 19 groups (76 characters),
            // unless this is the end of the output.
            if (++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't an even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Prepare the int
            int i = ((sArr[eLen] & 0xff) << 10)
                | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = CA[i >> 12];
            dArr[dLen - 3] = CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? CA[i & 0x3f] : '=';
            dArr[dLen - 1] = '=';
        }

        return new String(dArr);
    }

    /**
     * Decodes a BASE64 encoded byte array. All illegal characters will
     * be ignored and can handle both arrays with and without line
     * separators.
     *
     * @param sArr The source array. Length 0 will return an empty
     * array. <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0. Will be
     * <code>null</code> if the legal characters (including '=') isn't
     * divideable by 4, or if there is more padding than encoded data
     * (I.e. definitely corrupted).
     */
    public final static byte[] fromBase64(byte[] sArr) {
        int sLen = sArr.length;

        // Count illegal characters (including '\r', '\n') to know what
        // size the returned array will be, so we don't have to
        // reallocate & copy it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++) {
            if (IA[sArr[i] & 0xff] < 0) {
                sepCnt++;
            }
        }

        // Check so that legal chars (including '=') are evenly
        // divideable by 4 as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0) {
            return null;
        }

        // Count the '=' characters at the end, scanning backwards over
        // anything that is illegal or decodes to zero.
        int pad = 0;
        for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;) {
            if (sArr[i] == '=') {
                pad++;
            }
        }

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;
        if (len < 0) {
            // More pad characters than data (e.g. "========"): corrupted.
            return null;
        }

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++) {
                // j only increased if a valid char was found.
                int c = IA[sArr[s++] & 0xff];
                if (c >= 0) {
                    i |= c << (18 - j * 6);
                } else {
                    j--;
                }
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len) {
                    dArr[d++] = (byte) i;
                }
            }
        }

        return dArr;
    }

}