Merge commit '7a455971fed716123933d0f685a0d6eebcf3282b'
[fanfix.git] / src / jexer / bits / StringUtils.java
CommitLineData
7657ad8c
KL
1/*
2 * Jexer - Java Text User Interface
3 *
4 * The MIT License (MIT)
5 *
a69ed767 6 * Copyright (C) 2019 Kevin Lamonte
7657ad8c
KL
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 *
26 * @author Kevin Lamonte [kevin.lamonte@gmail.com]
27 * @version 1
28 */
29package jexer.bits;
30
31import java.util.List;
656c0ddd 32import java.util.ArrayList;
34bb6e52 33import java.util.Arrays;
7657ad8c
KL
34
/**
 * StringUtils contains methods to:
 *
 *    - Convert one or more long lines of strings into justified text
 *      paragraphs.
 *
 *    - Unescape C0 control codes.
 *
 *    - Read/write a line of RFC4180 comma-separated values strings to/from a
 *      list of strings.
 *
 *    - Compute number of visible text cells for a given Unicode codepoint or
 *      string.
 *
 *    - Convert bytes to and from base-64 encoding.
 */
public class StringUtils {

    /**
     * Left-justify a string into a list of lines.
     *
     * The string is split on '\n' into paragraphs.  Each paragraph is
     * scanned word by word; a run of spaces/tabs between two words is
     * reduced to its first character.  A word that would push the current
     * line past n columns starts a new line instead.  A single word wider
     * than n is emitted on its own line, unbroken.
     *
     * @param str the string
     * @param n the maximum number of text cells (display columns) in a line
     * @return the list of lines
     */
    public static List<String> left(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        String [] rawLines = str.split("\n");
        for (int i = 0; i < rawLines.length; i++) {
            String raw = rawLines[i];
            StringBuilder line = new StringBuilder();
            StringBuilder word = new StringBuilder();
            boolean inWord = false;

            for (int j = 0; j < raw.length(); j++) {
                char ch = raw.charAt(j);
                if ((ch == ' ') || (ch == '\t')) {
                    if (inWord) {
                        // We have just transitioned from a word to
                        // whitespace: commit the word, then start the next
                        // one with this separator as its first character.
                        line = flushWord(result, line, word, n);
                        word = new StringBuilder();
                        word.append(ch);
                        inWord = false;
                    }
                    // Otherwise we are in the whitespace before another
                    // word: ignore the extra whitespace.
                } else {
                    // Either appending to a word or starting a new one.
                    word.append(ch);
                    inWord = true;
                }
            }

            // Commit the final word and the final line of this paragraph.
            line = flushWord(result, line, word, n);
            result.add(line.toString());
        }

        return result;
    }

    /**
     * Append a pending word (including its leading separator, if any) to
     * the current line, wrapping first if the word does not fit.
     *
     * @param result the list of completed lines; a wrapped line is added
     * here
     * @param line the line being built
     * @param word the pending word
     * @param n the maximum number of text cells in a line
     * @return the line to keep building: either line, or a fresh builder if
     * a wrap occurred
     */
    private static StringBuilder flushWord(final List<String> result,
        final StringBuilder line, final StringBuilder word, final int n) {

        StringBuilder current = line;
        if (width(word.toString()) + width(current.toString()) > n) {
            // This word will exceed the line length.  Wrap at it instead,
            // but never emit an empty line just because the very first word
            // is wider than n.
            if (current.length() > 0) {
                result.add(current.toString());
                current = new StringBuilder();
            }
        }
        if ((word.length() > 0)
            && (word.charAt(0) == ' ')
            && (width(current.toString()) == 0)
        ) {
            // Do not begin a line with the separating space.
            current.append(word.substring(1));
        } else {
            current.append(word);
        }
        return current;
    }

    /**
     * Right-justify a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells (display columns) in a line
     * @return the list of lines
     */
    public static List<String> right(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        // Same as left(), but precede each line with spaces to make it n
        // cells wide.
        for (String line: left(str, n)) {
            int pad = n - width(line);
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < pad; i++) {
                sb.append(' ');
            }
            sb.append(line);
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Center a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells (display columns) in a line
     * @return the list of lines
     */
    public static List<String> center(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        // Same as left(), but precede/succeed each line with spaces to make
        // it n cells wide.  Any odd leftover cell goes on the right.
        for (String line: left(str, n)) {
            int lineWidth = width(line);
            int l = (n - lineWidth) / 2;
            int r = n - lineWidth - l;
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < l; i++) {
                sb.append(' ');
            }
            sb.append(line);
            for (int i = 0; i < r; i++) {
                sb.append(' ');
            }
            result.add(sb.toString());
        }

        return result;
    }

    /**
     * Fully-justify a string into a list of lines.
     *
     * @param str the string
     * @param n the maximum number of text cells (display columns) in a line
     * @return the list of lines
     */
    public static List<String> full(final String str, final int n) {
        List<String> result = new ArrayList<String>();

        /*
         * Same as left(), but insert spaces between words to make each line
         * n cells wide.  The "algorithm" here is pretty dumb: it performs a
         * split on space and then distributes the missing spaces between
         * words, giving the leftmost gaps one extra space each until the
         * remainder is used up.  The last line is left as-is.
         */
        List<String> lines = left(str, n);
        for (int lineI = 0; lineI < lines.size() - 1; lineI++) {
            String line = lines.get(lineI);
            String [] words = line.split(" ");
            if (words.length <= 1) {
                result.add(line);
                continue;
            }

            // Measure in display cells, like left() does, so that
            // double-width text is padded to exactly n columns.
            int cellCount = 0;
            for (int i = 0; i < words.length; i++) {
                cellCount += width(words[i]);
            }
            int gaps = words.length - 1;
            int spaceCount = n - cellCount;
            int q = spaceCount / gaps;
            int r = spaceCount % gaps;

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < gaps; i++) {
                sb.append(words[i]);
                for (int j = 0; j < q; j++) {
                    sb.append(' ');
                }
                if (r > 0) {
                    sb.append(' ');
                    r--;
                }
            }
            sb.append(words[gaps]);
            result.add(sb.toString());
        }
        if (lines.size() > 0) {
            result.add(lines.get(lines.size() - 1));
        }

        return result;
    }

    /**
     * Convert raw strings into escaped strings that can be splatted on the
     * screen.  Backspace, form feed, newline, carriage return, and tab
     * become their two-character backslash forms; DEL (0x7F) becomes "^?";
     * every other character below 0x20 becomes a single space.
     *
     * @param str the string
     * @return a string that can be passed into Screen.putStringXY()
     */
    public static String unescape(final String str) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            switch (ch) {
            case '\b':
                sb.append("\\b");
                break;
            case '\f':
                sb.append("\\f");
                break;
            case '\n':
                sb.append("\\n");
                break;
            case '\r':
                sb.append("\\r");
                break;
            case '\t':
                sb.append("\\t");
                break;
            case 0x7f:
                sb.append("^?");
                break;
            default:
                if (ch < 0x20) {
                    // Any other C0 control code.
                    sb.append(' ');
                } else {
                    sb.append(ch);
                }
                break;
            }
        }
        return sb.toString();
    }

    /**
     * Read a line of RFC4180 comma-separated values (CSV) into a list of
     * strings.
     *
     * @param line the CSV line, with or without a line terminator.  One
     * trailing CRLF, LF, or CR is ignored.
     * @return the list of strings
     */
    public static List<String> fromCsv(final String line) {
        List<String> result = new ArrayList<String>();

        // Ignore one trailing line terminator so that it does not become
        // part of the last field.
        int end = line.length();
        if ((end > 0) && (line.charAt(end - 1) == '\n')) {
            end--;
        }
        if ((end > 0) && (line.charAt(end - 1) == '\r')) {
            end--;
        }

        StringBuilder str = new StringBuilder();
        // quoted: the previous character was a double-quote that has not
        // yet been interpreted (closing quote or first half of "").
        boolean quoted = false;
        // fieldQuoted: the current field started with a double-quote.
        boolean fieldQuoted = false;

        for (int i = 0; i < end; i++) {
            char ch = line.charAt(i);

            if (ch == ',') {
                if (fieldQuoted && !quoted) {
                    // Still waiting to see the terminating quote for this
                    // field: the comma is data.
                    str.append(ch);
                    continue;
                }
                if (!fieldQuoted && quoted) {
                    // An unmatched double-quote and comma.  This should be
                    // an invalid sequence.  We will treat it as a quote
                    // terminating the field.
                    str.append('\"');
                }
                // A field separator, or the end of a quoted field.
                result.add(str.toString());
                str = new StringBuilder();
                quoted = false;
                fieldQuoted = false;
                continue;
            }

            if (ch == '\"') {
                if ((str.length() == 0) && (!fieldQuoted)) {
                    // The opening quote to a quoted field.
                    fieldQuoted = true;
                } else if (quoted) {
                    // This is a double-quote.
                    str.append('\"');
                    quoted = false;
                } else {
                    // This is the beginning of a quote.
                    quoted = true;
                }
                continue;
            }

            // Normal character, pass it on.
            str.append(ch);
        }

        // Include the final field.
        result.add(str.toString());

        return result;
    }

    /**
     * Write a list of strings to one line of RFC4180 comma-separated values
     * (CSV).  Fields containing a comma, a double-quote, CR, or LF are
     * enclosed in double-quotes, with embedded double-quotes doubled.
     *
     * @param list the list of strings
     * @return the CSV line, without any line terminators
     */
    public static String toCsv(final List<String> list) {
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (String str: list) {
            if (!first) {
                result.append(",");
            }
            first = false;

            boolean needsQuotes = (str.indexOf('\"') >= 0)
                || (str.indexOf(',') >= 0)
                || (str.indexOf('\n') >= 0)
                || (str.indexOf('\r') >= 0);

            if (!needsQuotes) {
                // Just append the string.
                result.append(str);
                continue;
            }

            // Double-quote it and double any quotes inside.
            result.append("\"");
            for (int j = 0; j < str.length(); j++) {
                char ch = str.charAt(j);
                result.append(ch);
                if (ch == '\"') {
                    result.append("\"");
                }
            }
            result.append("\"");
        }
        return result.toString();
    }

    /**
     * Determine display width of a Unicode code point.
     *
     * @param ch the code point, can be char
     * @return the number of text cell columns required to display this code
     * point, one of 0, 1, or 2
     */
    public static int width(final int ch) {
        /*
         * This routine is a modified version of mk_wcwidth() available
         * at: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
         *
         * The combining characters list has been omitted from this
         * implementation.  Hopefully no users will be impacted.
         */

        // 8-bit control characters (including NUL): width 0
        if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) {
            return 0;
        }

        // All others: either 1 or 2
        if ((ch >= 0x1100)
            && ((ch <= 0x115f)
                // Hangul Jamo init. consonants
                || (ch == 0x2329)
                || (ch == 0x232a)
                // CJK ... Yi
                || ((ch >= 0x2e80) && (ch <= 0xa4cf) && (ch != 0x303f))
                // Hangul Syllables
                || ((ch >= 0xac00) && (ch <= 0xd7a3))
                // CJK Compatibility Ideographs
                || ((ch >= 0xf900) && (ch <= 0xfaff))
                // Vertical forms
                || ((ch >= 0xfe10) && (ch <= 0xfe19))
                // CJK Compatibility Forms
                || ((ch >= 0xfe30) && (ch <= 0xfe6f))
                // Fullwidth Forms
                || ((ch >= 0xff00) && (ch <= 0xff60))
                || ((ch >= 0xffe0) && (ch <= 0xffe6))
                || ((ch >= 0x20000) && (ch <= 0x2fffd))
                || ((ch >= 0x30000) && (ch <= 0x3fffd))
                // emoji
                || ((ch >= 0x1f004) && (ch <= 0x1fffd))
            )
        ) {
            return 2;
        }
        return 1;
    }

    /**
     * Determine display width of a string.  This ASSUMES that no characters
     * are combining.  Hopefully no users will be impacted.
     *
     * @param str the string, may be null
     * @return the number of text cell columns required to display this
     * string; 0 for null
     */
    public static int width(final String str) {
        if (str == null) {
            return 0;
        }

        int n = 0;
        // Step by code point so that surrogate pairs are measured once.
        for (int i = 0; i < str.length();) {
            int ch = str.codePointAt(i);
            n += width(ch);
            i += Character.charCount(ch);
        }
        return n;
    }

    /**
     * Check if character is in the CJK range.
     *
     * @param ch character to check
     * @return true if this character is in the CJK range
     */
    public static boolean isCjk(final int ch) {
        return ((ch >= 0x2e80) && (ch <= 0x9fff));
    }

    /**
     * Check if character is in the emoji range.
     *
     * @param ch character to check
     * @return true if this character is in the emoji range
     */
    public static boolean isEmoji(final int ch) {
        return ((ch >= 0x1f004) && (ch <= 0x1fffd));
    }

    // ------------------------------------------------------------------------
    // Base64 -----------------------------------------------------------------
    // ------------------------------------------------------------------------

    /*
     * The Base64 encoder/decoder below is provided to support JDK 1.6 - JDK
     * 11.  It was taken from https://sourceforge.net/projects/migbase64/
     *
     * The following changes were made:
     *
     * - Code has been indented and long lines cut to fit within 80 columns.
     *
     * - Char, String, and "fast" byte functions removed.  byte versions
     *   retained and called toBase64()/fromBase64().
     *
     * - Enclosing braces added to blocks.
     */

    /*
     * A very fast and memory efficient class to encode and decode to and
     * from BASE64 in full accordance with RFC 2045.<br><br> On Windows XP
     * sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10
     * times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast
     * on larger arrays (10000 - 1000000 bytes) compared to
     * <code>sun.misc.Encoder()/Decoder()</code>.<br><br>
     *
     * On byte arrays the encoder is about 20% faster than Jakarta Commons
     * Base64 Codec for encode and about 50% faster for decoding large
     * arrays. This implementation is about twice as fast on very small
     * arrays (&lt; 30 bytes). If source/destination is a <code>String</code>
     * this version is about three times as fast due to the fact that the
     * Commons Codec result has to be recoded to a <code>String</code> from
     * <code>byte[]</code>, which is very expensive.<br><br>
     *
     * This encode/decode algorithm doesn't create any temporary arrays as
     * many other codecs do, it only allocates the resulting array. This
     * produces less garbage and it is possible to handle arrays twice as
     * large as algorithms that create a temporary array. (E.g. Jakarta
     * Commons Codec). It is unknown whether Sun's
     * <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays but
     * since performance is quite low it probably does.<br><br>
     *
     * The encoder produces the same output as the Sun one except that the
     * Sun's encoder appends a trailing line separator if the last character
     * isn't a pad. Unclear why but it only adds to the length and is
     * probably a side effect. Both are in conformance with RFC 2045
     * though.<br> Commons codec seem to always add a trailing line
     * separator.<br><br>
     *
     * <b>Note!</b> The encode/decode method pairs (types) come in three
     * versions with the <b>exact</b> same algorithm and thus a lot of code
     * redundancy. This is to not create any temporary arrays for transcoding
     * to/from different format types. The methods not used can simply be
     * commented out.<br><br>
     *
     * There is also a "fast" version of all decode methods that works the
     * same way as the normal ones, but has a few demands on the decoded
     * input. Normally though, these fast versions should be used if the
     * source of the input is known and it hasn't been tampered with.<br><br>
     *
     * If you find the code useful or you find a bug, please send me a note
     * at base64 @ miginfocom . com.
     *
     * Licence (BSD):
     * ==============
     *
     * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom
     * . com) All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are
     * met: Redistributions of source code must retain the above copyright
     * notice, this list of conditions and the following disclaimer.
     * Redistributions in binary form must reproduce the above copyright
     * notice, this list of conditions and the following disclaimer in the
     * documentation and/or other materials provided with the distribution.
     * Neither the name of the MiG InfoCom AB nor the names of its
     * contributors may be used to endorse or promote products derived from
     * this software without specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
     * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     *
     * @version 2.2
     * @author Mikael Grev
     *         Date: 2004-aug-02
     *         Time: 11:31:11
     */

    /**
     * The base-64 alphabet: index is the 6-bit value, element is its
     * character.
     */
    private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /**
     * Reverse lookup of CA: index is a byte value (0-255), element is the
     * 6-bit value, or -1 for bytes that are not part of the alphabet.  The
     * pad character '=' maps to 0.
     */
    private static final int[] IA = new int[256];
    static {
        Arrays.fill(IA, -1);
        for (int i = 0, iS = CA.length; i < iS; i++) {
            IA[CA[i]] = i;
        }
        IA['='] = 0;
    }

    /**
     * Encodes a raw byte array into a BASE64 <code>String</code>
     * representation in accordance with RFC 2045.  A "\r\n" line separator
     * is inserted after every 76 output characters, except at the very end.
     *
     * @param sArr The bytes to convert.  If <code>null</code> or length 0
     * an empty string will be returned.
     * @return A BASE64 encoded string.  Never <code>null</code>.
     */
    public static final String toBase64(byte[] sArr) {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0) {
            return "";
        }

        final boolean lineSep = true;

        int eLen = (sLen / 3) * 3;              // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2;   // Returned character count
        // Length of returned array
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0);
        byte[] dArr = new byte[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16
                | (sArr[s++] & 0xff) << 8
                | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[d++] = (byte) CA[i & 0x3f];

            // Add optional line separator: 19 groups of 4 chars == 76.
            if (lineSep && ++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't an even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Prepare the int
            int i = ((sArr[eLen] & 0xff) << 10)
                | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = (byte) CA[i >> 12];
            dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
            dArr[dLen - 1] = '=';
        }
        try {
            return new String(dArr, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Decodes a BASE64 encoded byte array.  All illegal characters will
     * be ignored and can handle both arrays with and without line
     * separators.
     *
     * @param sArr The source array.  Length 0 will return an empty
     * array.  <code>null</code> will throw an exception.
     * @return The decoded array of bytes.  May be of length 0.  Will be
     * <code>null</code> if the number of legal characters (including '=')
     * isn't divisible by 4, or if there is more padding than data (i.e.
     * definitely corrupted).
     */
    public static final byte[] fromBase64(byte[] sArr) {
        int sLen = sArr.length;

        // Count illegal characters (including '\r', '\n') to know what
        // size the returned array will be, so we don't have to
        // reallocate & copy it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++) {
            if (IA[sArr[i] & 0xff] < 0) {
                sepCnt++;
            }
        }

        // Check so that legal chars (including '=') are evenly
        // divisible by 4 as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0) {
            return null;
        }

        // Count '=' at the end, walking backwards over anything that is
        // illegal or decodes to 0.
        int pad = 0;
        for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;) {
            if (sArr[i] == '=') {
                pad++;
            }
        }

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;
        if (len < 0) {
            // More padding than payload: definitely corrupted.
            return null;
        }

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            // j only increased if a valid char was found.
            for (int j = 0; j < 4; j++) {
                int c = IA[sArr[s++] & 0xff];
                if (c >= 0) {
                    i |= c << (18 - j * 6);
                } else {
                    j--;
                }
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len) {
                    dArr[d++] = (byte) i;
                }
            }
        }

        return dArr;
    }

}