2 * Copyright (C) 2010 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Changes (@author niki):
19 * - default charset -> UTF-8
22 package be
.nikiroo
.utils
.streams
;
24 import java
.io
.UnsupportedEncodingException
;
27 * Utilities for encoding and decoding the Base64 representation of
28 * binary data. See RFCs <a
29 * href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a
30 * href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>.
34 * Default values for encoder/decoder flags.
36 public static final int DEFAULT
= 0;
39 * Encoder flag bit to omit the padding '=' characters at the end
40 * of the output (if any).
42 public static final int NO_PADDING
= 1;
45 * Encoder flag bit to omit all line terminators (i.e., the output
46 * will be on one long line).
48 public static final int NO_WRAP
= 2;
51 * Encoder flag bit to indicate lines should be terminated with a
52 * CRLF pair instead of just an LF. Has no effect if {@code
53 * NO_WRAP} is specified as well.
55 public static final int CRLF
= 4;
58 * Encoder/decoder flag bit to indicate using the "URL and
59 * filename safe" variant of Base64 (see RFC 3548 section 4) where
60 * {@code -} and {@code _} are used in place of {@code +} and
63 public static final int URL_SAFE
= 8;
66 * Flag to pass to {@link Base64OutputStream} to indicate that it
67 * should not close the output stream it is wrapping when it
70 public static final int NO_CLOSE
= 16;
72 // --------------------------------------------------------
74 // --------------------------------------------------------
76 /* package */ static abstract class Coder
{
81 * Encode/decode another block of input data. this.output is
82 * provided by the caller, and must be big enough to hold all
83 * the coded data. On exit, this.opwill be set to the length
86 * @param finish true if this is the final call to process for
87 * this object. Will finalize the coder state and
88 * include any final bytes in the output.
90 * @return true if the input so far is good; false if some
91 * error has been detected in the input stream..
93 public abstract boolean process(byte[] input
, int offset
, int len
, boolean finish
);
96 * @return the maximum number of bytes a call to process()
97 * could produce for the given number of input bytes. This may
100 public abstract int maxOutputSize(int len
);
103 // --------------------------------------------------------
105 // --------------------------------------------------------
108 * Decode the Base64-encoded data in input and return the data in
111 * <p>The padding '=' characters at the end are considered optional, but
112 * if any are present, there must be the correct number of them.
114 * @param str the input String to decode, which is converted to
115 * bytes using the UTF-8 charset
116 * @param flags controls certain features of the decoded output.
117 * Pass {@code DEFAULT} to decode standard Base64.
119 * @throws IllegalArgumentException if the input contains
122 public static byte[] decode(String str
, int flags
) {
124 return decode(str
.getBytes("UTF-8"), flags
);
125 } catch (UnsupportedEncodingException e
) {
126 // All conforming JVM are expected to support UTF-8
132 * Decode the Base64-encoded data in input and return the data in
135 * <p>The padding '=' characters at the end are considered optional, but
136 * if any are present, there must be the correct number of them.
138 * @param input the input array to decode
139 * @param flags controls certain features of the decoded output.
140 * Pass {@code DEFAULT} to decode standard Base64.
142 * @throws IllegalArgumentException if the input contains
145 public static byte[] decode(byte[] input
, int flags
) {
146 return decode(input
, 0, input
.length
, flags
);
150 * Decode the Base64-encoded data in input and return the data in
153 * <p>The padding '=' characters at the end are considered optional, but
154 * if any are present, there must be the correct number of them.
156 * @param input the data to decode
157 * @param offset the position within the input array at which to start
158 * @param len the number of bytes of input to decode
159 * @param flags controls certain features of the decoded output.
160 * Pass {@code DEFAULT} to decode standard Base64.
162 * @throws IllegalArgumentException if the input contains
165 public static byte[] decode(byte[] input
, int offset
, int len
, int flags
) {
166 // Allocate space for the most data the input could represent.
167 // (It could contain less if it contains whitespace, etc.)
168 Decoder decoder
= new Decoder(flags
, new byte[len
*3/4]);
170 if (!decoder
.process(input
, offset
, len
, true)) {
171 throw new IllegalArgumentException("bad base-64");
174 // Maybe we got lucky and allocated exactly enough output space.
175 if (decoder
.op
== decoder
.output
.length
) {
176 return decoder
.output
;
179 // Need to shorten the array, so allocate a new one of the
180 // right size and copy.
181 byte[] temp
= new byte[decoder
.op
];
182 System
.arraycopy(decoder
.output
, 0, temp
, 0, decoder
.op
);
// Base64 decoder built on Coder. It keeps a small state machine (states
// 0 to 6, described further down) so that input can be supplied over
// several process() calls.
// NOTE(review): this view of the class is missing lines (some field
// declarations, loop and switch headers, closing braces). The comments
// added here only describe what the visible lines show.
186 /* package */ static class Decoder
extends Coder
{
188 * Lookup table for turning bytes into their position in the
// 256 entries, indexed by (byte & 0xff). Values 0..63 are alphabet
// positions, -1 (SKIP) marks bytes that carry no data and -2 (EQUALS)
// marks the padding character at index 61. In this table index 43 (+)
// maps to 62 and index 47 (/) maps to 63.
191 private static final int DECODE
[] = {
192 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
193 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
194 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
195 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
196 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
197 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
198 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
199 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
200 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
201 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
202 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
203 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
204 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
205 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
206 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
207 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
211 * Decode lookup table for the "web safe" variant (RFC 3548
212 * sec. 4) where - and _ replace + and /.
// Same layout as DECODE, except that index 45 (-) maps to 62 and
// index 95 (_) maps to 63, while indexes 43 and 47 are SKIP.
214 private static final int DECODE_WEBSAFE
[] = {
215 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
216 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
217 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
218 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
219 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
220 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
221 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
222 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
223 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
224 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
225 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
226 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
227 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
228 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
229 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
230 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
233 /** Non-data values in the DECODE arrays. */
234 private static final int SKIP
= -1;
235 private static final int EQUALS
= -2;
238 * States 0-3 are reading through the next input tuple.
239 * State 4 is having read one '=' and expecting exactly
241 * State 5 is expecting no more data or padding characters
243 * State 6 is the error state; an error has been detected
244 * in the input and no future input can "fix" it.
246 private int state
; // state number (0 to 6)
// Lookup table in use for this instance; chosen once in the constructor.
249 final private int[] alphabet
;
// Stores the caller-supplied output buffer and selects the lookup table:
// DECODE when the URL_SAFE bit is clear in flags, DECODE_WEBSAFE otherwise.
251 public Decoder(int flags
, byte[] output
) {
252 this.output
= output
;
254 alphabet
= ((flags
& URL_SAFE
) == 0) ? DECODE
: DECODE_WEBSAFE
;
260 * @return an overestimate for the number of bytes {@code
261 * len} bytes could decode to.
// Three output bytes per four input bytes, plus a fixed margin of 10.
263 public int maxOutputSize(int len
) {
264 return len
* 3/4 + 10;
268 * Decode another block of input data.
270 * @return true if the state machine is still healthy. false if
271 * bad base-64 data has been detected in the input stream.
273 public boolean process(byte[] input
, int offset
, int len
, boolean finish
) {
// Once the error state has been reached, every later call fails at once.
274 if (this.state
== 6) return false;
279 // Using local variables makes the decoder about 12%
280 // faster than if we manipulate the member variables in
281 // the loop. (Even alphabet makes a measurable
282 // difference, which is somewhat surprising to me since
283 // the member variable is final.)
284 int state
= this.state
;
285 int value
= this.value
;
287 final byte[] output
= this.output
;
288 final int[] alphabet
= this.alphabet
;
291 // Try the fast path: we're starting a new tuple and the
292 // next four bytes of the input stream are all data
293 // bytes. This corresponds to going through states
294 // 0-1-2-3-0. We expect to use this method for most of
297 // If any of the next four bytes of input are non-data
298 // (whitespace, etc.), value will end up negative. (All
299 // the non-data values in decode are small negative
300 // numbers, so shifting any of them up and or'ing them
301 // together will result in a value with its top bit set.)
303 // You can remove this whole block and the output should
304 // be the same, just slower.
// Four table lookups are packed into one 24-bit value (6 bits each).
307 (value
= ((alphabet
[input
[p
] & 0xff] << 18) |
308 (alphabet
[input
[p
+1] & 0xff] << 12) |
309 (alphabet
[input
[p
+2] & 0xff] << 6) |
310 (alphabet
[input
[p
+3] & 0xff]))) >= 0) {
// The 24-bit value is written out as three bytes, high byte at op.
311 output
[op
+2] = (byte) value
;
312 output
[op
+1] = (byte) (value
>> 8);
313 output
[op
] = (byte) (value
>> 16);
320 // The fast path isn't available -- either we've read a
321 // partial tuple, or the next four input bytes aren't all
322 // data, or whatever. Fall back to the slower state
323 // machine implementation.
// d is the table entry for the next input byte: 0..63, SKIP or EQUALS.
325 int d
= alphabet
[input
[p
++] & 0xff];
// NOTE(review): the switch header, case labels and most branch bodies of
// the state machine are not visible below; only fragments remain.
332 } else if (d
!= SKIP
) {
340 value
= (value
<< 6) | d
;
342 } else if (d
!= SKIP
) {
350 value
= (value
<< 6) | d
;
352 } else if (d
== EQUALS
) {
353 // Emit the last (partial) output tuple;
354 // expect exactly one more padding character.
355 output
[op
++] = (byte) (value
>> 4);
357 } else if (d
!= SKIP
) {
365 // Emit the output triple and return to state 0.
366 value
= (value
<< 6) | d
;
367 output
[op
+2] = (byte) value
;
368 output
[op
+1] = (byte) (value
>> 8);
369 output
[op
] = (byte) (value
>> 16);
372 } else if (d
== EQUALS
) {
373 // Emit the last (partial) output tuple;
374 // expect no further data or padding characters.
375 output
[op
+1] = (byte) (value
>> 2);
376 output
[op
] = (byte) (value
>> 10);
379 } else if (d
!= SKIP
) {
388 } else if (d
!= SKIP
) {
404 // We're out of input, but a future call could provide
412 // Done reading input. Now figure out where we are left in
413 // the state machine and finish up.
417 // Output length is a multiple of three. Fine.
420 // Read one extra input byte, which isn't enough to
421 // make another output byte. Illegal.
425 // Read two extra input bytes, enough to emit 1 more
426 // output byte. Fine.
427 output
[op
++] = (byte) (value
>> 4);
430 // Read three extra input bytes, enough to emit 2 more
431 // output bytes. Fine.
432 output
[op
++] = (byte) (value
>> 10);
433 output
[op
++] = (byte) (value
>> 2);
436 // Read one padding '=' when we expected 2. Illegal.
440 // Read all the padding '='s we expected and no more.
451 // --------------------------------------------------------
453 // --------------------------------------------------------
456 * Base64-encode the given data and return a newly allocated
457 * String with the result.
459 * @param input the data to encode
460 * @param flags controls certain features of the encoded output.
461 * Passing {@code DEFAULT} results in output that
462 * adheres to RFC 2045.
464 public static String
encodeToString(byte[] input
, int flags
) {
466 return new String(encode(input
, flags
), "US-ASCII");
467 } catch (UnsupportedEncodingException e
) {
468 // US-ASCII is guaranteed to be available.
469 throw new AssertionError(e
);
474 * Base64-encode the given data and return a newly allocated
475 * String with the result.
477 * @param input the data to encode
478 * @param offset the position within the input array at which to
480 * @param len the number of bytes of input to encode
481 * @param flags controls certain features of the encoded output.
482 * Passing {@code DEFAULT} results in output that
483 * adheres to RFC 2045.
485 public static String
encodeToString(byte[] input
, int offset
, int len
, int flags
) {
487 return new String(encode(input
, offset
, len
, flags
), "US-ASCII");
488 } catch (UnsupportedEncodingException e
) {
489 // US-ASCII is guaranteed to be available.
490 throw new AssertionError(e
);
495 * Base64-encode the given data and return a newly allocated
496 * byte[] with the result.
498 * @param input the data to encode
499 * @param flags controls certain features of the encoded output.
500 * Passing {@code DEFAULT} results in output that
501 * adheres to RFC 2045.
503 public static byte[] encode(byte[] input
, int flags
) {
504 return encode(input
, 0, input
.length
, flags
);
508 * Base64-encode the given data and return a newly allocated
509 * byte[] with the result.
511 * @param input the data to encode
512 * @param offset the position within the input array at which to
514 * @param len the number of bytes of input to encode
515 * @param flags controls certain features of the encoded output.
516 * Passing {@code DEFAULT} results in output that
517 * adheres to RFC 2045.
519 public static byte[] encode(byte[] input
, int offset
, int len
, int flags
) {
520 Encoder encoder
= new Encoder(flags
, null);
522 // Compute the exact length of the array we will produce.
523 int output_len
= len
/ 3 * 4;
525 // Account for the tail of the data and the padding bytes, if any.
526 if (encoder
.do_padding
) {
533 case 1: output_len
+= 2; break;
534 case 2: output_len
+= 3; break;
538 // Account for the newlines, if any.
539 if (encoder
.do_newline
&& len
> 0) {
540 output_len
+= (((len
-1) / (3 * Encoder
.LINE_GROUPS
)) + 1) *
541 (encoder
.do_cr ?
2 : 1);
544 encoder
.output
= new byte[output_len
];
545 encoder
.process(input
, offset
, len
, true);
547 assert encoder
.op
== output_len
;
549 return encoder
.output
;
// Base64 encoder built on Coder. Input can be supplied over several
// process() calls: bytes that do not fill a 3-byte group are kept in
// tail until more input arrives or the final call flushes them.
// NOTE(review): this view of the class is missing lines (some field
// declarations, loop and switch headers, closing braces). The comments
// added here only describe what the visible lines show.
552 /* package */ static class Encoder
extends Coder
{
554 * Emit a new line every this many output tuples. Corresponds to
555 * a 76-character line length (the maximum allowable according to
556 * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>).
// 19 groups of 4 output characters give the 76-character line.
558 public static final int LINE_GROUPS
= 19;
561 * Lookup table for turning Base64 alphabet positions (6 bits)
// 64 entries: A-Z, a-z, 0-9, then + and / for positions 62 and 63.
564 private static final byte ENCODE
[] = {
565 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
566 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
567 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
568 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
572 * Lookup table for turning Base64 alphabet positions (6 bits)
// Same as ENCODE except that positions 62 and 63 are - and _.
575 private static final byte ENCODE_WEBSAFE
[] = {
576 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
577 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
578 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
579 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
// Input bytes carried over from the previous process() call, and how
// many of them are currently held.
582 final private byte[] tail
;
583 /* package */ int tailLen
;
// Output options, fixed at construction time from the flags argument.
586 final public boolean do_padding
;
587 final public boolean do_newline
;
588 final public boolean do_cr
;
589 final private byte[] alphabet
;
// Stores the output buffer and translates the flag bits: padding is on
// unless NO_PADDING is set, line wrapping is on unless NO_WRAP is set,
// CR is emitted only when CRLF is set, and URL_SAFE selects
// ENCODE_WEBSAFE instead of ENCODE.
591 public Encoder(int flags
, byte[] output
) {
592 this.output
= output
;
594 do_padding
= (flags
& NO_PADDING
) == 0;
595 do_newline
= (flags
& NO_WRAP
) == 0;
596 do_cr
= (flags
& CRLF
) != 0;
597 alphabet
= ((flags
& URL_SAFE
) == 0) ? ENCODE
: ENCODE_WEBSAFE
;
// count starts at LINE_GROUPS when wrapping is on and at -1 otherwise.
// NOTE(review): presumably the number of groups left on the current
// line; the lines that update it are not visible here.
602 count
= do_newline ? LINE_GROUPS
: -1;
606 * @return an overestimate for the number of bytes {@code
607 * len} bytes could encode to.
// Eight output bytes per five input bytes, plus a fixed margin of 10.
609 public int maxOutputSize(int len
) {
610 return len
* 8/5 + 10;
613 public boolean process(byte[] input
, int offset
, int len
, boolean finish
) {
614 // Using local variables makes the encoder about 9% faster.
615 final byte[] alphabet
= this.alphabet
;
616 final byte[] output
= this.output
;
618 int count
= this.count
;
624 // First we need to concatenate the tail of the previous call
625 // with any input bytes available now and see if we can empty
630 // There was no tail.
635 // A 1-byte tail with at least 2 bytes of
636 // input available now.
637 v
= ((tail
[0] & 0xff) << 16) |
638 ((input
[p
++] & 0xff) << 8) |
646 // A 2-byte tail with at least 1 byte of input.
647 v
= ((tail
[0] & 0xff) << 16) |
648 ((tail
[1] & 0xff) << 8) |
// The 24-bit group v is emitted as four 6-bit alphabet lookups.
656 output
[op
++] = alphabet
[(v
>> 18) & 0x3f];
657 output
[op
++] = alphabet
[(v
>> 12) & 0x3f];
658 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
659 output
[op
++] = alphabet
[v
& 0x3f];
661 if (do_cr
) output
[op
++] = '\r';
667 // At this point either there is no tail, or there are fewer
668 // than 3 bytes of input available.
670 // The main loop, turning 3 input bytes into 4 output bytes on
673 v
= ((input
[p
] & 0xff) << 16) |
674 ((input
[p
+1] & 0xff) << 8) |
676 output
[op
] = alphabet
[(v
>> 18) & 0x3f];
677 output
[op
+1] = alphabet
[(v
>> 12) & 0x3f];
678 output
[op
+2] = alphabet
[(v
>> 6) & 0x3f];
679 output
[op
+3] = alphabet
[v
& 0x3f];
683 if (do_cr
) output
[op
++] = '\r';
690 // Finish up the tail of the input. Note that we need to
691 // consume any bytes in tail before any bytes
692 // remaining in input; there should be at most two bytes
// Exactly one byte is left, counting tail and input together.
695 if (p
-tailLen
== len
-1) {
// 8 bits shifted left by 4 make 12 bits, emitted as two characters.
697 v
= ((tailLen
> 0 ? tail
[t
++] : input
[p
++]) & 0xff) << 4;
699 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
700 output
[op
++] = alphabet
[v
& 0x3f];
706 if (do_cr
) output
[op
++] = '\r';
// Exactly two bytes are left, counting tail and input together.
709 } else if (p
-tailLen
== len
-2) {
// 16 bits shifted left by 2 make 18 bits, emitted as three characters.
711 v
= (((tailLen
> 1 ? tail
[t
++] : input
[p
++]) & 0xff) << 10) |
712 (((tailLen
> 0 ? tail
[t
++] : input
[p
++]) & 0xff) << 2);
714 output
[op
++] = alphabet
[(v
>> 12) & 0x3f];
715 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
716 output
[op
++] = alphabet
[v
& 0x3f];
721 if (do_cr
) output
[op
++] = '\r';
// Nothing is left over, but some output was written and count differs
// from LINE_GROUPS, so the line terminator sequence is still written.
724 } else if (do_newline
&& op
> 0 && count
!= LINE_GROUPS
) {
725 if (do_cr
) output
[op
++] = '\r';
732 // Save the leftovers in tail to be consumed on the next
733 // call to encodeInternal.
// NOTE(review): no encodeInternal is visible in this file view; the
// leftovers are consumed at the top of process() shown above.
736 tail
[tailLen
++] = input
[p
];
737 } else if (p
== len
-2) {
738 tail
[tailLen
++] = input
[p
];
739 tail
[tailLen
++] = input
[p
+1];
/** Private so that this class, which is used through static members, is never instantiated. */
private Base64() {
    // don't instantiate
}