2 * Copyright (C) 2010 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 package be
.nikiroo
.utils
.streams
;
19 import java
.io
.UnsupportedEncodingException
;
22 * Utilities for encoding and decoding the Base64 representation of
23 * binary data. See RFCs <a
24 * href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a
25 * href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>.
29 * Default values for encoder/decoder flags.
31 public static final int DEFAULT
= 0;
34 * Encoder flag bit to omit the padding '=' characters at the end
35 * of the output (if any).
37 public static final int NO_PADDING
= 1;
40 * Encoder flag bit to omit all line terminators (i.e., the output
41 * will be on one long line).
43 public static final int NO_WRAP
= 2;
46 * Encoder flag bit to indicate lines should be terminated with a
47 * CRLF pair instead of just an LF. Has no effect if {@code
48 * NO_WRAP} is specified as well.
50 public static final int CRLF
= 4;
53 * Encoder/decoder flag bit to indicate using the "URL and
54 * filename safe" variant of Base64 (see RFC 3548 section 4) where
55 * {@code -} and {@code _} are used in place of {@code +} and
58 public static final int URL_SAFE
= 8;
61 * Flag to pass to {@link Base64OutputStream} to indicate that it
62 * should not close the output stream it is wrapping when it
65 public static final int NO_CLOSE
= 16;
//  --------------------------------------------------------
//  shared code
//  --------------------------------------------------------
71 /* package */ static abstract class Coder
{
76 * Encode/decode another block of input data. this.output is
77 * provided by the caller, and must be big enough to hold all
78 * the coded data. On exit, this.opwill be set to the length
81 * @param finish true if this is the final call to process for
82 * this object. Will finalize the coder state and
83 * include any final bytes in the output.
85 * @return true if the input so far is good; false if some
86 * error has been detected in the input stream..
88 public abstract boolean process(byte[] input
, int offset
, int len
, boolean finish
);
91 * @return the maximum number of bytes a call to process()
92 * could produce for the given number of input bytes. This may
95 public abstract int maxOutputSize(int len
);
//  --------------------------------------------------------
//  decoding
//  --------------------------------------------------------
103 * Decode the Base64-encoded data in input and return the data in
106 * <p>The padding '=' characters at the end are considered optional, but
107 * if any are present, there must be the correct number of them.
109 * @param str the input String to decode, which is converted to
110 * bytes using the default charset
111 * @param flags controls certain features of the decoded output.
112 * Pass {@code DEFAULT} to decode standard Base64.
114 * @throws IllegalArgumentException if the input contains
117 public static byte[] decode(String str
, int flags
) {
118 return decode(str
.getBytes(), flags
);
122 * Decode the Base64-encoded data in input and return the data in
125 * <p>The padding '=' characters at the end are considered optional, but
126 * if any are present, there must be the correct number of them.
128 * @param input the input array to decode
129 * @param flags controls certain features of the decoded output.
130 * Pass {@code DEFAULT} to decode standard Base64.
132 * @throws IllegalArgumentException if the input contains
135 public static byte[] decode(byte[] input
, int flags
) {
136 return decode(input
, 0, input
.length
, flags
);
140 * Decode the Base64-encoded data in input and return the data in
143 * <p>The padding '=' characters at the end are considered optional, but
144 * if any are present, there must be the correct number of them.
146 * @param input the data to decode
147 * @param offset the position within the input array at which to start
148 * @param len the number of bytes of input to decode
149 * @param flags controls certain features of the decoded output.
150 * Pass {@code DEFAULT} to decode standard Base64.
152 * @throws IllegalArgumentException if the input contains
155 public static byte[] decode(byte[] input
, int offset
, int len
, int flags
) {
156 // Allocate space for the most data the input could represent.
157 // (It could contain less if it contains whitespace, etc.)
158 Decoder decoder
= new Decoder(flags
, new byte[len
*3/4]);
160 if (!decoder
.process(input
, offset
, len
, true)) {
161 throw new IllegalArgumentException("bad base-64");
164 // Maybe we got lucky and allocated exactly enough output space.
165 if (decoder
.op
== decoder
.output
.length
) {
166 return decoder
.output
;
169 // Need to shorten the array, so allocate a new one of the
170 // right size and copy.
171 byte[] temp
= new byte[decoder
.op
];
172 System
.arraycopy(decoder
.output
, 0, temp
, 0, decoder
.op
);
176 /* package */ static class Decoder
extends Coder
{
178 * Lookup table for turning bytes into their position in the
181 private static final int DECODE
[] = {
182 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
183 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
184 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
185 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
186 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
187 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
188 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
189 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
190 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
191 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
192 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
193 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
194 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
195 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
196 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
197 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
201 * Decode lookup table for the "web safe" variant (RFC 3548
202 * sec. 4) where - and _ replace + and /.
204 private static final int DECODE_WEBSAFE
[] = {
205 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
206 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
207 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
208 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
209 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
210 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
211 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
212 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
213 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
214 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
215 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
216 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
217 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
218 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
219 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
220 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
223 /** Non-data values in the DECODE arrays. */
224 private static final int SKIP
= -1;
225 private static final int EQUALS
= -2;
228 * States 0-3 are reading through the next input tuple.
229 * State 4 is having read one '=' and expecting exactly
231 * State 5 is expecting no more data or padding characters
233 * State 6 is the error state; an error has been detected
234 * in the input and no future input can "fix" it.
236 private int state
; // state number (0 to 6)
239 final private int[] alphabet
;
241 public Decoder(int flags
, byte[] output
) {
242 this.output
= output
;
244 alphabet
= ((flags
& URL_SAFE
) == 0) ? DECODE
: DECODE_WEBSAFE
;
250 * @return an overestimate for the number of bytes {@code
251 * len} bytes could decode to.
253 public int maxOutputSize(int len
) {
254 return len
* 3/4 + 10;
258 * Decode another block of input data.
260 * @return true if the state machine is still healthy. false if
261 * bad base-64 data has been detected in the input stream.
263 public boolean process(byte[] input
, int offset
, int len
, boolean finish
) {
264 if (this.state
== 6) return false;
269 // Using local variables makes the decoder about 12%
270 // faster than if we manipulate the member variables in
271 // the loop. (Even alphabet makes a measurable
272 // difference, which is somewhat surprising to me since
273 // the member variable is final.)
274 int state
= this.state
;
275 int value
= this.value
;
277 final byte[] output
= this.output
;
278 final int[] alphabet
= this.alphabet
;
281 // Try the fast path: we're starting a new tuple and the
282 // next four bytes of the input stream are all data
283 // bytes. This corresponds to going through states
284 // 0-1-2-3-0. We expect to use this method for most of
287 // If any of the next four bytes of input are non-data
288 // (whitespace, etc.), value will end up negative. (All
289 // the non-data values in decode are small negative
290 // numbers, so shifting any of them up and or'ing them
291 // together will result in a value with its top bit set.)
293 // You can remove this whole block and the output should
294 // be the same, just slower.
297 (value
= ((alphabet
[input
[p
] & 0xff] << 18) |
298 (alphabet
[input
[p
+1] & 0xff] << 12) |
299 (alphabet
[input
[p
+2] & 0xff] << 6) |
300 (alphabet
[input
[p
+3] & 0xff]))) >= 0) {
301 output
[op
+2] = (byte) value
;
302 output
[op
+1] = (byte) (value
>> 8);
303 output
[op
] = (byte) (value
>> 16);
310 // The fast path isn't available -- either we've read a
311 // partial tuple, or the next four input bytes aren't all
312 // data, or whatever. Fall back to the slower state
313 // machine implementation.
315 int d
= alphabet
[input
[p
++] & 0xff];
322 } else if (d
!= SKIP
) {
330 value
= (value
<< 6) | d
;
332 } else if (d
!= SKIP
) {
340 value
= (value
<< 6) | d
;
342 } else if (d
== EQUALS
) {
343 // Emit the last (partial) output tuple;
344 // expect exactly one more padding character.
345 output
[op
++] = (byte) (value
>> 4);
347 } else if (d
!= SKIP
) {
355 // Emit the output triple and return to state 0.
356 value
= (value
<< 6) | d
;
357 output
[op
+2] = (byte) value
;
358 output
[op
+1] = (byte) (value
>> 8);
359 output
[op
] = (byte) (value
>> 16);
362 } else if (d
== EQUALS
) {
363 // Emit the last (partial) output tuple;
364 // expect no further data or padding characters.
365 output
[op
+1] = (byte) (value
>> 2);
366 output
[op
] = (byte) (value
>> 10);
369 } else if (d
!= SKIP
) {
378 } else if (d
!= SKIP
) {
394 // We're out of input, but a future call could provide
402 // Done reading input. Now figure out where we are left in
403 // the state machine and finish up.
407 // Output length is a multiple of three. Fine.
410 // Read one extra input byte, which isn't enough to
411 // make another output byte. Illegal.
415 // Read two extra input bytes, enough to emit 1 more
416 // output byte. Fine.
417 output
[op
++] = (byte) (value
>> 4);
420 // Read three extra input bytes, enough to emit 2 more
421 // output bytes. Fine.
422 output
[op
++] = (byte) (value
>> 10);
423 output
[op
++] = (byte) (value
>> 2);
426 // Read one padding '=' when we expected 2. Illegal.
430 // Read all the padding '='s we expected and no more.
//  --------------------------------------------------------
//  encoding
//  --------------------------------------------------------
446 * Base64-encode the given data and return a newly allocated
447 * String with the result.
449 * @param input the data to encode
450 * @param flags controls certain features of the encoded output.
451 * Passing {@code DEFAULT} results in output that
452 * adheres to RFC 2045.
454 public static String
encodeToString(byte[] input
, int flags
) {
456 return new String(encode(input
, flags
), "US-ASCII");
457 } catch (UnsupportedEncodingException e
) {
458 // US-ASCII is guaranteed to be available.
459 throw new AssertionError(e
);
464 * Base64-encode the given data and return a newly allocated
465 * String with the result.
467 * @param input the data to encode
468 * @param offset the position within the input array at which to
470 * @param len the number of bytes of input to encode
471 * @param flags controls certain features of the encoded output.
472 * Passing {@code DEFAULT} results in output that
473 * adheres to RFC 2045.
475 public static String
encodeToString(byte[] input
, int offset
, int len
, int flags
) {
477 return new String(encode(input
, offset
, len
, flags
), "US-ASCII");
478 } catch (UnsupportedEncodingException e
) {
479 // US-ASCII is guaranteed to be available.
480 throw new AssertionError(e
);
485 * Base64-encode the given data and return a newly allocated
486 * byte[] with the result.
488 * @param input the data to encode
489 * @param flags controls certain features of the encoded output.
490 * Passing {@code DEFAULT} results in output that
491 * adheres to RFC 2045.
493 public static byte[] encode(byte[] input
, int flags
) {
494 return encode(input
, 0, input
.length
, flags
);
498 * Base64-encode the given data and return a newly allocated
499 * byte[] with the result.
501 * @param input the data to encode
502 * @param offset the position within the input array at which to
504 * @param len the number of bytes of input to encode
505 * @param flags controls certain features of the encoded output.
506 * Passing {@code DEFAULT} results in output that
507 * adheres to RFC 2045.
509 public static byte[] encode(byte[] input
, int offset
, int len
, int flags
) {
510 Encoder encoder
= new Encoder(flags
, null);
512 // Compute the exact length of the array we will produce.
513 int output_len
= len
/ 3 * 4;
515 // Account for the tail of the data and the padding bytes, if any.
516 if (encoder
.do_padding
) {
523 case 1: output_len
+= 2; break;
524 case 2: output_len
+= 3; break;
528 // Account for the newlines, if any.
529 if (encoder
.do_newline
&& len
> 0) {
530 output_len
+= (((len
-1) / (3 * Encoder
.LINE_GROUPS
)) + 1) *
531 (encoder
.do_cr ?
2 : 1);
534 encoder
.output
= new byte[output_len
];
535 encoder
.process(input
, offset
, len
, true);
537 assert encoder
.op
== output_len
;
539 return encoder
.output
;
542 /* package */ static class Encoder
extends Coder
{
544 * Emit a new line every this many output tuples. Corresponds to
545 * a 76-character line length (the maximum allowable according to
546 * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>).
548 public static final int LINE_GROUPS
= 19;
551 * Lookup table for turning Base64 alphabet positions (6 bits)
554 private static final byte ENCODE
[] = {
555 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
556 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
557 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
558 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
562 * Lookup table for turning Base64 alphabet positions (6 bits)
565 private static final byte ENCODE_WEBSAFE
[] = {
566 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
567 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
568 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
569 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
572 final private byte[] tail
;
573 /* package */ int tailLen
;
576 final public boolean do_padding
;
577 final public boolean do_newline
;
578 final public boolean do_cr
;
579 final private byte[] alphabet
;
581 public Encoder(int flags
, byte[] output
) {
582 this.output
= output
;
584 do_padding
= (flags
& NO_PADDING
) == 0;
585 do_newline
= (flags
& NO_WRAP
) == 0;
586 do_cr
= (flags
& CRLF
) != 0;
587 alphabet
= ((flags
& URL_SAFE
) == 0) ? ENCODE
: ENCODE_WEBSAFE
;
592 count
= do_newline ? LINE_GROUPS
: -1;
596 * @return an overestimate for the number of bytes {@code
597 * len} bytes could encode to.
599 public int maxOutputSize(int len
) {
600 return len
* 8/5 + 10;
603 public boolean process(byte[] input
, int offset
, int len
, boolean finish
) {
604 // Using local variables makes the encoder about 9% faster.
605 final byte[] alphabet
= this.alphabet
;
606 final byte[] output
= this.output
;
608 int count
= this.count
;
614 // First we need to concatenate the tail of the previous call
615 // with any input bytes available now and see if we can empty
620 // There was no tail.
625 // A 1-byte tail with at least 2 bytes of
626 // input available now.
627 v
= ((tail
[0] & 0xff) << 16) |
628 ((input
[p
++] & 0xff) << 8) |
636 // A 2-byte tail with at least 1 byte of input.
637 v
= ((tail
[0] & 0xff) << 16) |
638 ((tail
[1] & 0xff) << 8) |
646 output
[op
++] = alphabet
[(v
>> 18) & 0x3f];
647 output
[op
++] = alphabet
[(v
>> 12) & 0x3f];
648 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
649 output
[op
++] = alphabet
[v
& 0x3f];
651 if (do_cr
) output
[op
++] = '\r';
657 // At this point either there is no tail, or there are fewer
658 // than 3 bytes of input available.
660 // The main loop, turning 3 input bytes into 4 output bytes on
663 v
= ((input
[p
] & 0xff) << 16) |
664 ((input
[p
+1] & 0xff) << 8) |
666 output
[op
] = alphabet
[(v
>> 18) & 0x3f];
667 output
[op
+1] = alphabet
[(v
>> 12) & 0x3f];
668 output
[op
+2] = alphabet
[(v
>> 6) & 0x3f];
669 output
[op
+3] = alphabet
[v
& 0x3f];
673 if (do_cr
) output
[op
++] = '\r';
680 // Finish up the tail of the input. Note that we need to
681 // consume any bytes in tail before any bytes
682 // remaining in input; there should be at most two bytes
685 if (p
-tailLen
== len
-1) {
687 v
= ((tailLen
> 0 ? tail
[t
++] : input
[p
++]) & 0xff) << 4;
689 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
690 output
[op
++] = alphabet
[v
& 0x3f];
696 if (do_cr
) output
[op
++] = '\r';
699 } else if (p
-tailLen
== len
-2) {
701 v
= (((tailLen
> 1 ? tail
[t
++] : input
[p
++]) & 0xff) << 10) |
702 (((tailLen
> 0 ? tail
[t
++] : input
[p
++]) & 0xff) << 2);
704 output
[op
++] = alphabet
[(v
>> 12) & 0x3f];
705 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
706 output
[op
++] = alphabet
[v
& 0x3f];
711 if (do_cr
) output
[op
++] = '\r';
714 } else if (do_newline
&& op
> 0 && count
!= LINE_GROUPS
) {
715 if (do_cr
) output
[op
++] = '\r';
722 // Save the leftovers in tail to be consumed on the next
723 // call to encodeInternal.
726 tail
[tailLen
++] = input
[p
];
727 } else if (p
== len
-2) {
728 tail
[tailLen
++] = input
[p
];
729 tail
[tailLen
++] = input
[p
+1];
/** Private so that the class cannot be instantiated from outside. */
private Base64() {
    // don't instantiate
}