2 * Copyright (C) 2010 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Changes (@author niki):
19 * - default charset -> UTF-8
22 package be
.nikiroo
.utils
.streams
;
24 import java
.io
.UnsupportedEncodingException
;
27 * Utilities for encoding and decoding the Base64 representation of
28 * binary data. See RFCs <a
29 * href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a
30 * href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>.
34 * Default values for encoder/decoder flags.
36 public static final int DEFAULT
= 0;
39 * Encoder flag bit to omit the padding '=' characters at the end
40 * of the output (if any).
42 public static final int NO_PADDING
= 1;
45 * Encoder flag bit to omit all line terminators (i.e., the output
46 * will be on one long line).
48 public static final int NO_WRAP
= 2;
51 * Encoder flag bit to indicate lines should be terminated with a
52 * CRLF pair instead of just an LF. Has no effect if {@code
53 * NO_WRAP} is specified as well.
55 public static final int CRLF
= 4;
58 * Encoder/decoder flag bit to indicate using the "URL and
59 * filename safe" variant of Base64 (see RFC 3548 section 4) where
60 * {@code -} and {@code _} are used in place of {@code +} and
63 public static final int URL_SAFE
= 8;
66 * Flag to pass to {@link Base64OutputStream} to indicate that it
67 * should not close the output stream it is wrapping when it
70 public static final int NO_CLOSE
= 16;
//  --------------------------------------------------------
//  shared code
//  --------------------------------------------------------
76 /* package */ static abstract class Coder
{
81 * Encode/decode another block of input data. this.output is
82 * provided by the caller, and must be big enough to hold all
83 * the coded data. On exit, this.opwill be set to the length
86 * @param finish true if this is the final call to process for
87 * this object. Will finalize the coder state and
88 * include any final bytes in the output.
90 * @return true if the input so far is good; false if some
91 * error has been detected in the input stream..
93 public abstract boolean process(byte[] input
, int offset
, int len
, boolean finish
);
96 * @return the maximum number of bytes a call to process()
97 * could produce for the given number of input bytes. This may
100 public abstract int maxOutputSize(int len
);
//  --------------------------------------------------------
//  decoding
//  --------------------------------------------------------
108 * Decode the Base64-encoded data in input and return the data in
111 * <p>The padding '=' characters at the end are considered optional, but
112 * if any are present, there must be the correct number of them.
114 * @param str the input String to decode, which is converted to
115 * bytes using the default charset
116 * @param flags controls certain features of the decoded output.
117 * Pass {@code DEFAULT} to decode standard Base64.
119 * @throws IllegalArgumentException if the input contains
122 public static byte[] decode(String str
, int flags
) {
124 return decode(str
.getBytes("UTF-8"), flags
);
125 } catch (UnsupportedEncodingException e
) {
126 // All conforming JVM are expected to support UTF-8
132 * Decode the Base64-encoded data in input and return the data in
135 * <p>The padding '=' characters at the end are considered optional, but
136 * if any are present, there must be the correct number of them.
138 * @param input the input array to decode
139 * @param flags controls certain features of the decoded output.
140 * Pass {@code DEFAULT} to decode standard Base64.
142 * @throws IllegalArgumentException if the input contains
145 public static byte[] decode(byte[] input
, int flags
) {
146 return decode(input
, 0, input
.length
, flags
);
150 * Decode the Base64-encoded data in input and return the data in
153 * <p>The padding '=' characters at the end are considered optional, but
154 * if any are present, there must be the correct number of them.
156 * @param input the data to decode
157 * @param offset the position within the input array at which to start
158 * @param len the number of bytes of input to decode
159 * @param flags controls certain features of the decoded output.
160 * Pass {@code DEFAULT} to decode standard Base64.
162 * @throws IllegalArgumentException if the input contains
165 public static byte[] decode(byte[] input
, int offset
, int len
, int flags
) {
166 // Allocate space for the most data the input could represent.
167 // (It could contain less if it contains whitespace, etc.)
168 Decoder decoder
= new Decoder(flags
, new byte[len
*3/4]);
170 if (!decoder
.process(input
, offset
, len
, true)) {
171 throw new IllegalArgumentException("bad base-64");
174 // Maybe we got lucky and allocated exactly enough output space.
175 if (decoder
.op
== decoder
.output
.length
) {
176 return decoder
.output
;
179 // Need to shorten the array, so allocate a new one of the
180 // right size and copy.
181 byte[] temp
= new byte[decoder
.op
];
182 System
.arraycopy(decoder
.output
, 0, temp
, 0, decoder
.op
);
186 /* package */ static class Decoder
extends Coder
{
188 * Lookup table for turning bytes into their position in the
191 private static final int DECODE
[] = {
192 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
193 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
194 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
195 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
196 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
197 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
198 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
199 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
200 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
201 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
202 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
203 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
204 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
205 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
206 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
207 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
211 * Decode lookup table for the "web safe" variant (RFC 3548
212 * sec. 4) where - and _ replace + and /.
214 private static final int DECODE_WEBSAFE
[] = {
215 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
216 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
217 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
218 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
219 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
220 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
221 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
222 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
223 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
224 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
225 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
226 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
227 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
228 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
229 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
230 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
233 /** Non-data values in the DECODE arrays. */
234 private static final int SKIP
= -1;
235 private static final int EQUALS
= -2;
238 * States 0-3 are reading through the next input tuple.
239 * State 4 is having read one '=' and expecting exactly
241 * State 5 is expecting no more data or padding characters
243 * State 6 is the error state; an error has been detected
244 * in the input and no future input can "fix" it.
246 private int state
; // state number (0 to 6)
249 final private int[] alphabet
;
251 public Decoder(int flags
, byte[] output
) {
252 this.output
= output
;
254 alphabet
= ((flags
& URL_SAFE
) == 0) ? DECODE
: DECODE_WEBSAFE
;
260 * @return an overestimate for the number of bytes {@code
261 * len} bytes could decode to.
264 public int maxOutputSize(int len
) {
265 return len
* 3/4 + 10;
269 * Decode another block of input data.
271 * @return true if the state machine is still healthy. false if
272 * bad base-64 data has been detected in the input stream.
275 public boolean process(byte[] input
, int offset
, int len
, boolean finish
) {
276 if (this.state
== 6) return false;
281 // Using local variables makes the decoder about 12%
282 // faster than if we manipulate the member variables in
283 // the loop. (Even alphabet makes a measurable
284 // difference, which is somewhat surprising to me since
285 // the member variable is final.)
286 int state
= this.state
;
287 int value
= this.value
;
289 final byte[] output
= this.output
;
290 final int[] alphabet
= this.alphabet
;
293 // Try the fast path: we're starting a new tuple and the
294 // next four bytes of the input stream are all data
295 // bytes. This corresponds to going through states
296 // 0-1-2-3-0. We expect to use this method for most of
299 // If any of the next four bytes of input are non-data
300 // (whitespace, etc.), value will end up negative. (All
301 // the non-data values in decode are small negative
302 // numbers, so shifting any of them up and or'ing them
303 // together will result in a value with its top bit set.)
305 // You can remove this whole block and the output should
306 // be the same, just slower.
309 (value
= ((alphabet
[input
[p
] & 0xff] << 18) |
310 (alphabet
[input
[p
+1] & 0xff] << 12) |
311 (alphabet
[input
[p
+2] & 0xff] << 6) |
312 (alphabet
[input
[p
+3] & 0xff]))) >= 0) {
313 output
[op
+2] = (byte) value
;
314 output
[op
+1] = (byte) (value
>> 8);
315 output
[op
] = (byte) (value
>> 16);
322 // The fast path isn't available -- either we've read a
323 // partial tuple, or the next four input bytes aren't all
324 // data, or whatever. Fall back to the slower state
325 // machine implementation.
327 int d
= alphabet
[input
[p
++] & 0xff];
334 } else if (d
!= SKIP
) {
342 value
= (value
<< 6) | d
;
344 } else if (d
!= SKIP
) {
352 value
= (value
<< 6) | d
;
354 } else if (d
== EQUALS
) {
355 // Emit the last (partial) output tuple;
356 // expect exactly one more padding character.
357 output
[op
++] = (byte) (value
>> 4);
359 } else if (d
!= SKIP
) {
367 // Emit the output triple and return to state 0.
368 value
= (value
<< 6) | d
;
369 output
[op
+2] = (byte) value
;
370 output
[op
+1] = (byte) (value
>> 8);
371 output
[op
] = (byte) (value
>> 16);
374 } else if (d
== EQUALS
) {
375 // Emit the last (partial) output tuple;
376 // expect no further data or padding characters.
377 output
[op
+1] = (byte) (value
>> 2);
378 output
[op
] = (byte) (value
>> 10);
381 } else if (d
!= SKIP
) {
390 } else if (d
!= SKIP
) {
406 // We're out of input, but a future call could provide
414 // Done reading input. Now figure out where we are left in
415 // the state machine and finish up.
419 // Output length is a multiple of three. Fine.
422 // Read one extra input byte, which isn't enough to
423 // make another output byte. Illegal.
427 // Read two extra input bytes, enough to emit 1 more
428 // output byte. Fine.
429 output
[op
++] = (byte) (value
>> 4);
432 // Read three extra input bytes, enough to emit 2 more
433 // output bytes. Fine.
434 output
[op
++] = (byte) (value
>> 10);
435 output
[op
++] = (byte) (value
>> 2);
438 // Read one padding '=' when we expected 2. Illegal.
442 // Read all the padding '='s we expected and no more.
//  --------------------------------------------------------
//  encoding
//  --------------------------------------------------------
458 * Base64-encode the given data and return a newly allocated
459 * String with the result.
461 * @param input the data to encode
462 * @param flags controls certain features of the encoded output.
463 * Passing {@code DEFAULT} results in output that
464 * adheres to RFC 2045.
466 public static String
encodeToString(byte[] input
, int flags
) {
468 return new String(encode(input
, flags
), "US-ASCII");
469 } catch (UnsupportedEncodingException e
) {
470 // US-ASCII is guaranteed to be available.
471 throw new AssertionError(e
);
476 * Base64-encode the given data and return a newly allocated
477 * String with the result.
479 * @param input the data to encode
480 * @param offset the position within the input array at which to
482 * @param len the number of bytes of input to encode
483 * @param flags controls certain features of the encoded output.
484 * Passing {@code DEFAULT} results in output that
485 * adheres to RFC 2045.
487 public static String
encodeToString(byte[] input
, int offset
, int len
, int flags
) {
489 return new String(encode(input
, offset
, len
, flags
), "US-ASCII");
490 } catch (UnsupportedEncodingException e
) {
491 // US-ASCII is guaranteed to be available.
492 throw new AssertionError(e
);
497 * Base64-encode the given data and return a newly allocated
498 * byte[] with the result.
500 * @param input the data to encode
501 * @param flags controls certain features of the encoded output.
502 * Passing {@code DEFAULT} results in output that
503 * adheres to RFC 2045.
505 public static byte[] encode(byte[] input
, int flags
) {
506 return encode(input
, 0, input
.length
, flags
);
510 * Base64-encode the given data and return a newly allocated
511 * byte[] with the result.
513 * @param input the data to encode
514 * @param offset the position within the input array at which to
516 * @param len the number of bytes of input to encode
517 * @param flags controls certain features of the encoded output.
518 * Passing {@code DEFAULT} results in output that
519 * adheres to RFC 2045.
521 public static byte[] encode(byte[] input
, int offset
, int len
, int flags
) {
522 Encoder encoder
= new Encoder(flags
, null);
524 // Compute the exact length of the array we will produce.
525 int output_len
= len
/ 3 * 4;
527 // Account for the tail of the data and the padding bytes, if any.
528 if (encoder
.do_padding
) {
535 case 1: output_len
+= 2; break;
536 case 2: output_len
+= 3; break;
540 // Account for the newlines, if any.
541 if (encoder
.do_newline
&& len
> 0) {
542 output_len
+= (((len
-1) / (3 * Encoder
.LINE_GROUPS
)) + 1) *
543 (encoder
.do_cr ?
2 : 1);
546 encoder
.output
= new byte[output_len
];
547 encoder
.process(input
, offset
, len
, true);
549 assert encoder
.op
== output_len
;
551 return encoder
.output
;
554 /* package */ static class Encoder
extends Coder
{
556 * Emit a new line every this many output tuples. Corresponds to
557 * a 76-character line length (the maximum allowable according to
558 * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>).
560 public static final int LINE_GROUPS
= 19;
563 * Lookup table for turning Base64 alphabet positions (6 bits)
566 private static final byte ENCODE
[] = {
567 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
568 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
569 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
570 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
574 * Lookup table for turning Base64 alphabet positions (6 bits)
577 private static final byte ENCODE_WEBSAFE
[] = {
578 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
579 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
580 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
581 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
584 final private byte[] tail
;
585 /* package */ int tailLen
;
588 final public boolean do_padding
;
589 final public boolean do_newline
;
590 final public boolean do_cr
;
591 final private byte[] alphabet
;
593 public Encoder(int flags
, byte[] output
) {
594 this.output
= output
;
596 do_padding
= (flags
& NO_PADDING
) == 0;
597 do_newline
= (flags
& NO_WRAP
) == 0;
598 do_cr
= (flags
& CRLF
) != 0;
599 alphabet
= ((flags
& URL_SAFE
) == 0) ? ENCODE
: ENCODE_WEBSAFE
;
604 count
= do_newline ? LINE_GROUPS
: -1;
608 * @return an overestimate for the number of bytes {@code
609 * len} bytes could encode to.
612 public int maxOutputSize(int len
) {
613 return len
* 8/5 + 10;
617 public boolean process(byte[] input
, int offset
, int len
, boolean finish
) {
618 // Using local variables makes the encoder about 9% faster.
619 final byte[] alphabet
= this.alphabet
;
620 final byte[] output
= this.output
;
622 int count
= this.count
;
628 // First we need to concatenate the tail of the previous call
629 // with any input bytes available now and see if we can empty
634 // There was no tail.
639 // A 1-byte tail with at least 2 bytes of
640 // input available now.
641 v
= ((tail
[0] & 0xff) << 16) |
642 ((input
[p
++] & 0xff) << 8) |
650 // A 2-byte tail with at least 1 byte of input.
651 v
= ((tail
[0] & 0xff) << 16) |
652 ((tail
[1] & 0xff) << 8) |
660 output
[op
++] = alphabet
[(v
>> 18) & 0x3f];
661 output
[op
++] = alphabet
[(v
>> 12) & 0x3f];
662 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
663 output
[op
++] = alphabet
[v
& 0x3f];
665 if (do_cr
) output
[op
++] = '\r';
671 // At this point either there is no tail, or there are fewer
672 // than 3 bytes of input available.
674 // The main loop, turning 3 input bytes into 4 output bytes on
677 v
= ((input
[p
] & 0xff) << 16) |
678 ((input
[p
+1] & 0xff) << 8) |
680 output
[op
] = alphabet
[(v
>> 18) & 0x3f];
681 output
[op
+1] = alphabet
[(v
>> 12) & 0x3f];
682 output
[op
+2] = alphabet
[(v
>> 6) & 0x3f];
683 output
[op
+3] = alphabet
[v
& 0x3f];
687 if (do_cr
) output
[op
++] = '\r';
694 // Finish up the tail of the input. Note that we need to
695 // consume any bytes in tail before any bytes
696 // remaining in input; there should be at most two bytes
699 if (p
-tailLen
== len
-1) {
701 v
= ((tailLen
> 0 ? tail
[t
++] : input
[p
++]) & 0xff) << 4;
703 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
704 output
[op
++] = alphabet
[v
& 0x3f];
710 if (do_cr
) output
[op
++] = '\r';
713 } else if (p
-tailLen
== len
-2) {
715 v
= (((tailLen
> 1 ? tail
[t
++] : input
[p
++]) & 0xff) << 10) |
716 (((tailLen
> 0 ? tail
[t
++] : input
[p
++]) & 0xff) << 2);
718 output
[op
++] = alphabet
[(v
>> 12) & 0x3f];
719 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
720 output
[op
++] = alphabet
[v
& 0x3f];
725 if (do_cr
) output
[op
++] = '\r';
728 } else if (do_newline
&& op
> 0 && count
!= LINE_GROUPS
) {
729 if (do_cr
) output
[op
++] = '\r';
736 // Save the leftovers in tail to be consumed on the next
737 // call to encodeInternal.
740 tail
[tailLen
++] = input
[p
];
741 } else if (p
== len
-2) {
742 tail
[tailLen
++] = input
[p
];
743 tail
[tailLen
++] = input
[p
+1];
/** Not meant to be instantiated: every member of this class is static. */
private Base64() {
    // don't instantiate
}