2 * Copyright (C) 2010 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Changes (@author niki):
19 * - default charset -> UTF-8
22 package be
.nikiroo
.utils
.streams
;
24 import java
.io
.UnsupportedEncodingException
;
26 import be
.nikiroo
.utils
.StringUtils
;
29 * Utilities for encoding and decoding the Base64 representation of
30 * binary data. See RFCs <a
31 * href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a
32 * href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>.
36 * Default values for encoder/decoder flags.
38 public static final int DEFAULT
= 0;
41 * Encoder flag bit to omit the padding '=' characters at the end
42 * of the output (if any).
44 public static final int NO_PADDING
= 1;
47 * Encoder flag bit to omit all line terminators (i.e., the output
48 * will be on one long line).
50 public static final int NO_WRAP
= 2;
53 * Encoder flag bit to indicate lines should be terminated with a
54 * CRLF pair instead of just an LF. Has no effect if {@code
55 * NO_WRAP} is specified as well.
57 public static final int CRLF
= 4;
60 * Encoder/decoder flag bit to indicate using the "URL and
61 * filename safe" variant of Base64 (see RFC 3548 section 4) where
62 * {@code -} and {@code _} are used in place of {@code +} and
65 public static final int URL_SAFE
= 8;
68 * Flag to pass to {@link Base64OutputStream} to indicate that it
69 * should not close the output stream it is wrapping when it
72 public static final int NO_CLOSE
= 16;
74 // --------------------------------------------------------
76 // --------------------------------------------------------
78 /* package */ static abstract class Coder
{
83 * Encode/decode another block of input data. this.output is
84 * provided by the caller, and must be big enough to hold all
85 * the coded data. On exit, this.opwill be set to the length
88 * @param finish true if this is the final call to process for
89 * this object. Will finalize the coder state and
90 * include any final bytes in the output.
92 * @return true if the input so far is good; false if some
93 * error has been detected in the input stream..
95 public abstract boolean process(byte[] input
, int offset
, int len
, boolean finish
);
98 * @return the maximum number of bytes a call to process()
99 * could produce for the given number of input bytes. This may
100 * be an overestimate.
102 public abstract int maxOutputSize(int len
);
105 // --------------------------------------------------------
107 // --------------------------------------------------------
110 * Decode the Base64-encoded data in input and return the data in
113 * <p>The padding '=' characters at the end are considered optional, but
114 * if any are present, there must be the correct number of them.
116 * @param str the input String to decode, which is converted to
117 * bytes using the default charset
118 * @param flags controls certain features of the decoded output.
119 * Pass {@code DEFAULT} to decode standard Base64.
121 * @throws IllegalArgumentException if the input contains
124 public static byte[] decode(String str
, int flags
) {
125 return decode(StringUtils
.getBytes(str
), flags
);
129 * Decode the Base64-encoded data in input and return the data in
132 * <p>The padding '=' characters at the end are considered optional, but
133 * if any are present, there must be the correct number of them.
135 * @param input the input array to decode
136 * @param flags controls certain features of the decoded output.
137 * Pass {@code DEFAULT} to decode standard Base64.
139 * @throws IllegalArgumentException if the input contains
142 public static byte[] decode(byte[] input
, int flags
) {
143 return decode(input
, 0, input
.length
, flags
);
147 * Decode the Base64-encoded data in input and return the data in
150 * <p>The padding '=' characters at the end are considered optional, but
151 * if any are present, there must be the correct number of them.
153 * @param input the data to decode
154 * @param offset the position within the input array at which to start
155 * @param len the number of bytes of input to decode
156 * @param flags controls certain features of the decoded output.
157 * Pass {@code DEFAULT} to decode standard Base64.
159 * @throws IllegalArgumentException if the input contains
162 public static byte[] decode(byte[] input
, int offset
, int len
, int flags
) {
163 // Allocate space for the most data the input could represent.
164 // (It could contain less if it contains whitespace, etc.)
165 Decoder decoder
= new Decoder(flags
, new byte[len
*3/4]);
167 if (!decoder
.process(input
, offset
, len
, true)) {
168 throw new IllegalArgumentException("bad base-64");
171 // Maybe we got lucky and allocated exactly enough output space.
172 if (decoder
.op
== decoder
.output
.length
) {
173 return decoder
.output
;
176 // Need to shorten the array, so allocate a new one of the
177 // right size and copy.
178 byte[] temp
= new byte[decoder
.op
];
179 System
.arraycopy(decoder
.output
, 0, temp
, 0, decoder
.op
);
183 /* package */ static class Decoder
extends Coder
{
185 * Lookup table for turning bytes into their position in the
188 private static final int DECODE
[] = {
189 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
190 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
191 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
192 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
193 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
194 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
195 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
196 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
197 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
198 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
199 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
200 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
201 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
202 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
203 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
204 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
208 * Decode lookup table for the "web safe" variant (RFC 3548
209 * sec. 4) where - and _ replace + and /.
211 private static final int DECODE_WEBSAFE
[] = {
212 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
213 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
214 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
215 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
216 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
217 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
218 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
219 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
220 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
221 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
222 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
223 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
224 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
225 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
226 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
227 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
230 /** Non-data values in the DECODE arrays. */
231 private static final int SKIP
= -1;
232 private static final int EQUALS
= -2;
235 * States 0-3 are reading through the next input tuple.
236 * State 4 is having read one '=' and expecting exactly
238 * State 5 is expecting no more data or padding characters
240 * State 6 is the error state; an error has been detected
241 * in the input and no future input can "fix" it.
243 private int state
; // state number (0 to 6)
246 final private int[] alphabet
;
248 public Decoder(int flags
, byte[] output
) {
249 this.output
= output
;
251 alphabet
= ((flags
& URL_SAFE
) == 0) ? DECODE
: DECODE_WEBSAFE
;
257 * @return an overestimate for the number of bytes {@code
258 * len} bytes could decode to.
261 public int maxOutputSize(int len
) {
262 return len
* 3/4 + 10;
266 * Decode another block of input data.
268 * @return true if the state machine is still healthy. false if
269 * bad base-64 data has been detected in the input stream.
272 public boolean process(byte[] input
, int offset
, int len
, boolean finish
) {
273 if (this.state
== 6) return false;
278 // Using local variables makes the decoder about 12%
279 // faster than if we manipulate the member variables in
280 // the loop. (Even alphabet makes a measurable
281 // difference, which is somewhat surprising to me since
282 // the member variable is final.)
283 int state
= this.state
;
284 int value
= this.value
;
286 final byte[] output
= this.output
;
287 final int[] alphabet
= this.alphabet
;
290 // Try the fast path: we're starting a new tuple and the
291 // next four bytes of the input stream are all data
292 // bytes. This corresponds to going through states
293 // 0-1-2-3-0. We expect to use this method for most of
296 // If any of the next four bytes of input are non-data
297 // (whitespace, etc.), value will end up negative. (All
298 // the non-data values in decode are small negative
299 // numbers, so shifting any of them up and or'ing them
300 // together will result in a value with its top bit set.)
302 // You can remove this whole block and the output should
303 // be the same, just slower.
306 (value
= ((alphabet
[input
[p
] & 0xff] << 18) |
307 (alphabet
[input
[p
+1] & 0xff] << 12) |
308 (alphabet
[input
[p
+2] & 0xff] << 6) |
309 (alphabet
[input
[p
+3] & 0xff]))) >= 0) {
310 output
[op
+2] = (byte) value
;
311 output
[op
+1] = (byte) (value
>> 8);
312 output
[op
] = (byte) (value
>> 16);
319 // The fast path isn't available -- either we've read a
320 // partial tuple, or the next four input bytes aren't all
321 // data, or whatever. Fall back to the slower state
322 // machine implementation.
324 int d
= alphabet
[input
[p
++] & 0xff];
331 } else if (d
!= SKIP
) {
339 value
= (value
<< 6) | d
;
341 } else if (d
!= SKIP
) {
349 value
= (value
<< 6) | d
;
351 } else if (d
== EQUALS
) {
352 // Emit the last (partial) output tuple;
353 // expect exactly one more padding character.
354 output
[op
++] = (byte) (value
>> 4);
356 } else if (d
!= SKIP
) {
364 // Emit the output triple and return to state 0.
365 value
= (value
<< 6) | d
;
366 output
[op
+2] = (byte) value
;
367 output
[op
+1] = (byte) (value
>> 8);
368 output
[op
] = (byte) (value
>> 16);
371 } else if (d
== EQUALS
) {
372 // Emit the last (partial) output tuple;
373 // expect no further data or padding characters.
374 output
[op
+1] = (byte) (value
>> 2);
375 output
[op
] = (byte) (value
>> 10);
378 } else if (d
!= SKIP
) {
387 } else if (d
!= SKIP
) {
403 // We're out of input, but a future call could provide
411 // Done reading input. Now figure out where we are left in
412 // the state machine and finish up.
416 // Output length is a multiple of three. Fine.
419 // Read one extra input byte, which isn't enough to
420 // make another output byte. Illegal.
424 // Read two extra input bytes, enough to emit 1 more
425 // output byte. Fine.
426 output
[op
++] = (byte) (value
>> 4);
429 // Read three extra input bytes, enough to emit 2 more
430 // output bytes. Fine.
431 output
[op
++] = (byte) (value
>> 10);
432 output
[op
++] = (byte) (value
>> 2);
435 // Read one padding '=' when we expected 2. Illegal.
439 // Read all the padding '='s we expected and no more.
450 // --------------------------------------------------------
452 // --------------------------------------------------------
455 * Base64-encode the given data and return a newly allocated
456 * String with the result.
458 * @param input the data to encode
459 * @param flags controls certain features of the encoded output.
460 * Passing {@code DEFAULT} results in output that
461 * adheres to RFC 2045.
463 public static String
encodeToString(byte[] input
, int flags
) {
465 return new String(encode(input
, flags
), "US-ASCII");
466 } catch (UnsupportedEncodingException e
) {
467 // US-ASCII is guaranteed to be available.
468 throw new AssertionError(e
);
473 * Base64-encode the given data and return a newly allocated
474 * String with the result.
476 * @param input the data to encode
477 * @param offset the position within the input array at which to
479 * @param len the number of bytes of input to encode
480 * @param flags controls certain features of the encoded output.
481 * Passing {@code DEFAULT} results in output that
482 * adheres to RFC 2045.
484 public static String
encodeToString(byte[] input
, int offset
, int len
, int flags
) {
486 return new String(encode(input
, offset
, len
, flags
), "US-ASCII");
487 } catch (UnsupportedEncodingException e
) {
488 // US-ASCII is guaranteed to be available.
489 throw new AssertionError(e
);
494 * Base64-encode the given data and return a newly allocated
495 * byte[] with the result.
497 * @param input the data to encode
498 * @param flags controls certain features of the encoded output.
499 * Passing {@code DEFAULT} results in output that
500 * adheres to RFC 2045.
502 public static byte[] encode(byte[] input
, int flags
) {
503 return encode(input
, 0, input
.length
, flags
);
507 * Base64-encode the given data and return a newly allocated
508 * byte[] with the result.
510 * @param input the data to encode
511 * @param offset the position within the input array at which to
513 * @param len the number of bytes of input to encode
514 * @param flags controls certain features of the encoded output.
515 * Passing {@code DEFAULT} results in output that
516 * adheres to RFC 2045.
518 public static byte[] encode(byte[] input
, int offset
, int len
, int flags
) {
519 Encoder encoder
= new Encoder(flags
, null);
521 // Compute the exact length of the array we will produce.
522 int output_len
= len
/ 3 * 4;
524 // Account for the tail of the data and the padding bytes, if any.
525 if (encoder
.do_padding
) {
532 case 1: output_len
+= 2; break;
533 case 2: output_len
+= 3; break;
537 // Account for the newlines, if any.
538 if (encoder
.do_newline
&& len
> 0) {
539 output_len
+= (((len
-1) / (3 * Encoder
.LINE_GROUPS
)) + 1) *
540 (encoder
.do_cr ?
2 : 1);
543 encoder
.output
= new byte[output_len
];
544 encoder
.process(input
, offset
, len
, true);
546 assert encoder
.op
== output_len
;
548 return encoder
.output
;
551 /* package */ static class Encoder
extends Coder
{
553 * Emit a new line every this many output tuples. Corresponds to
554 * a 76-character line length (the maximum allowable according to
555 * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>).
557 public static final int LINE_GROUPS
= 19;
560 * Lookup table for turning Base64 alphabet positions (6 bits)
563 private static final byte ENCODE
[] = {
564 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
565 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
566 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
567 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
571 * Lookup table for turning Base64 alphabet positions (6 bits)
574 private static final byte ENCODE_WEBSAFE
[] = {
575 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
576 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
577 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
578 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
581 final private byte[] tail
;
582 /* package */ int tailLen
;
585 final public boolean do_padding
;
586 final public boolean do_newline
;
587 final public boolean do_cr
;
588 final private byte[] alphabet
;
590 public Encoder(int flags
, byte[] output
) {
591 this.output
= output
;
593 do_padding
= (flags
& NO_PADDING
) == 0;
594 do_newline
= (flags
& NO_WRAP
) == 0;
595 do_cr
= (flags
& CRLF
) != 0;
596 alphabet
= ((flags
& URL_SAFE
) == 0) ? ENCODE
: ENCODE_WEBSAFE
;
601 count
= do_newline ? LINE_GROUPS
: -1;
605 * @return an overestimate for the number of bytes {@code
606 * len} bytes could encode to.
609 public int maxOutputSize(int len
) {
610 return len
* 8/5 + 10;
614 public boolean process(byte[] input
, int offset
, int len
, boolean finish
) {
615 // Using local variables makes the encoder about 9% faster.
616 final byte[] alphabet
= this.alphabet
;
617 final byte[] output
= this.output
;
619 int count
= this.count
;
625 // First we need to concatenate the tail of the previous call
626 // with any input bytes available now and see if we can empty
631 // There was no tail.
636 // A 1-byte tail with at least 2 bytes of
637 // input available now.
638 v
= ((tail
[0] & 0xff) << 16) |
639 ((input
[p
++] & 0xff) << 8) |
647 // A 2-byte tail with at least 1 byte of input.
648 v
= ((tail
[0] & 0xff) << 16) |
649 ((tail
[1] & 0xff) << 8) |
657 output
[op
++] = alphabet
[(v
>> 18) & 0x3f];
658 output
[op
++] = alphabet
[(v
>> 12) & 0x3f];
659 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
660 output
[op
++] = alphabet
[v
& 0x3f];
662 if (do_cr
) output
[op
++] = '\r';
668 // At this point either there is no tail, or there are fewer
669 // than 3 bytes of input available.
671 // The main loop, turning 3 input bytes into 4 output bytes on
674 v
= ((input
[p
] & 0xff) << 16) |
675 ((input
[p
+1] & 0xff) << 8) |
677 output
[op
] = alphabet
[(v
>> 18) & 0x3f];
678 output
[op
+1] = alphabet
[(v
>> 12) & 0x3f];
679 output
[op
+2] = alphabet
[(v
>> 6) & 0x3f];
680 output
[op
+3] = alphabet
[v
& 0x3f];
684 if (do_cr
) output
[op
++] = '\r';
691 // Finish up the tail of the input. Note that we need to
692 // consume any bytes in tail before any bytes
693 // remaining in input; there should be at most two bytes
696 if (p
-tailLen
== len
-1) {
698 v
= ((tailLen
> 0 ? tail
[t
++] : input
[p
++]) & 0xff) << 4;
700 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
701 output
[op
++] = alphabet
[v
& 0x3f];
707 if (do_cr
) output
[op
++] = '\r';
710 } else if (p
-tailLen
== len
-2) {
712 v
= (((tailLen
> 1 ? tail
[t
++] : input
[p
++]) & 0xff) << 10) |
713 (((tailLen
> 0 ? tail
[t
++] : input
[p
++]) & 0xff) << 2);
715 output
[op
++] = alphabet
[(v
>> 12) & 0x3f];
716 output
[op
++] = alphabet
[(v
>> 6) & 0x3f];
717 output
[op
++] = alphabet
[v
& 0x3f];
722 if (do_cr
) output
[op
++] = '\r';
725 } else if (do_newline
&& op
> 0 && count
!= LINE_GROUPS
) {
726 if (do_cr
) output
[op
++] = '\r';
733 // Save the leftovers in tail to be consumed on the next
734 // call to encodeInternal.
737 tail
[tailLen
++] = input
[p
];
738 } else if (p
== len
-2) {
739 tail
[tailLen
++] = input
[p
];
740 tail
[tailLen
++] = input
[p
+1];
/** Private so that this class is never instantiated. */
private Base64() {
    // don't instantiate
}