Commit | Line | Data |
---|---|---|
f28a134e NR |
1 | /* |
2 | * Copyright (C) 2010 The Android Open Source Project | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
a6a73de3 NR |
17 | /* |
18 | * Changes (@author niki): | |
19 | * - default charset -> UTF-8 | |
20 | */ | |
21 | ||
f28a134e NR |
22 | package be.nikiroo.utils.streams; |
23 | ||
24 | import java.io.UnsupportedEncodingException; | |
25 | ||
f8147a0e NR |
26 | import be.nikiroo.utils.StringUtils; |
27 | ||
f28a134e NR |
28 | /** |
29 | * Utilities for encoding and decoding the Base64 representation of | |
30 | * binary data. See RFCs <a | |
31 | * href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a | |
32 | * href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>. | |
33 | */ | |
34 | class Base64 { | |
35 | /** | |
36 | * Default values for encoder/decoder flags. | |
37 | */ | |
38 | public static final int DEFAULT = 0; | |
39 | ||
40 | /** | |
41 | * Encoder flag bit to omit the padding '=' characters at the end | |
42 | * of the output (if any). | |
43 | */ | |
44 | public static final int NO_PADDING = 1; | |
45 | ||
46 | /** | |
47 | * Encoder flag bit to omit all line terminators (i.e., the output | |
48 | * will be on one long line). | |
49 | */ | |
50 | public static final int NO_WRAP = 2; | |
51 | ||
52 | /** | |
53 | * Encoder flag bit to indicate lines should be terminated with a | |
54 | * CRLF pair instead of just an LF. Has no effect if {@code | |
55 | * NO_WRAP} is specified as well. | |
56 | */ | |
57 | public static final int CRLF = 4; | |
58 | ||
59 | /** | |
60 | * Encoder/decoder flag bit to indicate using the "URL and | |
61 | * filename safe" variant of Base64 (see RFC 3548 section 4) where | |
62 | * {@code -} and {@code _} are used in place of {@code +} and | |
63 | * {@code /}. | |
64 | */ | |
65 | public static final int URL_SAFE = 8; | |
66 | ||
67 | /** | |
68 | * Flag to pass to {@link Base64OutputStream} to indicate that it | |
69 | * should not close the output stream it is wrapping when it | |
70 | * itself is closed. | |
71 | */ | |
72 | public static final int NO_CLOSE = 16; | |
73 | ||
74 | // -------------------------------------------------------- | |
75 | // shared code | |
76 | // -------------------------------------------------------- | |
77 | ||
78 | /* package */ static abstract class Coder { | |
79 | public byte[] output; | |
80 | public int op; | |
81 | ||
82 | /** | |
83 | * Encode/decode another block of input data. this.output is | |
84 | * provided by the caller, and must be big enough to hold all | |
85 | * the coded data. On exit, this.opwill be set to the length | |
86 | * of the coded data. | |
87 | * | |
88 | * @param finish true if this is the final call to process for | |
89 | * this object. Will finalize the coder state and | |
90 | * include any final bytes in the output. | |
91 | * | |
92 | * @return true if the input so far is good; false if some | |
93 | * error has been detected in the input stream.. | |
94 | */ | |
95 | public abstract boolean process(byte[] input, int offset, int len, boolean finish); | |
96 | ||
97 | /** | |
98 | * @return the maximum number of bytes a call to process() | |
99 | * could produce for the given number of input bytes. This may | |
100 | * be an overestimate. | |
101 | */ | |
102 | public abstract int maxOutputSize(int len); | |
103 | } | |
104 | ||
105 | // -------------------------------------------------------- | |
106 | // decoding | |
107 | // -------------------------------------------------------- | |
108 | ||
109 | /** | |
110 | * Decode the Base64-encoded data in input and return the data in | |
111 | * a new byte array. | |
112 | * | |
113 | * <p>The padding '=' characters at the end are considered optional, but | |
114 | * if any are present, there must be the correct number of them. | |
115 | * | |
116 | * @param str the input String to decode, which is converted to | |
117 | * bytes using the default charset | |
118 | * @param flags controls certain features of the decoded output. | |
119 | * Pass {@code DEFAULT} to decode standard Base64. | |
120 | * | |
121 | * @throws IllegalArgumentException if the input contains | |
122 | * incorrect padding | |
123 | */ | |
124 | public static byte[] decode(String str, int flags) { | |
f8147a0e | 125 | return decode(StringUtils.getBytes(str), flags); |
f28a134e NR |
126 | } |
127 | ||
128 | /** | |
129 | * Decode the Base64-encoded data in input and return the data in | |
130 | * a new byte array. | |
131 | * | |
132 | * <p>The padding '=' characters at the end are considered optional, but | |
133 | * if any are present, there must be the correct number of them. | |
134 | * | |
135 | * @param input the input array to decode | |
136 | * @param flags controls certain features of the decoded output. | |
137 | * Pass {@code DEFAULT} to decode standard Base64. | |
138 | * | |
139 | * @throws IllegalArgumentException if the input contains | |
140 | * incorrect padding | |
141 | */ | |
142 | public static byte[] decode(byte[] input, int flags) { | |
143 | return decode(input, 0, input.length, flags); | |
144 | } | |
145 | ||
146 | /** | |
147 | * Decode the Base64-encoded data in input and return the data in | |
148 | * a new byte array. | |
149 | * | |
150 | * <p>The padding '=' characters at the end are considered optional, but | |
151 | * if any are present, there must be the correct number of them. | |
152 | * | |
153 | * @param input the data to decode | |
154 | * @param offset the position within the input array at which to start | |
155 | * @param len the number of bytes of input to decode | |
156 | * @param flags controls certain features of the decoded output. | |
157 | * Pass {@code DEFAULT} to decode standard Base64. | |
158 | * | |
159 | * @throws IllegalArgumentException if the input contains | |
160 | * incorrect padding | |
161 | */ | |
162 | public static byte[] decode(byte[] input, int offset, int len, int flags) { | |
163 | // Allocate space for the most data the input could represent. | |
164 | // (It could contain less if it contains whitespace, etc.) | |
165 | Decoder decoder = new Decoder(flags, new byte[len*3/4]); | |
166 | ||
167 | if (!decoder.process(input, offset, len, true)) { | |
168 | throw new IllegalArgumentException("bad base-64"); | |
169 | } | |
170 | ||
171 | // Maybe we got lucky and allocated exactly enough output space. | |
172 | if (decoder.op == decoder.output.length) { | |
173 | return decoder.output; | |
174 | } | |
175 | ||
176 | // Need to shorten the array, so allocate a new one of the | |
177 | // right size and copy. | |
178 | byte[] temp = new byte[decoder.op]; | |
179 | System.arraycopy(decoder.output, 0, temp, 0, decoder.op); | |
180 | return temp; | |
181 | } | |
182 | ||
183 | /* package */ static class Decoder extends Coder { | |
184 | /** | |
185 | * Lookup table for turning bytes into their position in the | |
186 | * Base64 alphabet. | |
187 | */ | |
188 | private static final int DECODE[] = { | |
189 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
190 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
191 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, | |
192 | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1, | |
193 | -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | |
194 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, | |
195 | -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, | |
196 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, | |
197 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
198 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
199 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
200 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
201 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
202 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
203 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
204 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
205 | }; | |
206 | ||
207 | /** | |
208 | * Decode lookup table for the "web safe" variant (RFC 3548 | |
209 | * sec. 4) where - and _ replace + and /. | |
210 | */ | |
211 | private static final int DECODE_WEBSAFE[] = { | |
212 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
213 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
214 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, | |
215 | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1, | |
216 | -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | |
217 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, | |
218 | -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, | |
219 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, | |
220 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
221 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
222 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
223 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
224 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
225 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
226 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
227 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
228 | }; | |
229 | ||
230 | /** Non-data values in the DECODE arrays. */ | |
231 | private static final int SKIP = -1; | |
232 | private static final int EQUALS = -2; | |
233 | ||
234 | /** | |
235 | * States 0-3 are reading through the next input tuple. | |
236 | * State 4 is having read one '=' and expecting exactly | |
237 | * one more. | |
238 | * State 5 is expecting no more data or padding characters | |
239 | * in the input. | |
240 | * State 6 is the error state; an error has been detected | |
241 | * in the input and no future input can "fix" it. | |
242 | */ | |
243 | private int state; // state number (0 to 6) | |
244 | private int value; | |
245 | ||
246 | final private int[] alphabet; | |
247 | ||
248 | public Decoder(int flags, byte[] output) { | |
249 | this.output = output; | |
250 | ||
251 | alphabet = ((flags & URL_SAFE) == 0) ? DECODE : DECODE_WEBSAFE; | |
252 | state = 0; | |
253 | value = 0; | |
254 | } | |
255 | ||
256 | /** | |
257 | * @return an overestimate for the number of bytes {@code | |
258 | * len} bytes could decode to. | |
259 | */ | |
7194ac50 NR |
260 | @Override |
261 | public int maxOutputSize(int len) { | |
f28a134e NR |
262 | return len * 3/4 + 10; |
263 | } | |
264 | ||
265 | /** | |
266 | * Decode another block of input data. | |
267 | * | |
268 | * @return true if the state machine is still healthy. false if | |
269 | * bad base-64 data has been detected in the input stream. | |
270 | */ | |
7194ac50 NR |
271 | @Override |
272 | public boolean process(byte[] input, int offset, int len, boolean finish) { | |
f28a134e NR |
273 | if (this.state == 6) return false; |
274 | ||
275 | int p = offset; | |
276 | len += offset; | |
277 | ||
278 | // Using local variables makes the decoder about 12% | |
279 | // faster than if we manipulate the member variables in | |
280 | // the loop. (Even alphabet makes a measurable | |
281 | // difference, which is somewhat surprising to me since | |
282 | // the member variable is final.) | |
283 | int state = this.state; | |
284 | int value = this.value; | |
285 | int op = 0; | |
286 | final byte[] output = this.output; | |
287 | final int[] alphabet = this.alphabet; | |
288 | ||
289 | while (p < len) { | |
290 | // Try the fast path: we're starting a new tuple and the | |
291 | // next four bytes of the input stream are all data | |
292 | // bytes. This corresponds to going through states | |
293 | // 0-1-2-3-0. We expect to use this method for most of | |
294 | // the data. | |
295 | // | |
296 | // If any of the next four bytes of input are non-data | |
297 | // (whitespace, etc.), value will end up negative. (All | |
298 | // the non-data values in decode are small negative | |
299 | // numbers, so shifting any of them up and or'ing them | |
300 | // together will result in a value with its top bit set.) | |
301 | // | |
302 | // You can remove this whole block and the output should | |
303 | // be the same, just slower. | |
304 | if (state == 0) { | |
305 | while (p+4 <= len && | |
306 | (value = ((alphabet[input[p] & 0xff] << 18) | | |
307 | (alphabet[input[p+1] & 0xff] << 12) | | |
308 | (alphabet[input[p+2] & 0xff] << 6) | | |
309 | (alphabet[input[p+3] & 0xff]))) >= 0) { | |
310 | output[op+2] = (byte) value; | |
311 | output[op+1] = (byte) (value >> 8); | |
312 | output[op] = (byte) (value >> 16); | |
313 | op += 3; | |
314 | p += 4; | |
315 | } | |
316 | if (p >= len) break; | |
317 | } | |
318 | ||
319 | // The fast path isn't available -- either we've read a | |
320 | // partial tuple, or the next four input bytes aren't all | |
321 | // data, or whatever. Fall back to the slower state | |
322 | // machine implementation. | |
323 | ||
324 | int d = alphabet[input[p++] & 0xff]; | |
325 | ||
326 | switch (state) { | |
327 | case 0: | |
328 | if (d >= 0) { | |
329 | value = d; | |
330 | ++state; | |
331 | } else if (d != SKIP) { | |
332 | this.state = 6; | |
333 | return false; | |
334 | } | |
335 | break; | |
336 | ||
337 | case 1: | |
338 | if (d >= 0) { | |
339 | value = (value << 6) | d; | |
340 | ++state; | |
341 | } else if (d != SKIP) { | |
342 | this.state = 6; | |
343 | return false; | |
344 | } | |
345 | break; | |
346 | ||
347 | case 2: | |
348 | if (d >= 0) { | |
349 | value = (value << 6) | d; | |
350 | ++state; | |
351 | } else if (d == EQUALS) { | |
352 | // Emit the last (partial) output tuple; | |
353 | // expect exactly one more padding character. | |
354 | output[op++] = (byte) (value >> 4); | |
355 | state = 4; | |
356 | } else if (d != SKIP) { | |
357 | this.state = 6; | |
358 | return false; | |
359 | } | |
360 | break; | |
361 | ||
362 | case 3: | |
363 | if (d >= 0) { | |
364 | // Emit the output triple and return to state 0. | |
365 | value = (value << 6) | d; | |
366 | output[op+2] = (byte) value; | |
367 | output[op+1] = (byte) (value >> 8); | |
368 | output[op] = (byte) (value >> 16); | |
369 | op += 3; | |
370 | state = 0; | |
371 | } else if (d == EQUALS) { | |
372 | // Emit the last (partial) output tuple; | |
373 | // expect no further data or padding characters. | |
374 | output[op+1] = (byte) (value >> 2); | |
375 | output[op] = (byte) (value >> 10); | |
376 | op += 2; | |
377 | state = 5; | |
378 | } else if (d != SKIP) { | |
379 | this.state = 6; | |
380 | return false; | |
381 | } | |
382 | break; | |
383 | ||
384 | case 4: | |
385 | if (d == EQUALS) { | |
386 | ++state; | |
387 | } else if (d != SKIP) { | |
388 | this.state = 6; | |
389 | return false; | |
390 | } | |
391 | break; | |
392 | ||
393 | case 5: | |
394 | if (d != SKIP) { | |
395 | this.state = 6; | |
396 | return false; | |
397 | } | |
398 | break; | |
399 | } | |
400 | } | |
401 | ||
402 | if (!finish) { | |
403 | // We're out of input, but a future call could provide | |
404 | // more. | |
405 | this.state = state; | |
406 | this.value = value; | |
407 | this.op = op; | |
408 | return true; | |
409 | } | |
410 | ||
411 | // Done reading input. Now figure out where we are left in | |
412 | // the state machine and finish up. | |
413 | ||
414 | switch (state) { | |
415 | case 0: | |
416 | // Output length is a multiple of three. Fine. | |
417 | break; | |
418 | case 1: | |
419 | // Read one extra input byte, which isn't enough to | |
420 | // make another output byte. Illegal. | |
421 | this.state = 6; | |
422 | return false; | |
423 | case 2: | |
424 | // Read two extra input bytes, enough to emit 1 more | |
425 | // output byte. Fine. | |
426 | output[op++] = (byte) (value >> 4); | |
427 | break; | |
428 | case 3: | |
429 | // Read three extra input bytes, enough to emit 2 more | |
430 | // output bytes. Fine. | |
431 | output[op++] = (byte) (value >> 10); | |
432 | output[op++] = (byte) (value >> 2); | |
433 | break; | |
434 | case 4: | |
435 | // Read one padding '=' when we expected 2. Illegal. | |
436 | this.state = 6; | |
437 | return false; | |
438 | case 5: | |
439 | // Read all the padding '='s we expected and no more. | |
440 | // Fine. | |
441 | break; | |
442 | } | |
443 | ||
444 | this.state = state; | |
445 | this.op = op; | |
446 | return true; | |
447 | } | |
448 | } | |
449 | ||
450 | // -------------------------------------------------------- | |
451 | // encoding | |
452 | // -------------------------------------------------------- | |
453 | ||
454 | /** | |
455 | * Base64-encode the given data and return a newly allocated | |
456 | * String with the result. | |
457 | * | |
458 | * @param input the data to encode | |
459 | * @param flags controls certain features of the encoded output. | |
460 | * Passing {@code DEFAULT} results in output that | |
461 | * adheres to RFC 2045. | |
462 | */ | |
463 | public static String encodeToString(byte[] input, int flags) { | |
464 | try { | |
465 | return new String(encode(input, flags), "US-ASCII"); | |
466 | } catch (UnsupportedEncodingException e) { | |
467 | // US-ASCII is guaranteed to be available. | |
468 | throw new AssertionError(e); | |
469 | } | |
470 | } | |
471 | ||
472 | /** | |
473 | * Base64-encode the given data and return a newly allocated | |
474 | * String with the result. | |
475 | * | |
476 | * @param input the data to encode | |
477 | * @param offset the position within the input array at which to | |
478 | * start | |
479 | * @param len the number of bytes of input to encode | |
480 | * @param flags controls certain features of the encoded output. | |
481 | * Passing {@code DEFAULT} results in output that | |
482 | * adheres to RFC 2045. | |
483 | */ | |
484 | public static String encodeToString(byte[] input, int offset, int len, int flags) { | |
485 | try { | |
486 | return new String(encode(input, offset, len, flags), "US-ASCII"); | |
487 | } catch (UnsupportedEncodingException e) { | |
488 | // US-ASCII is guaranteed to be available. | |
489 | throw new AssertionError(e); | |
490 | } | |
491 | } | |
492 | ||
493 | /** | |
494 | * Base64-encode the given data and return a newly allocated | |
495 | * byte[] with the result. | |
496 | * | |
497 | * @param input the data to encode | |
498 | * @param flags controls certain features of the encoded output. | |
499 | * Passing {@code DEFAULT} results in output that | |
500 | * adheres to RFC 2045. | |
501 | */ | |
502 | public static byte[] encode(byte[] input, int flags) { | |
503 | return encode(input, 0, input.length, flags); | |
504 | } | |
505 | ||
506 | /** | |
507 | * Base64-encode the given data and return a newly allocated | |
508 | * byte[] with the result. | |
509 | * | |
510 | * @param input the data to encode | |
511 | * @param offset the position within the input array at which to | |
512 | * start | |
513 | * @param len the number of bytes of input to encode | |
514 | * @param flags controls certain features of the encoded output. | |
515 | * Passing {@code DEFAULT} results in output that | |
516 | * adheres to RFC 2045. | |
517 | */ | |
518 | public static byte[] encode(byte[] input, int offset, int len, int flags) { | |
519 | Encoder encoder = new Encoder(flags, null); | |
520 | ||
521 | // Compute the exact length of the array we will produce. | |
522 | int output_len = len / 3 * 4; | |
523 | ||
524 | // Account for the tail of the data and the padding bytes, if any. | |
525 | if (encoder.do_padding) { | |
526 | if (len % 3 > 0) { | |
527 | output_len += 4; | |
528 | } | |
529 | } else { | |
530 | switch (len % 3) { | |
531 | case 0: break; | |
532 | case 1: output_len += 2; break; | |
533 | case 2: output_len += 3; break; | |
534 | } | |
535 | } | |
536 | ||
537 | // Account for the newlines, if any. | |
538 | if (encoder.do_newline && len > 0) { | |
539 | output_len += (((len-1) / (3 * Encoder.LINE_GROUPS)) + 1) * | |
540 | (encoder.do_cr ? 2 : 1); | |
541 | } | |
542 | ||
543 | encoder.output = new byte[output_len]; | |
544 | encoder.process(input, offset, len, true); | |
545 | ||
546 | assert encoder.op == output_len; | |
547 | ||
548 | return encoder.output; | |
549 | } | |
550 | ||
551 | /* package */ static class Encoder extends Coder { | |
552 | /** | |
553 | * Emit a new line every this many output tuples. Corresponds to | |
554 | * a 76-character line length (the maximum allowable according to | |
555 | * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>). | |
556 | */ | |
557 | public static final int LINE_GROUPS = 19; | |
558 | ||
559 | /** | |
560 | * Lookup table for turning Base64 alphabet positions (6 bits) | |
561 | * into output bytes. | |
562 | */ | |
563 | private static final byte ENCODE[] = { | |
564 | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', | |
565 | 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', | |
566 | 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', | |
567 | 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', | |
568 | }; | |
569 | ||
570 | /** | |
571 | * Lookup table for turning Base64 alphabet positions (6 bits) | |
572 | * into output bytes. | |
573 | */ | |
574 | private static final byte ENCODE_WEBSAFE[] = { | |
575 | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', | |
576 | 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', | |
577 | 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', | |
578 | 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', | |
579 | }; | |
580 | ||
581 | final private byte[] tail; | |
582 | /* package */ int tailLen; | |
583 | private int count; | |
584 | ||
585 | final public boolean do_padding; | |
586 | final public boolean do_newline; | |
587 | final public boolean do_cr; | |
588 | final private byte[] alphabet; | |
589 | ||
590 | public Encoder(int flags, byte[] output) { | |
591 | this.output = output; | |
592 | ||
593 | do_padding = (flags & NO_PADDING) == 0; | |
594 | do_newline = (flags & NO_WRAP) == 0; | |
595 | do_cr = (flags & CRLF) != 0; | |
596 | alphabet = ((flags & URL_SAFE) == 0) ? ENCODE : ENCODE_WEBSAFE; | |
597 | ||
598 | tail = new byte[2]; | |
599 | tailLen = 0; | |
600 | ||
601 | count = do_newline ? LINE_GROUPS : -1; | |
602 | } | |
603 | ||
604 | /** | |
605 | * @return an overestimate for the number of bytes {@code | |
606 | * len} bytes could encode to. | |
607 | */ | |
7194ac50 NR |
608 | @Override |
609 | public int maxOutputSize(int len) { | |
f28a134e NR |
610 | return len * 8/5 + 10; |
611 | } | |
612 | ||
7194ac50 NR |
613 | @Override |
614 | public boolean process(byte[] input, int offset, int len, boolean finish) { | |
f28a134e NR |
615 | // Using local variables makes the encoder about 9% faster. |
616 | final byte[] alphabet = this.alphabet; | |
617 | final byte[] output = this.output; | |
618 | int op = 0; | |
619 | int count = this.count; | |
620 | ||
621 | int p = offset; | |
622 | len += offset; | |
623 | int v = -1; | |
624 | ||
625 | // First we need to concatenate the tail of the previous call | |
626 | // with any input bytes available now and see if we can empty | |
627 | // the tail. | |
628 | ||
629 | switch (tailLen) { | |
630 | case 0: | |
631 | // There was no tail. | |
632 | break; | |
633 | ||
634 | case 1: | |
635 | if (p+2 <= len) { | |
636 | // A 1-byte tail with at least 2 bytes of | |
637 | // input available now. | |
638 | v = ((tail[0] & 0xff) << 16) | | |
639 | ((input[p++] & 0xff) << 8) | | |
640 | (input[p++] & 0xff); | |
641 | tailLen = 0; | |
7194ac50 | 642 | } |
f28a134e NR |
643 | break; |
644 | ||
645 | case 2: | |
646 | if (p+1 <= len) { | |
647 | // A 2-byte tail with at least 1 byte of input. | |
648 | v = ((tail[0] & 0xff) << 16) | | |
649 | ((tail[1] & 0xff) << 8) | | |
650 | (input[p++] & 0xff); | |
651 | tailLen = 0; | |
652 | } | |
653 | break; | |
654 | } | |
655 | ||
656 | if (v != -1) { | |
657 | output[op++] = alphabet[(v >> 18) & 0x3f]; | |
658 | output[op++] = alphabet[(v >> 12) & 0x3f]; | |
659 | output[op++] = alphabet[(v >> 6) & 0x3f]; | |
660 | output[op++] = alphabet[v & 0x3f]; | |
661 | if (--count == 0) { | |
662 | if (do_cr) output[op++] = '\r'; | |
663 | output[op++] = '\n'; | |
664 | count = LINE_GROUPS; | |
665 | } | |
666 | } | |
667 | ||
668 | // At this point either there is no tail, or there are fewer | |
669 | // than 3 bytes of input available. | |
670 | ||
671 | // The main loop, turning 3 input bytes into 4 output bytes on | |
672 | // each iteration. | |
673 | while (p+3 <= len) { | |
674 | v = ((input[p] & 0xff) << 16) | | |
675 | ((input[p+1] & 0xff) << 8) | | |
676 | (input[p+2] & 0xff); | |
677 | output[op] = alphabet[(v >> 18) & 0x3f]; | |
678 | output[op+1] = alphabet[(v >> 12) & 0x3f]; | |
679 | output[op+2] = alphabet[(v >> 6) & 0x3f]; | |
680 | output[op+3] = alphabet[v & 0x3f]; | |
681 | p += 3; | |
682 | op += 4; | |
683 | if (--count == 0) { | |
684 | if (do_cr) output[op++] = '\r'; | |
685 | output[op++] = '\n'; | |
686 | count = LINE_GROUPS; | |
687 | } | |
688 | } | |
689 | ||
690 | if (finish) { | |
691 | // Finish up the tail of the input. Note that we need to | |
692 | // consume any bytes in tail before any bytes | |
693 | // remaining in input; there should be at most two bytes | |
694 | // total. | |
695 | ||
696 | if (p-tailLen == len-1) { | |
697 | int t = 0; | |
698 | v = ((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 4; | |
699 | tailLen -= t; | |
700 | output[op++] = alphabet[(v >> 6) & 0x3f]; | |
701 | output[op++] = alphabet[v & 0x3f]; | |
702 | if (do_padding) { | |
703 | output[op++] = '='; | |
704 | output[op++] = '='; | |
705 | } | |
706 | if (do_newline) { | |
707 | if (do_cr) output[op++] = '\r'; | |
708 | output[op++] = '\n'; | |
709 | } | |
710 | } else if (p-tailLen == len-2) { | |
711 | int t = 0; | |
712 | v = (((tailLen > 1 ? tail[t++] : input[p++]) & 0xff) << 10) | | |
713 | (((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 2); | |
714 | tailLen -= t; | |
715 | output[op++] = alphabet[(v >> 12) & 0x3f]; | |
716 | output[op++] = alphabet[(v >> 6) & 0x3f]; | |
717 | output[op++] = alphabet[v & 0x3f]; | |
718 | if (do_padding) { | |
719 | output[op++] = '='; | |
720 | } | |
721 | if (do_newline) { | |
722 | if (do_cr) output[op++] = '\r'; | |
723 | output[op++] = '\n'; | |
724 | } | |
725 | } else if (do_newline && op > 0 && count != LINE_GROUPS) { | |
726 | if (do_cr) output[op++] = '\r'; | |
727 | output[op++] = '\n'; | |
728 | } | |
729 | ||
730 | assert tailLen == 0; | |
731 | assert p == len; | |
732 | } else { | |
733 | // Save the leftovers in tail to be consumed on the next | |
734 | // call to encodeInternal. | |
735 | ||
736 | if (p == len-1) { | |
737 | tail[tailLen++] = input[p]; | |
738 | } else if (p == len-2) { | |
739 | tail[tailLen++] = input[p]; | |
740 | tail[tailLen++] = input[p+1]; | |
741 | } | |
742 | } | |
743 | ||
744 | this.op = op; | |
745 | this.count = count; | |
746 | ||
747 | return true; | |
748 | } | |
749 | } | |
750 | ||
751 | private Base64() { } // don't instantiate | |
752 | } |