Commit | Line | Data |
---|---|---|
f28a134e NR |
1 | /* |
2 | * Copyright (C) 2010 The Android Open Source Project | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
a6a73de3 NR |
17 | /* |
18 | * Changes (@author niki): | |
19 | * - default charset -> UTF-8 | |
20 | */ | |
21 | ||
f28a134e NR |
22 | package be.nikiroo.utils.streams; |
23 | ||
24 | import java.io.UnsupportedEncodingException; | |
25 | ||
/**
 * Utilities for encoding and decoding the Base64 representation of
 * binary data. See RFCs <a
 * href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a
 * href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>.
 *
 * <p>The static {@code encode*}/{@code decode*} methods process a whole
 * buffer in one call; the nested {@link Encoder} and {@link Decoder}
 * classes keep state between calls so that data can be processed in
 * several chunks.
 */
class Base64 {
    /**
     * Default values for encoder/decoder flags.
     */
    public static final int DEFAULT = 0;

    /**
     * Encoder flag bit to omit the padding '=' characters at the end
     * of the output (if any).
     */
    public static final int NO_PADDING = 1;

    /**
     * Encoder flag bit to omit all line terminators (i.e., the output
     * will be on one long line).
     */
    public static final int NO_WRAP = 2;

    /**
     * Encoder flag bit to indicate lines should be terminated with a
     * CRLF pair instead of just an LF.  Has no effect if {@code
     * NO_WRAP} is specified as well.
     */
    public static final int CRLF = 4;

    /**
     * Encoder/decoder flag bit to indicate using the "URL and
     * filename safe" variant of Base64 (see RFC 3548 section 4) where
     * {@code -} and {@code _} are used in place of {@code +} and
     * {@code /}.
     */
    public static final int URL_SAFE = 8;

    /**
     * Flag to pass to {@link Base64OutputStream} to indicate that it
     * should not close the output stream it is wrapping when it
     * itself is closed.
     */
    public static final int NO_CLOSE = 16;

    //  --------------------------------------------------------
    //  shared code
    //  --------------------------------------------------------

    /**
     * Common base of {@link Encoder} and {@link Decoder}: holds the
     * caller-supplied output buffer and the number of bytes written
     * to it by the last call to {@link #process}.
     */
    /* package */ abstract static class Coder {
        /** Buffer the coded data is written to; supplied by the caller. */
        public byte[] output;
        /** Number of bytes written to {@link #output} by the last call. */
        public int op;

        /**
         * Encode/decode another block of input data.  this.output is
         * provided by the caller, and must be big enough to hold all
         * the coded data.  On exit, this.op will be set to the length
         * of the coded data.
         *
         * @param input  the array holding the data to process
         * @param offset the position within {@code input} at which to start
         * @param len    the number of bytes of {@code input} to process
         * @param finish true if this is the final call to process for
         *        this object.  Will finalize the coder state and
         *        include any final bytes in the output.
         *
         * @return true if the input so far is good; false if some
         *         error has been detected in the input stream.
         */
        public abstract boolean process(byte[] input, int offset, int len, boolean finish);

        /**
         * @param len the number of input bytes
         *
         * @return the maximum number of bytes a call to process()
         * could produce for the given number of input bytes.  This may
         * be an overestimate.
         */
        public abstract int maxOutputSize(int len);
    }

    //  --------------------------------------------------------
    //  decoding
    //  --------------------------------------------------------

    /**
     * Decode the Base64-encoded data in input and return the data in
     * a new byte array.
     *
     * <p>The padding '=' characters at the end are considered optional, but
     * if any are present, there must be the correct number of them.
     *
     * @param str    the input String to decode, which is converted to
     *               bytes using the UTF-8 charset
     * @param flags  controls certain features of the decoded output.
     *               Pass {@code DEFAULT} to decode standard Base64.
     *
     * @return the decoded bytes (never {@code null})
     *
     * @throws IllegalArgumentException if the input contains
     * incorrect padding
     */
    public static byte[] decode(String str, int flags) {
        try {
            return decode(str.getBytes("UTF-8"), flags);
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is guaranteed to be available on every conforming
            // JVM; fail loudly (as encodeToString does) instead of
            // handing the caller a null array.
            throw new AssertionError(e);
        }
    }

    /**
     * Decode the Base64-encoded data in input and return the data in
     * a new byte array.
     *
     * <p>The padding '=' characters at the end are considered optional, but
     * if any are present, there must be the correct number of them.
     *
     * @param input the input array to decode
     * @param flags  controls certain features of the decoded output.
     *               Pass {@code DEFAULT} to decode standard Base64.
     *
     * @return the decoded bytes
     *
     * @throws IllegalArgumentException if the input contains
     * incorrect padding
     */
    public static byte[] decode(byte[] input, int flags) {
        return decode(input, 0, input.length, flags);
    }

    /**
     * Decode the Base64-encoded data in input and return the data in
     * a new byte array.
     *
     * <p>The padding '=' characters at the end are considered optional, but
     * if any are present, there must be the correct number of them.
     *
     * @param input  the data to decode
     * @param offset the position within the input array at which to start
     * @param len    the number of bytes of input to decode
     * @param flags  controls certain features of the decoded output.
     *               Pass {@code DEFAULT} to decode standard Base64.
     *
     * @return the decoded bytes
     *
     * @throws IllegalArgumentException if the input contains
     * incorrect padding
     */
    public static byte[] decode(byte[] input, int offset, int len, int flags) {
        // Allocate space for the most data the input could represent.
        // (It could contain less if it contains whitespace, etc.)
        // The size is computed in long arithmetic so that len * 3 cannot
        // overflow for very large inputs.
        Decoder decoder = new Decoder(flags, new byte[(int) (len * 3L / 4)]);

        if (!decoder.process(input, offset, len, true)) {
            throw new IllegalArgumentException("bad base-64");
        }

        // Maybe we got lucky and allocated exactly enough output space.
        if (decoder.op == decoder.output.length) {
            return decoder.output;
        }

        // Need to shorten the array, so allocate a new one of the
        // right size and copy.
        byte[] temp = new byte[decoder.op];
        System.arraycopy(decoder.output, 0, temp, 0, decoder.op);
        return temp;
    }

    /**
     * Incremental Base64 decoder.  Input may be supplied over several
     * calls to {@link #process}; the partially read tuple is carried
     * over between calls.
     */
    /* package */ static class Decoder extends Coder {
        /**
         * Lookup table for turning bytes into their position in the
         * Base64 alphabet.
         */
        private static final int[] DECODE = {
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
            52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
            -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
            41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        };

        /**
         * Decode lookup table for the "web safe" variant (RFC 3548
         * sec. 4) where - and _ replace + and /.
         */
        private static final int[] DECODE_WEBSAFE = {
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
            52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
            -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
            41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        };

        /** Non-data values in the DECODE arrays. */
        private static final int SKIP = -1;
        private static final int EQUALS = -2;

        /**
         * States 0-3 are reading through the next input tuple.
         * State 4 is having read one '=' and expecting exactly
         * one more.
         * State 5 is expecting no more data or padding characters
         * in the input.
         * State 6 is the error state; an error has been detected
         * in the input and no future input can "fix" it.
         */
        private int state;   // state number (0 to 6)
        /** Bits accumulated so far for the tuple currently being read. */
        private int value;

        /** Lookup table selected by the {@code URL_SAFE} flag. */
        private final int[] alphabet;

        /**
         * @param flags  decoder flags; only {@code URL_SAFE} is examined
         * @param output the buffer decoded bytes are written to
         */
        public Decoder(int flags, byte[] output) {
            this.output = output;

            alphabet = ((flags & URL_SAFE) == 0) ? DECODE : DECODE_WEBSAFE;
            state = 0;
            value = 0;
        }

        /**
         * @return an overestimate for the number of bytes {@code
         * len} bytes could decode to.
         */
        @Override
        public int maxOutputSize(int len) {
            return len * 3/4 + 10;
        }

        /**
         * Decode another block of input data.
         *
         * @return true if the state machine is still healthy.  false if
         *         bad base-64 data has been detected in the input stream.
         */
        @Override
        public boolean process(byte[] input, int offset, int len, boolean finish) {
            if (this.state == 6) return false;

            int p = offset;
            // From here on, len is the exclusive end index of the input.
            len += offset;

            // Using local variables makes the decoder about 12%
            // faster than if we manipulate the member variables in
            // the loop.  (Even alphabet makes a measurable
            // difference, which is somewhat surprising to me since
            // the member variable is final.)
            int state = this.state;
            int value = this.value;
            int op = 0;
            final byte[] output = this.output;
            final int[] alphabet = this.alphabet;

            while (p < len) {
                // Try the fast path:  we're starting a new tuple and the
                // next four bytes of the input stream are all data
                // bytes.  This corresponds to going through states
                // 0-1-2-3-0.  We expect to use this method for most of
                // the data.
                //
                // If any of the next four bytes of input are non-data
                // (whitespace, etc.), value will end up negative.  (All
                // the non-data values in decode are small negative
                // numbers, so shifting any of them up and or'ing them
                // together will result in a value with its top bit set.)
                //
                // You can remove this whole block and the output should
                // be the same, just slower.
                if (state == 0) {
                    while (p+4 <= len &&
                           (value = ((alphabet[input[p] & 0xff] << 18) |
                                     (alphabet[input[p+1] & 0xff] << 12) |
                                     (alphabet[input[p+2] & 0xff] << 6) |
                                     (alphabet[input[p+3] & 0xff]))) >= 0) {
                        output[op+2] = (byte) value;
                        output[op+1] = (byte) (value >> 8);
                        output[op] = (byte) (value >> 16);
                        op += 3;
                        p += 4;
                    }
                    if (p >= len) break;
                }

                // The fast path isn't available -- either we've read a
                // partial tuple, or the next four input bytes aren't all
                // data, or whatever.  Fall back to the slower state
                // machine implementation.

                int d = alphabet[input[p++] & 0xff];

                switch (state) {
                case 0:
                    if (d >= 0) {
                        value = d;
                        ++state;
                    } else if (d != SKIP) {
                        this.state = 6;
                        return false;
                    }
                    break;

                case 1:
                    if (d >= 0) {
                        value = (value << 6) | d;
                        ++state;
                    } else if (d != SKIP) {
                        this.state = 6;
                        return false;
                    }
                    break;

                case 2:
                    if (d >= 0) {
                        value = (value << 6) | d;
                        ++state;
                    } else if (d == EQUALS) {
                        // Emit the last (partial) output tuple;
                        // expect exactly one more padding character.
                        output[op++] = (byte) (value >> 4);
                        state = 4;
                    } else if (d != SKIP) {
                        this.state = 6;
                        return false;
                    }
                    break;

                case 3:
                    if (d >= 0) {
                        // Emit the output triple and return to state 0.
                        value = (value << 6) | d;
                        output[op+2] = (byte) value;
                        output[op+1] = (byte) (value >> 8);
                        output[op] = (byte) (value >> 16);
                        op += 3;
                        state = 0;
                    } else if (d == EQUALS) {
                        // Emit the last (partial) output tuple;
                        // expect no further data or padding characters.
                        output[op+1] = (byte) (value >> 2);
                        output[op] = (byte) (value >> 10);
                        op += 2;
                        state = 5;
                    } else if (d != SKIP) {
                        this.state = 6;
                        return false;
                    }
                    break;

                case 4:
                    if (d == EQUALS) {
                        ++state;
                    } else if (d != SKIP) {
                        this.state = 6;
                        return false;
                    }
                    break;

                case 5:
                    if (d != SKIP) {
                        this.state = 6;
                        return false;
                    }
                    break;
                }
            }

            if (!finish) {
                // We're out of input, but a future call could provide
                // more.
                this.state = state;
                this.value = value;
                this.op = op;
                return true;
            }

            // Done reading input.  Now figure out where we are left in
            // the state machine and finish up.

            switch (state) {
            case 0:
                // Output length is a multiple of three.  Fine.
                break;
            case 1:
                // Read one extra input byte, which isn't enough to
                // make another output byte.  Illegal.
                this.state = 6;
                return false;
            case 2:
                // Read two extra input bytes, enough to emit 1 more
                // output byte.  Fine.
                output[op++] = (byte) (value >> 4);
                break;
            case 3:
                // Read three extra input bytes, enough to emit 2 more
                // output bytes.  Fine.
                output[op++] = (byte) (value >> 10);
                output[op++] = (byte) (value >> 2);
                break;
            case 4:
                // Read one padding '=' when we expected 2.  Illegal.
                this.state = 6;
                return false;
            case 5:
                // Read all the padding '='s we expected and no more.
                // Fine.
                break;
            }

            this.state = state;
            this.op = op;
            return true;
        }
    }

    //  --------------------------------------------------------
    //  encoding
    //  --------------------------------------------------------

    /**
     * Base64-encode the given data and return a newly allocated
     * String with the result.
     *
     * @param input  the data to encode
     * @param flags  controls certain features of the encoded output.
     *               Passing {@code DEFAULT} results in output that
     *               adheres to RFC 2045.
     *
     * @return the encoded data as a String
     */
    public static String encodeToString(byte[] input, int flags) {
        try {
            return new String(encode(input, flags), "US-ASCII");
        } catch (UnsupportedEncodingException e) {
            // US-ASCII is guaranteed to be available.
            throw new AssertionError(e);
        }
    }

    /**
     * Base64-encode the given data and return a newly allocated
     * String with the result.
     *
     * @param input  the data to encode
     * @param offset the position within the input array at which to
     *               start
     * @param len    the number of bytes of input to encode
     * @param flags  controls certain features of the encoded output.
     *               Passing {@code DEFAULT} results in output that
     *               adheres to RFC 2045.
     *
     * @return the encoded data as a String
     */
    public static String encodeToString(byte[] input, int offset, int len, int flags) {
        try {
            return new String(encode(input, offset, len, flags), "US-ASCII");
        } catch (UnsupportedEncodingException e) {
            // US-ASCII is guaranteed to be available.
            throw new AssertionError(e);
        }
    }

    /**
     * Base64-encode the given data and return a newly allocated
     * byte[] with the result.
     *
     * @param input  the data to encode
     * @param flags  controls certain features of the encoded output.
     *               Passing {@code DEFAULT} results in output that
     *               adheres to RFC 2045.
     *
     * @return the encoded data
     */
    public static byte[] encode(byte[] input, int flags) {
        return encode(input, 0, input.length, flags);
    }

    /**
     * Base64-encode the given data and return a newly allocated
     * byte[] with the result.
     *
     * @param input  the data to encode
     * @param offset the position within the input array at which to
     *               start
     * @param len    the number of bytes of input to encode
     * @param flags  controls certain features of the encoded output.
     *               Passing {@code DEFAULT} results in output that
     *               adheres to RFC 2045.
     *
     * @return the encoded data
     */
    public static byte[] encode(byte[] input, int offset, int len, int flags) {
        Encoder encoder = new Encoder(flags, null);

        // Compute the exact length of the array we will produce.
        int output_len = len / 3 * 4;

        // Account for the tail of the data and the padding bytes, if any.
        if (encoder.do_padding) {
            if (len % 3 > 0) {
                output_len += 4;
            }
        } else {
            switch (len % 3) {
                case 0: break;
                case 1: output_len += 2; break;
                case 2: output_len += 3; break;
            }
        }

        // Account for the newlines, if any.
        if (encoder.do_newline && len > 0) {
            output_len += (((len-1) / (3 * Encoder.LINE_GROUPS)) + 1) *
                (encoder.do_cr ? 2 : 1);
        }

        encoder.output = new byte[output_len];
        encoder.process(input, offset, len, true);

        assert encoder.op == output_len;

        return encoder.output;
    }

    /**
     * Incremental Base64 encoder.  Input may be supplied over several
     * calls to {@link #process}; up to two leftover input bytes are
     * kept in an internal tail buffer between calls.
     */
    /* package */ static class Encoder extends Coder {
        /**
         * Emit a new line every this many output tuples.  Corresponds to
         * a 76-character line length (the maximum allowable according to
         * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>).
         */
        public static final int LINE_GROUPS = 19;

        /**
         * Lookup table for turning Base64 alphabet positions (6 bits)
         * into output bytes.
         */
        private static final byte[] ENCODE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
            'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
            'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
            'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
        };

        /**
         * Lookup table for turning Base64 alphabet positions (6 bits)
         * into output bytes, for the "URL and filename safe" variant
         * where - and _ replace + and /.
         */
        private static final byte[] ENCODE_WEBSAFE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
            'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
            'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
            'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
        };

        /** Input bytes left over from the previous call (at most 2). */
        private final byte[] tail;
        /** Number of valid bytes in {@link #tail}. */
        /* package */ int tailLen;
        /**
         * Output tuples left before the next line terminator is due;
         * starts at -1 when line wrapping is disabled.
         */
        private int count;

        public final boolean do_padding;
        public final boolean do_newline;
        public final boolean do_cr;
        private final byte[] alphabet;

        /**
         * @param flags  encoder flags ({@code NO_PADDING}, {@code NO_WRAP},
         *               {@code CRLF} and {@code URL_SAFE} are examined)
         * @param output the buffer encoded bytes are written to; may be
         *               assigned later through the {@code output} field
         */
        public Encoder(int flags, byte[] output) {
            this.output = output;

            do_padding = (flags & NO_PADDING) == 0;
            do_newline = (flags & NO_WRAP) == 0;
            do_cr = (flags & CRLF) != 0;
            alphabet = ((flags & URL_SAFE) == 0) ? ENCODE : ENCODE_WEBSAFE;

            tail = new byte[2];
            tailLen = 0;

            count = do_newline ? LINE_GROUPS : -1;
        }

        /**
         * @return an overestimate for the number of bytes {@code
         * len} bytes could encode to.
         */
        @Override
        public int maxOutputSize(int len) {
            return len * 8/5 + 10;
        }

        /**
         * Encode another block of input data.
         *
         * @return always true; any input bytes are valid for encoding.
         */
        @Override
        public boolean process(byte[] input, int offset, int len, boolean finish) {
            // Using local variables makes the encoder about 9% faster.
            final byte[] alphabet = this.alphabet;
            final byte[] output = this.output;
            int op = 0;
            int count = this.count;

            int p = offset;
            // From here on, len is the exclusive end index of the input.
            len += offset;
            int v = -1;

            // First we need to concatenate the tail of the previous call
            // with any input bytes available now and see if we can empty
            // the tail.

            switch (tailLen) {
                case 0:
                    // There was no tail.
                    break;

                case 1:
                    if (p+2 <= len) {
                        // A 1-byte tail with at least 2 bytes of
                        // input available now.
                        v = ((tail[0] & 0xff) << 16) |
                            ((input[p++] & 0xff) << 8) |
                            (input[p++] & 0xff);
                        tailLen = 0;
                    }
                    break;

                case 2:
                    if (p+1 <= len) {
                        // A 2-byte tail with at least 1 byte of input.
                        v = ((tail[0] & 0xff) << 16) |
                            ((tail[1] & 0xff) << 8) |
                            (input[p++] & 0xff);
                        tailLen = 0;
                    }
                    break;
            }

            if (v != -1) {
                output[op++] = alphabet[(v >> 18) & 0x3f];
                output[op++] = alphabet[(v >> 12) & 0x3f];
                output[op++] = alphabet[(v >> 6) & 0x3f];
                output[op++] = alphabet[v & 0x3f];
                if (--count == 0) {
                    if (do_cr) output[op++] = '\r';
                    output[op++] = '\n';
                    count = LINE_GROUPS;
                }
            }

            // At this point either there is no tail, or there are fewer
            // than 3 bytes of input available.

            // The main loop, turning 3 input bytes into 4 output bytes on
            // each iteration.
            while (p+3 <= len) {
                v = ((input[p] & 0xff) << 16) |
                    ((input[p+1] & 0xff) << 8) |
                    (input[p+2] & 0xff);
                output[op] = alphabet[(v >> 18) & 0x3f];
                output[op+1] = alphabet[(v >> 12) & 0x3f];
                output[op+2] = alphabet[(v >> 6) & 0x3f];
                output[op+3] = alphabet[v & 0x3f];
                p += 3;
                op += 4;
                if (--count == 0) {
                    if (do_cr) output[op++] = '\r';
                    output[op++] = '\n';
                    count = LINE_GROUPS;
                }
            }

            if (finish) {
                // Finish up the tail of the input.  Note that we need to
                // consume any bytes in tail before any bytes
                // remaining in input; there should be at most two bytes
                // total.

                if (p-tailLen == len-1) {
                    // Exactly one byte left: two data characters.
                    int t = 0;
                    v = ((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 4;
                    tailLen -= t;
                    output[op++] = alphabet[(v >> 6) & 0x3f];
                    output[op++] = alphabet[v & 0x3f];
                    if (do_padding) {
                        output[op++] = '=';
                        output[op++] = '=';
                    }
                    if (do_newline) {
                        if (do_cr) output[op++] = '\r';
                        output[op++] = '\n';
                    }
                } else if (p-tailLen == len-2) {
                    // Exactly two bytes left: three data characters.
                    int t = 0;
                    v = (((tailLen > 1 ? tail[t++] : input[p++]) & 0xff) << 10) |
                        (((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 2);
                    tailLen -= t;
                    output[op++] = alphabet[(v >> 12) & 0x3f];
                    output[op++] = alphabet[(v >> 6) & 0x3f];
                    output[op++] = alphabet[v & 0x3f];
                    if (do_padding) {
                        output[op++] = '=';
                    }
                    if (do_newline) {
                        if (do_cr) output[op++] = '\r';
                        output[op++] = '\n';
                    }
                } else if (do_newline && op > 0 && count != LINE_GROUPS) {
                    // Nothing left over, but the last line written in
                    // this call has not been terminated yet.
                    if (do_cr) output[op++] = '\r';
                    output[op++] = '\n';
                }

                assert tailLen == 0;
                assert p == len;
            } else {
                // Save the leftovers in tail to be consumed on the next
                // call to process.

                if (p == len-1) {
                    tail[tailLen++] = input[p];
                } else if (p == len-2) {
                    tail[tailLen++] = input[p];
                    tail[tailLen++] = input[p+1];
                }
            }

            this.op = op;
            this.count = count;

            return true;
        }
    }

    private Base64() { }   // don't instantiate
}