Commit | Line | Data |
---|---|---|
f28a134e NR |
1 | /* |
2 | * Copyright (C) 2010 The Android Open Source Project | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | package be.nikiroo.utils.streams; | |
18 | ||
19 | import java.io.UnsupportedEncodingException; | |
20 | ||
21 | /** | |
22 | * Utilities for encoding and decoding the Base64 representation of | |
23 | * binary data. See RFCs <a | |
24 | * href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a | |
25 | * href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>. | |
26 | */ | |
27 | class Base64 { | |
28 | /** | |
29 | * Default values for encoder/decoder flags. | |
30 | */ | |
31 | public static final int DEFAULT = 0; | |
32 | ||
33 | /** | |
34 | * Encoder flag bit to omit the padding '=' characters at the end | |
35 | * of the output (if any). | |
36 | */ | |
37 | public static final int NO_PADDING = 1; | |
38 | ||
39 | /** | |
40 | * Encoder flag bit to omit all line terminators (i.e., the output | |
41 | * will be on one long line). | |
42 | */ | |
43 | public static final int NO_WRAP = 2; | |
44 | ||
45 | /** | |
46 | * Encoder flag bit to indicate lines should be terminated with a | |
47 | * CRLF pair instead of just an LF. Has no effect if {@code | |
48 | * NO_WRAP} is specified as well. | |
49 | */ | |
50 | public static final int CRLF = 4; | |
51 | ||
52 | /** | |
53 | * Encoder/decoder flag bit to indicate using the "URL and | |
54 | * filename safe" variant of Base64 (see RFC 3548 section 4) where | |
55 | * {@code -} and {@code _} are used in place of {@code +} and | |
56 | * {@code /}. | |
57 | */ | |
58 | public static final int URL_SAFE = 8; | |
59 | ||
60 | /** | |
61 | * Flag to pass to {@link Base64OutputStream} to indicate that it | |
62 | * should not close the output stream it is wrapping when it | |
63 | * itself is closed. | |
64 | */ | |
65 | public static final int NO_CLOSE = 16; | |
66 | ||
67 | // -------------------------------------------------------- | |
68 | // shared code | |
69 | // -------------------------------------------------------- | |
70 | ||
71 | /* package */ static abstract class Coder { | |
72 | public byte[] output; | |
73 | public int op; | |
74 | ||
75 | /** | |
76 | * Encode/decode another block of input data. this.output is | |
77 | * provided by the caller, and must be big enough to hold all | |
78 | * the coded data. On exit, this.opwill be set to the length | |
79 | * of the coded data. | |
80 | * | |
81 | * @param finish true if this is the final call to process for | |
82 | * this object. Will finalize the coder state and | |
83 | * include any final bytes in the output. | |
84 | * | |
85 | * @return true if the input so far is good; false if some | |
86 | * error has been detected in the input stream.. | |
87 | */ | |
88 | public abstract boolean process(byte[] input, int offset, int len, boolean finish); | |
89 | ||
90 | /** | |
91 | * @return the maximum number of bytes a call to process() | |
92 | * could produce for the given number of input bytes. This may | |
93 | * be an overestimate. | |
94 | */ | |
95 | public abstract int maxOutputSize(int len); | |
96 | } | |
97 | ||
98 | // -------------------------------------------------------- | |
99 | // decoding | |
100 | // -------------------------------------------------------- | |
101 | ||
102 | /** | |
103 | * Decode the Base64-encoded data in input and return the data in | |
104 | * a new byte array. | |
105 | * | |
106 | * <p>The padding '=' characters at the end are considered optional, but | |
107 | * if any are present, there must be the correct number of them. | |
108 | * | |
109 | * @param str the input String to decode, which is converted to | |
110 | * bytes using the default charset | |
111 | * @param flags controls certain features of the decoded output. | |
112 | * Pass {@code DEFAULT} to decode standard Base64. | |
113 | * | |
114 | * @throws IllegalArgumentException if the input contains | |
115 | * incorrect padding | |
116 | */ | |
117 | public static byte[] decode(String str, int flags) { | |
118 | return decode(str.getBytes(), flags); | |
119 | } | |
120 | ||
121 | /** | |
122 | * Decode the Base64-encoded data in input and return the data in | |
123 | * a new byte array. | |
124 | * | |
125 | * <p>The padding '=' characters at the end are considered optional, but | |
126 | * if any are present, there must be the correct number of them. | |
127 | * | |
128 | * @param input the input array to decode | |
129 | * @param flags controls certain features of the decoded output. | |
130 | * Pass {@code DEFAULT} to decode standard Base64. | |
131 | * | |
132 | * @throws IllegalArgumentException if the input contains | |
133 | * incorrect padding | |
134 | */ | |
135 | public static byte[] decode(byte[] input, int flags) { | |
136 | return decode(input, 0, input.length, flags); | |
137 | } | |
138 | ||
139 | /** | |
140 | * Decode the Base64-encoded data in input and return the data in | |
141 | * a new byte array. | |
142 | * | |
143 | * <p>The padding '=' characters at the end are considered optional, but | |
144 | * if any are present, there must be the correct number of them. | |
145 | * | |
146 | * @param input the data to decode | |
147 | * @param offset the position within the input array at which to start | |
148 | * @param len the number of bytes of input to decode | |
149 | * @param flags controls certain features of the decoded output. | |
150 | * Pass {@code DEFAULT} to decode standard Base64. | |
151 | * | |
152 | * @throws IllegalArgumentException if the input contains | |
153 | * incorrect padding | |
154 | */ | |
155 | public static byte[] decode(byte[] input, int offset, int len, int flags) { | |
156 | // Allocate space for the most data the input could represent. | |
157 | // (It could contain less if it contains whitespace, etc.) | |
158 | Decoder decoder = new Decoder(flags, new byte[len*3/4]); | |
159 | ||
160 | if (!decoder.process(input, offset, len, true)) { | |
161 | throw new IllegalArgumentException("bad base-64"); | |
162 | } | |
163 | ||
164 | // Maybe we got lucky and allocated exactly enough output space. | |
165 | if (decoder.op == decoder.output.length) { | |
166 | return decoder.output; | |
167 | } | |
168 | ||
169 | // Need to shorten the array, so allocate a new one of the | |
170 | // right size and copy. | |
171 | byte[] temp = new byte[decoder.op]; | |
172 | System.arraycopy(decoder.output, 0, temp, 0, decoder.op); | |
173 | return temp; | |
174 | } | |
175 | ||
176 | /* package */ static class Decoder extends Coder { | |
177 | /** | |
178 | * Lookup table for turning bytes into their position in the | |
179 | * Base64 alphabet. | |
180 | */ | |
181 | private static final int DECODE[] = { | |
182 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
183 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
184 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, | |
185 | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1, | |
186 | -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | |
187 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, | |
188 | -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, | |
189 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, | |
190 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
191 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
192 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
193 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
194 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
195 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
196 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
197 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
198 | }; | |
199 | ||
200 | /** | |
201 | * Decode lookup table for the "web safe" variant (RFC 3548 | |
202 | * sec. 4) where - and _ replace + and /. | |
203 | */ | |
204 | private static final int DECODE_WEBSAFE[] = { | |
205 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
206 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
207 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, | |
208 | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1, | |
209 | -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | |
210 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, | |
211 | -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, | |
212 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, | |
213 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
214 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
215 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
216 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
217 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
218 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
219 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
220 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
221 | }; | |
222 | ||
223 | /** Non-data values in the DECODE arrays. */ | |
224 | private static final int SKIP = -1; | |
225 | private static final int EQUALS = -2; | |
226 | ||
227 | /** | |
228 | * States 0-3 are reading through the next input tuple. | |
229 | * State 4 is having read one '=' and expecting exactly | |
230 | * one more. | |
231 | * State 5 is expecting no more data or padding characters | |
232 | * in the input. | |
233 | * State 6 is the error state; an error has been detected | |
234 | * in the input and no future input can "fix" it. | |
235 | */ | |
236 | private int state; // state number (0 to 6) | |
237 | private int value; | |
238 | ||
239 | final private int[] alphabet; | |
240 | ||
241 | public Decoder(int flags, byte[] output) { | |
242 | this.output = output; | |
243 | ||
244 | alphabet = ((flags & URL_SAFE) == 0) ? DECODE : DECODE_WEBSAFE; | |
245 | state = 0; | |
246 | value = 0; | |
247 | } | |
248 | ||
249 | /** | |
250 | * @return an overestimate for the number of bytes {@code | |
251 | * len} bytes could decode to. | |
252 | */ | |
253 | public int maxOutputSize(int len) { | |
254 | return len * 3/4 + 10; | |
255 | } | |
256 | ||
257 | /** | |
258 | * Decode another block of input data. | |
259 | * | |
260 | * @return true if the state machine is still healthy. false if | |
261 | * bad base-64 data has been detected in the input stream. | |
262 | */ | |
263 | public boolean process(byte[] input, int offset, int len, boolean finish) { | |
264 | if (this.state == 6) return false; | |
265 | ||
266 | int p = offset; | |
267 | len += offset; | |
268 | ||
269 | // Using local variables makes the decoder about 12% | |
270 | // faster than if we manipulate the member variables in | |
271 | // the loop. (Even alphabet makes a measurable | |
272 | // difference, which is somewhat surprising to me since | |
273 | // the member variable is final.) | |
274 | int state = this.state; | |
275 | int value = this.value; | |
276 | int op = 0; | |
277 | final byte[] output = this.output; | |
278 | final int[] alphabet = this.alphabet; | |
279 | ||
280 | while (p < len) { | |
281 | // Try the fast path: we're starting a new tuple and the | |
282 | // next four bytes of the input stream are all data | |
283 | // bytes. This corresponds to going through states | |
284 | // 0-1-2-3-0. We expect to use this method for most of | |
285 | // the data. | |
286 | // | |
287 | // If any of the next four bytes of input are non-data | |
288 | // (whitespace, etc.), value will end up negative. (All | |
289 | // the non-data values in decode are small negative | |
290 | // numbers, so shifting any of them up and or'ing them | |
291 | // together will result in a value with its top bit set.) | |
292 | // | |
293 | // You can remove this whole block and the output should | |
294 | // be the same, just slower. | |
295 | if (state == 0) { | |
296 | while (p+4 <= len && | |
297 | (value = ((alphabet[input[p] & 0xff] << 18) | | |
298 | (alphabet[input[p+1] & 0xff] << 12) | | |
299 | (alphabet[input[p+2] & 0xff] << 6) | | |
300 | (alphabet[input[p+3] & 0xff]))) >= 0) { | |
301 | output[op+2] = (byte) value; | |
302 | output[op+1] = (byte) (value >> 8); | |
303 | output[op] = (byte) (value >> 16); | |
304 | op += 3; | |
305 | p += 4; | |
306 | } | |
307 | if (p >= len) break; | |
308 | } | |
309 | ||
310 | // The fast path isn't available -- either we've read a | |
311 | // partial tuple, or the next four input bytes aren't all | |
312 | // data, or whatever. Fall back to the slower state | |
313 | // machine implementation. | |
314 | ||
315 | int d = alphabet[input[p++] & 0xff]; | |
316 | ||
317 | switch (state) { | |
318 | case 0: | |
319 | if (d >= 0) { | |
320 | value = d; | |
321 | ++state; | |
322 | } else if (d != SKIP) { | |
323 | this.state = 6; | |
324 | return false; | |
325 | } | |
326 | break; | |
327 | ||
328 | case 1: | |
329 | if (d >= 0) { | |
330 | value = (value << 6) | d; | |
331 | ++state; | |
332 | } else if (d != SKIP) { | |
333 | this.state = 6; | |
334 | return false; | |
335 | } | |
336 | break; | |
337 | ||
338 | case 2: | |
339 | if (d >= 0) { | |
340 | value = (value << 6) | d; | |
341 | ++state; | |
342 | } else if (d == EQUALS) { | |
343 | // Emit the last (partial) output tuple; | |
344 | // expect exactly one more padding character. | |
345 | output[op++] = (byte) (value >> 4); | |
346 | state = 4; | |
347 | } else if (d != SKIP) { | |
348 | this.state = 6; | |
349 | return false; | |
350 | } | |
351 | break; | |
352 | ||
353 | case 3: | |
354 | if (d >= 0) { | |
355 | // Emit the output triple and return to state 0. | |
356 | value = (value << 6) | d; | |
357 | output[op+2] = (byte) value; | |
358 | output[op+1] = (byte) (value >> 8); | |
359 | output[op] = (byte) (value >> 16); | |
360 | op += 3; | |
361 | state = 0; | |
362 | } else if (d == EQUALS) { | |
363 | // Emit the last (partial) output tuple; | |
364 | // expect no further data or padding characters. | |
365 | output[op+1] = (byte) (value >> 2); | |
366 | output[op] = (byte) (value >> 10); | |
367 | op += 2; | |
368 | state = 5; | |
369 | } else if (d != SKIP) { | |
370 | this.state = 6; | |
371 | return false; | |
372 | } | |
373 | break; | |
374 | ||
375 | case 4: | |
376 | if (d == EQUALS) { | |
377 | ++state; | |
378 | } else if (d != SKIP) { | |
379 | this.state = 6; | |
380 | return false; | |
381 | } | |
382 | break; | |
383 | ||
384 | case 5: | |
385 | if (d != SKIP) { | |
386 | this.state = 6; | |
387 | return false; | |
388 | } | |
389 | break; | |
390 | } | |
391 | } | |
392 | ||
393 | if (!finish) { | |
394 | // We're out of input, but a future call could provide | |
395 | // more. | |
396 | this.state = state; | |
397 | this.value = value; | |
398 | this.op = op; | |
399 | return true; | |
400 | } | |
401 | ||
402 | // Done reading input. Now figure out where we are left in | |
403 | // the state machine and finish up. | |
404 | ||
405 | switch (state) { | |
406 | case 0: | |
407 | // Output length is a multiple of three. Fine. | |
408 | break; | |
409 | case 1: | |
410 | // Read one extra input byte, which isn't enough to | |
411 | // make another output byte. Illegal. | |
412 | this.state = 6; | |
413 | return false; | |
414 | case 2: | |
415 | // Read two extra input bytes, enough to emit 1 more | |
416 | // output byte. Fine. | |
417 | output[op++] = (byte) (value >> 4); | |
418 | break; | |
419 | case 3: | |
420 | // Read three extra input bytes, enough to emit 2 more | |
421 | // output bytes. Fine. | |
422 | output[op++] = (byte) (value >> 10); | |
423 | output[op++] = (byte) (value >> 2); | |
424 | break; | |
425 | case 4: | |
426 | // Read one padding '=' when we expected 2. Illegal. | |
427 | this.state = 6; | |
428 | return false; | |
429 | case 5: | |
430 | // Read all the padding '='s we expected and no more. | |
431 | // Fine. | |
432 | break; | |
433 | } | |
434 | ||
435 | this.state = state; | |
436 | this.op = op; | |
437 | return true; | |
438 | } | |
439 | } | |
440 | ||
441 | // -------------------------------------------------------- | |
442 | // encoding | |
443 | // -------------------------------------------------------- | |
444 | ||
445 | /** | |
446 | * Base64-encode the given data and return a newly allocated | |
447 | * String with the result. | |
448 | * | |
449 | * @param input the data to encode | |
450 | * @param flags controls certain features of the encoded output. | |
451 | * Passing {@code DEFAULT} results in output that | |
452 | * adheres to RFC 2045. | |
453 | */ | |
454 | public static String encodeToString(byte[] input, int flags) { | |
455 | try { | |
456 | return new String(encode(input, flags), "US-ASCII"); | |
457 | } catch (UnsupportedEncodingException e) { | |
458 | // US-ASCII is guaranteed to be available. | |
459 | throw new AssertionError(e); | |
460 | } | |
461 | } | |
462 | ||
463 | /** | |
464 | * Base64-encode the given data and return a newly allocated | |
465 | * String with the result. | |
466 | * | |
467 | * @param input the data to encode | |
468 | * @param offset the position within the input array at which to | |
469 | * start | |
470 | * @param len the number of bytes of input to encode | |
471 | * @param flags controls certain features of the encoded output. | |
472 | * Passing {@code DEFAULT} results in output that | |
473 | * adheres to RFC 2045. | |
474 | */ | |
475 | public static String encodeToString(byte[] input, int offset, int len, int flags) { | |
476 | try { | |
477 | return new String(encode(input, offset, len, flags), "US-ASCII"); | |
478 | } catch (UnsupportedEncodingException e) { | |
479 | // US-ASCII is guaranteed to be available. | |
480 | throw new AssertionError(e); | |
481 | } | |
482 | } | |
483 | ||
484 | /** | |
485 | * Base64-encode the given data and return a newly allocated | |
486 | * byte[] with the result. | |
487 | * | |
488 | * @param input the data to encode | |
489 | * @param flags controls certain features of the encoded output. | |
490 | * Passing {@code DEFAULT} results in output that | |
491 | * adheres to RFC 2045. | |
492 | */ | |
493 | public static byte[] encode(byte[] input, int flags) { | |
494 | return encode(input, 0, input.length, flags); | |
495 | } | |
496 | ||
497 | /** | |
498 | * Base64-encode the given data and return a newly allocated | |
499 | * byte[] with the result. | |
500 | * | |
501 | * @param input the data to encode | |
502 | * @param offset the position within the input array at which to | |
503 | * start | |
504 | * @param len the number of bytes of input to encode | |
505 | * @param flags controls certain features of the encoded output. | |
506 | * Passing {@code DEFAULT} results in output that | |
507 | * adheres to RFC 2045. | |
508 | */ | |
509 | public static byte[] encode(byte[] input, int offset, int len, int flags) { | |
510 | Encoder encoder = new Encoder(flags, null); | |
511 | ||
512 | // Compute the exact length of the array we will produce. | |
513 | int output_len = len / 3 * 4; | |
514 | ||
515 | // Account for the tail of the data and the padding bytes, if any. | |
516 | if (encoder.do_padding) { | |
517 | if (len % 3 > 0) { | |
518 | output_len += 4; | |
519 | } | |
520 | } else { | |
521 | switch (len % 3) { | |
522 | case 0: break; | |
523 | case 1: output_len += 2; break; | |
524 | case 2: output_len += 3; break; | |
525 | } | |
526 | } | |
527 | ||
528 | // Account for the newlines, if any. | |
529 | if (encoder.do_newline && len > 0) { | |
530 | output_len += (((len-1) / (3 * Encoder.LINE_GROUPS)) + 1) * | |
531 | (encoder.do_cr ? 2 : 1); | |
532 | } | |
533 | ||
534 | encoder.output = new byte[output_len]; | |
535 | encoder.process(input, offset, len, true); | |
536 | ||
537 | assert encoder.op == output_len; | |
538 | ||
539 | return encoder.output; | |
540 | } | |
541 | ||
542 | /* package */ static class Encoder extends Coder { | |
543 | /** | |
544 | * Emit a new line every this many output tuples. Corresponds to | |
545 | * a 76-character line length (the maximum allowable according to | |
546 | * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>). | |
547 | */ | |
548 | public static final int LINE_GROUPS = 19; | |
549 | ||
550 | /** | |
551 | * Lookup table for turning Base64 alphabet positions (6 bits) | |
552 | * into output bytes. | |
553 | */ | |
554 | private static final byte ENCODE[] = { | |
555 | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', | |
556 | 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', | |
557 | 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', | |
558 | 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', | |
559 | }; | |
560 | ||
561 | /** | |
562 | * Lookup table for turning Base64 alphabet positions (6 bits) | |
563 | * into output bytes. | |
564 | */ | |
565 | private static final byte ENCODE_WEBSAFE[] = { | |
566 | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', | |
567 | 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', | |
568 | 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', | |
569 | 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', | |
570 | }; | |
571 | ||
572 | final private byte[] tail; | |
573 | /* package */ int tailLen; | |
574 | private int count; | |
575 | ||
576 | final public boolean do_padding; | |
577 | final public boolean do_newline; | |
578 | final public boolean do_cr; | |
579 | final private byte[] alphabet; | |
580 | ||
581 | public Encoder(int flags, byte[] output) { | |
582 | this.output = output; | |
583 | ||
584 | do_padding = (flags & NO_PADDING) == 0; | |
585 | do_newline = (flags & NO_WRAP) == 0; | |
586 | do_cr = (flags & CRLF) != 0; | |
587 | alphabet = ((flags & URL_SAFE) == 0) ? ENCODE : ENCODE_WEBSAFE; | |
588 | ||
589 | tail = new byte[2]; | |
590 | tailLen = 0; | |
591 | ||
592 | count = do_newline ? LINE_GROUPS : -1; | |
593 | } | |
594 | ||
595 | /** | |
596 | * @return an overestimate for the number of bytes {@code | |
597 | * len} bytes could encode to. | |
598 | */ | |
599 | public int maxOutputSize(int len) { | |
600 | return len * 8/5 + 10; | |
601 | } | |
602 | ||
603 | public boolean process(byte[] input, int offset, int len, boolean finish) { | |
604 | // Using local variables makes the encoder about 9% faster. | |
605 | final byte[] alphabet = this.alphabet; | |
606 | final byte[] output = this.output; | |
607 | int op = 0; | |
608 | int count = this.count; | |
609 | ||
610 | int p = offset; | |
611 | len += offset; | |
612 | int v = -1; | |
613 | ||
614 | // First we need to concatenate the tail of the previous call | |
615 | // with any input bytes available now and see if we can empty | |
616 | // the tail. | |
617 | ||
618 | switch (tailLen) { | |
619 | case 0: | |
620 | // There was no tail. | |
621 | break; | |
622 | ||
623 | case 1: | |
624 | if (p+2 <= len) { | |
625 | // A 1-byte tail with at least 2 bytes of | |
626 | // input available now. | |
627 | v = ((tail[0] & 0xff) << 16) | | |
628 | ((input[p++] & 0xff) << 8) | | |
629 | (input[p++] & 0xff); | |
630 | tailLen = 0; | |
631 | }; | |
632 | break; | |
633 | ||
634 | case 2: | |
635 | if (p+1 <= len) { | |
636 | // A 2-byte tail with at least 1 byte of input. | |
637 | v = ((tail[0] & 0xff) << 16) | | |
638 | ((tail[1] & 0xff) << 8) | | |
639 | (input[p++] & 0xff); | |
640 | tailLen = 0; | |
641 | } | |
642 | break; | |
643 | } | |
644 | ||
645 | if (v != -1) { | |
646 | output[op++] = alphabet[(v >> 18) & 0x3f]; | |
647 | output[op++] = alphabet[(v >> 12) & 0x3f]; | |
648 | output[op++] = alphabet[(v >> 6) & 0x3f]; | |
649 | output[op++] = alphabet[v & 0x3f]; | |
650 | if (--count == 0) { | |
651 | if (do_cr) output[op++] = '\r'; | |
652 | output[op++] = '\n'; | |
653 | count = LINE_GROUPS; | |
654 | } | |
655 | } | |
656 | ||
657 | // At this point either there is no tail, or there are fewer | |
658 | // than 3 bytes of input available. | |
659 | ||
660 | // The main loop, turning 3 input bytes into 4 output bytes on | |
661 | // each iteration. | |
662 | while (p+3 <= len) { | |
663 | v = ((input[p] & 0xff) << 16) | | |
664 | ((input[p+1] & 0xff) << 8) | | |
665 | (input[p+2] & 0xff); | |
666 | output[op] = alphabet[(v >> 18) & 0x3f]; | |
667 | output[op+1] = alphabet[(v >> 12) & 0x3f]; | |
668 | output[op+2] = alphabet[(v >> 6) & 0x3f]; | |
669 | output[op+3] = alphabet[v & 0x3f]; | |
670 | p += 3; | |
671 | op += 4; | |
672 | if (--count == 0) { | |
673 | if (do_cr) output[op++] = '\r'; | |
674 | output[op++] = '\n'; | |
675 | count = LINE_GROUPS; | |
676 | } | |
677 | } | |
678 | ||
679 | if (finish) { | |
680 | // Finish up the tail of the input. Note that we need to | |
681 | // consume any bytes in tail before any bytes | |
682 | // remaining in input; there should be at most two bytes | |
683 | // total. | |
684 | ||
685 | if (p-tailLen == len-1) { | |
686 | int t = 0; | |
687 | v = ((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 4; | |
688 | tailLen -= t; | |
689 | output[op++] = alphabet[(v >> 6) & 0x3f]; | |
690 | output[op++] = alphabet[v & 0x3f]; | |
691 | if (do_padding) { | |
692 | output[op++] = '='; | |
693 | output[op++] = '='; | |
694 | } | |
695 | if (do_newline) { | |
696 | if (do_cr) output[op++] = '\r'; | |
697 | output[op++] = '\n'; | |
698 | } | |
699 | } else if (p-tailLen == len-2) { | |
700 | int t = 0; | |
701 | v = (((tailLen > 1 ? tail[t++] : input[p++]) & 0xff) << 10) | | |
702 | (((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 2); | |
703 | tailLen -= t; | |
704 | output[op++] = alphabet[(v >> 12) & 0x3f]; | |
705 | output[op++] = alphabet[(v >> 6) & 0x3f]; | |
706 | output[op++] = alphabet[v & 0x3f]; | |
707 | if (do_padding) { | |
708 | output[op++] = '='; | |
709 | } | |
710 | if (do_newline) { | |
711 | if (do_cr) output[op++] = '\r'; | |
712 | output[op++] = '\n'; | |
713 | } | |
714 | } else if (do_newline && op > 0 && count != LINE_GROUPS) { | |
715 | if (do_cr) output[op++] = '\r'; | |
716 | output[op++] = '\n'; | |
717 | } | |
718 | ||
719 | assert tailLen == 0; | |
720 | assert p == len; | |
721 | } else { | |
722 | // Save the leftovers in tail to be consumed on the next | |
723 | // call to encodeInternal. | |
724 | ||
725 | if (p == len-1) { | |
726 | tail[tailLen++] = input[p]; | |
727 | } else if (p == len-2) { | |
728 | tail[tailLen++] = input[p]; | |
729 | tail[tailLen++] = input[p+1]; | |
730 | } | |
731 | } | |
732 | ||
733 | this.op = op; | |
734 | this.count = count; | |
735 | ||
736 | return true; | |
737 | } | |
738 | } | |
739 | ||
740 | private Base64() { } // don't instantiate | |
741 | } |