Commit | Line | Data |
---|---|---|
ec1f3444 NR |
1 | package be.nikiroo.utils; |
2 | ||
a6a73de3 NR |
3 | import java.io.ByteArrayInputStream; |
4 | import java.io.ByteArrayOutputStream; | |
ec1f3444 | 5 | import java.io.IOException; |
a359464f NR |
6 | import java.io.InputStream; |
7 | import java.io.OutputStream; | |
3f8349b7 | 8 | import java.io.UnsupportedEncodingException; |
ec1f3444 NR |
9 | import java.security.MessageDigest; |
10 | import java.security.NoSuchAlgorithmException; | |
11 | import java.text.Normalizer; | |
12 | import java.text.Normalizer.Form; | |
13 | import java.text.ParseException; | |
14 | import java.text.SimpleDateFormat; | |
dc22eb95 NR |
15 | import java.util.AbstractMap; |
16 | import java.util.ArrayList; | |
ec1f3444 | 17 | import java.util.Date; |
cc3e7291 | 18 | import java.util.List; |
dc22eb95 | 19 | import java.util.Map.Entry; |
ec1f3444 | 20 | import java.util.regex.Pattern; |
a6a73de3 NR |
21 | import java.util.zip.GZIPInputStream; |
22 | import java.util.zip.GZIPOutputStream; | |
ec1f3444 | 23 | |
ec1f3444 NR |
24 | import org.unbescape.html.HtmlEscape; |
25 | import org.unbescape.html.HtmlEscapeLevel; | |
26 | import org.unbescape.html.HtmlEscapeType; | |
27 | ||
f28a134e | 28 | import be.nikiroo.utils.streams.Base64InputStream; |
f28a134e | 29 | |
ec1f3444 NR |
30 | /** |
31 | * This class offers some utilities based around {@link String}s. | |
32 | * | |
33 | * @author niki | |
34 | */ | |
35 | public class StringUtils { | |
/**
 * Alignment applied to a {@link String} when it is padded or justified (only
 * relevant when there is enough horizontal space left to align it).
 */
public enum Alignment {
    /** Aligned at left. */
    LEFT,
    /** Centered. */
    CENTER,
    /** Aligned at right. */
    RIGHT,
    /** Full justified (to both left and right). */
    JUSTIFY,

    // Old deprecated values, kept so existing callers still compile:

    /** DEPRECATED: please use LEFT. */
    @Deprecated
    Beginning,
    /** DEPRECATED: please use CENTER. */
    @Deprecated
    Center,
    /** DEPRECATED: please use RIGHT. */
    @Deprecated
    End;

    /**
     * Map a deprecated value to its current replacement; a value that is
     * not deprecated is returned unchanged.
     *
     * @return the non-deprecated value
     */
    Alignment undeprecate() {
        switch (this) {
        case Beginning:
            return LEFT;
        case Center:
            return CENTER;
        case End:
            return RIGHT;
        default:
            return this;
        }
    }
}
79 | ||
/**
 * The "remove accents" pattern built once by getMarks(); it can be NULL if
 * the pattern could not be compiled, so users must check it before use.
 */
static private Pattern marks = getMarks();
ec1f3444 NR |
81 | |
82 | /** | |
83 | * Fix the size of the given {@link String} either with space-padding or by | |
84 | * shortening it. | |
85 | * | |
86 | * @param text | |
87 | * the {@link String} to fix | |
88 | * @param width | |
89 | * the size of the resulting {@link String} or -1 for a noop | |
90 | * | |
91 | * @return the resulting {@link String} of size <i>size</i> | |
92 | */ | |
93 | static public String padString(String text, int width) { | |
451f434b | 94 | return padString(text, width, true, null); |
ec1f3444 NR |
95 | } |
96 | ||
97 | /** | |
98 | * Fix the size of the given {@link String} either with space-padding or by | |
99 | * optionally shortening it. | |
100 | * | |
101 | * @param text | |
102 | * the {@link String} to fix | |
103 | * @param width | |
104 | * the size of the resulting {@link String} if the text fits or | |
105 | * if cut is TRUE or -1 for a noop | |
106 | * @param cut | |
107 | * cut the {@link String} shorter if needed | |
108 | * @param align | |
109 | * align the {@link String} in this position if we have enough | |
451f434b | 110 | * space (default is Alignment.Beginning) |
ec1f3444 NR |
111 | * |
112 | * @return the resulting {@link String} of size <i>size</i> minimum | |
113 | */ | |
114 | static public String padString(String text, int width, boolean cut, | |
115 | Alignment align) { | |
116 | ||
451f434b | 117 | if (align == null) { |
cc3e7291 | 118 | align = Alignment.LEFT; |
451f434b NR |
119 | } |
120 | ||
cc3e7291 NR |
121 | align = align.undeprecate(); |
122 | ||
ec1f3444 NR |
123 | if (width >= 0) { |
124 | if (text == null) | |
125 | text = ""; | |
126 | ||
127 | int diff = width - text.length(); | |
128 | ||
129 | if (diff < 0) { | |
130 | if (cut) | |
131 | text = text.substring(0, width); | |
132 | } else if (diff > 0) { | |
cc3e7291 NR |
133 | if (diff < 2 && align != Alignment.RIGHT) |
134 | align = Alignment.LEFT; | |
ec1f3444 NR |
135 | |
136 | switch (align) { | |
cc3e7291 | 137 | case RIGHT: |
ec1f3444 NR |
138 | text = new String(new char[diff]).replace('\0', ' ') + text; |
139 | break; | |
cc3e7291 | 140 | case CENTER: |
ec1f3444 NR |
141 | int pad1 = (diff) / 2; |
142 | int pad2 = (diff + 1) / 2; | |
143 | text = new String(new char[pad1]).replace('\0', ' ') + text | |
144 | + new String(new char[pad2]).replace('\0', ' '); | |
145 | break; | |
cc3e7291 NR |
146 | case LEFT: |
147 | default: | |
148 | text = text + new String(new char[diff]).replace('\0', ' '); | |
149 | break; | |
ec1f3444 NR |
150 | } |
151 | } | |
152 | } | |
153 | ||
154 | return text; | |
155 | } | |
156 | ||
cc3e7291 NR |
157 | /** |
158 | * Justify a text into width-sized (at the maximum) lines. | |
159 | * | |
160 | * @param text | |
161 | * the {@link String} to justify | |
162 | * @param width | |
163 | * the maximum size of the resulting lines | |
164 | * | |
165 | * @return a list of justified text lines | |
166 | */ | |
167 | static public List<String> justifyText(String text, int width) { | |
168 | return justifyText(text, width, null); | |
169 | } | |
170 | ||
171 | /** | |
172 | * Justify a text into width-sized (at the maximum) lines. | |
173 | * | |
174 | * @param text | |
175 | * the {@link String} to justify | |
176 | * @param width | |
177 | * the maximum size of the resulting lines | |
178 | * @param align | |
179 | * align the lines in this position (default is | |
180 | * Alignment.Beginning) | |
181 | * | |
182 | * @return a list of justified text lines | |
183 | */ | |
184 | static public List<String> justifyText(String text, int width, | |
185 | Alignment align) { | |
186 | if (align == null) { | |
187 | align = Alignment.LEFT; | |
188 | } | |
189 | ||
190 | align = align.undeprecate(); | |
191 | ||
192 | switch (align) { | |
193 | case CENTER: | |
194 | return StringJustifier.center(text, width); | |
195 | case RIGHT: | |
196 | return StringJustifier.right(text, width); | |
197 | case JUSTIFY: | |
198 | return StringJustifier.full(text, width); | |
199 | case LEFT: | |
200 | default: | |
201 | return StringJustifier.left(text, width); | |
202 | } | |
203 | } | |
204 | ||
dc22eb95 NR |
205 | /** |
206 | * Justify a text into width-sized (at the maximum) lines. | |
207 | * | |
208 | * @param text | |
209 | * the {@link String} to justify | |
210 | * @param width | |
211 | * the maximum size of the resulting lines | |
212 | * | |
213 | * @return a list of justified text lines | |
214 | */ | |
215 | static public List<String> justifyText(List<String> text, int width) { | |
216 | return justifyText(text, width, null); | |
217 | } | |
218 | ||
219 | /** | |
220 | * Justify a text into width-sized (at the maximum) lines. | |
221 | * | |
222 | * @param text | |
223 | * the {@link String} to justify | |
224 | * @param width | |
225 | * the maximum size of the resulting lines | |
226 | * @param align | |
227 | * align the lines in this position (default is | |
228 | * Alignment.Beginning) | |
229 | * | |
230 | * @return a list of justified text lines | |
231 | */ | |
232 | static public List<String> justifyText(List<String> text, int width, | |
233 | Alignment align) { | |
234 | List<String> result = new ArrayList<String>(); | |
235 | ||
236 | // Content <-> Bullet spacing (null = no spacing) | |
237 | List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>(); | |
238 | StringBuilder previous = null; | |
239 | StringBuilder tmp = new StringBuilder(); | |
240 | String previousItemBulletSpacing = null; | |
241 | String itemBulletSpacing = null; | |
242 | for (String inputLine : text) { | |
243 | boolean previousLineComplete = true; | |
244 | ||
245 | String current = inputLine.replace("\t", " "); | |
246 | itemBulletSpacing = getItemSpacing(current); | |
247 | boolean bullet = isItemLine(current); | |
248 | if ((previousItemBulletSpacing == null || itemBulletSpacing | |
249 | .length() <= previousItemBulletSpacing.length()) && !bullet) { | |
250 | itemBulletSpacing = null; | |
251 | } | |
252 | ||
253 | if (itemBulletSpacing != null) { | |
254 | current = current.trim(); | |
255 | if (!current.isEmpty() && bullet) { | |
256 | current = current.substring(1); | |
257 | } | |
258 | current = current.trim(); | |
259 | previousLineComplete = bullet; | |
260 | } else { | |
261 | tmp.setLength(0); | |
262 | for (String word : current.split(" ")) { | |
263 | if (word.isEmpty()) { | |
264 | continue; | |
265 | } | |
266 | ||
267 | if (tmp.length() > 0) { | |
268 | tmp.append(' '); | |
269 | } | |
270 | tmp.append(word.trim()); | |
271 | } | |
272 | current = tmp.toString(); | |
273 | ||
274 | previousLineComplete = current.isEmpty() | |
275 | || previousItemBulletSpacing != null | |
c0c091af NR |
276 | || (previous != null && isFullLine(previous)) |
277 | || isHrLine(current) || isHrLine(previous); | |
dc22eb95 NR |
278 | } |
279 | ||
280 | if (previous == null) { | |
281 | previous = new StringBuilder(); | |
282 | } else { | |
283 | if (previousLineComplete) { | |
284 | lines.add(new AbstractMap.SimpleEntry<String, String>( | |
285 | previous.toString(), previousItemBulletSpacing)); | |
286 | previous.setLength(0); | |
287 | previousItemBulletSpacing = itemBulletSpacing; | |
288 | } else { | |
289 | previous.append(' '); | |
290 | } | |
291 | } | |
292 | ||
293 | previous.append(current); | |
294 | ||
295 | } | |
296 | ||
297 | if (previous != null) { | |
298 | lines.add(new AbstractMap.SimpleEntry<String, String>(previous | |
299 | .toString(), previousItemBulletSpacing)); | |
300 | } | |
301 | ||
302 | for (Entry<String, String> line : lines) { | |
303 | String content = line.getKey(); | |
304 | String spacing = line.getValue(); | |
305 | ||
306 | String bullet = "- "; | |
307 | if (spacing == null) { | |
308 | bullet = ""; | |
309 | spacing = ""; | |
310 | } | |
311 | ||
312 | if (spacing.length() > width + 3) { | |
313 | spacing = ""; | |
314 | } | |
315 | ||
316 | for (String subline : StringUtils.justifyText(content, width | |
317 | - (spacing.length() + bullet.length()), align)) { | |
318 | result.add(spacing + bullet + subline); | |
319 | if (!bullet.isEmpty()) { | |
320 | bullet = " "; | |
321 | } | |
322 | } | |
323 | } | |
324 | ||
325 | return result; | |
326 | } | |
327 | ||
ec1f3444 NR |
328 | /** |
329 | * Sanitise the given input to make it more Terminal-friendly by removing | |
330 | * combining characters. | |
331 | * | |
332 | * @param input | |
333 | * the input to sanitise | |
334 | * @param allowUnicode | |
335 | * allow Unicode or only allow ASCII Latin characters | |
336 | * | |
337 | * @return the sanitised {@link String} | |
338 | */ | |
339 | static public String sanitize(String input, boolean allowUnicode) { | |
340 | return sanitize(input, allowUnicode, !allowUnicode); | |
341 | } | |
342 | ||
343 | /** | |
344 | * Sanitise the given input to make it more Terminal-friendly by removing | |
345 | * combining characters. | |
346 | * | |
347 | * @param input | |
348 | * the input to sanitise | |
349 | * @param allowUnicode | |
350 | * allow Unicode or only allow ASCII Latin characters | |
351 | * @param removeAllAccents | |
352 | * TRUE to replace all accentuated characters by their non | |
353 | * accentuated counter-parts | |
354 | * | |
355 | * @return the sanitised {@link String} | |
356 | */ | |
357 | static public String sanitize(String input, boolean allowUnicode, | |
358 | boolean removeAllAccents) { | |
359 | ||
360 | if (removeAllAccents) { | |
361 | input = Normalizer.normalize(input, Form.NFKD); | |
e8aa5bf9 NR |
362 | if (marks != null) { |
363 | input = marks.matcher(input).replaceAll(""); | |
364 | } | |
ec1f3444 NR |
365 | } |
366 | ||
367 | input = Normalizer.normalize(input, Form.NFKC); | |
368 | ||
369 | if (!allowUnicode) { | |
370 | StringBuilder builder = new StringBuilder(); | |
371 | for (int index = 0; index < input.length(); index++) { | |
372 | char car = input.charAt(index); | |
373 | // displayable chars in ASCII are in the range 32<->255, | |
374 | // except DEL (127) | |
375 | if (car >= 32 && car <= 255 && car != 127) { | |
376 | builder.append(car); | |
377 | } | |
378 | } | |
379 | input = builder.toString(); | |
380 | } | |
381 | ||
382 | return input; | |
383 | } | |
384 | ||
385 | /** | |
451f434b NR |
386 | * Convert between the time in milliseconds to a {@link String} in a "fixed" |
387 | * way (to exchange data over the wire, for instance). | |
388 | * <p> | |
389 | * Precise to the second. | |
ec1f3444 NR |
390 | * |
391 | * @param time | |
451f434b NR |
392 | * the specified number of milliseconds since the standard base |
393 | * time known as "the epoch", namely January 1, 1970, 00:00:00 | |
394 | * GMT | |
ec1f3444 NR |
395 | * |
396 | * @return the time as a {@link String} | |
397 | */ | |
398 | static public String fromTime(long time) { | |
399 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); | |
400 | return sdf.format(new Date(time)); | |
401 | } | |
402 | ||
403 | /** | |
451f434b | 404 | * Convert between the time as a {@link String} to milliseconds in a "fixed" |
ec1f3444 | 405 | * way (to exchange data over the wire, for instance). |
451f434b NR |
406 | * <p> |
407 | * Precise to the second. | |
ec1f3444 | 408 | * |
db31c358 | 409 | * @param displayTime |
ec1f3444 NR |
410 | * the time as a {@link String} |
411 | * | |
451f434b | 412 | * @return the number of milliseconds since the standard base time known as |
e8aa5bf9 NR |
413 | * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case |
414 | * of error | |
415 | * | |
416 | * @throws ParseException | |
417 | * in case of parse error | |
ec1f3444 | 418 | */ |
e8aa5bf9 | 419 | static public long toTime(String displayTime) throws ParseException { |
ec1f3444 | 420 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
e8aa5bf9 | 421 | return sdf.parse(displayTime).getTime(); |
ec1f3444 NR |
422 | } |
423 | ||
ec1f3444 NR |
424 | /** |
425 | * Return a hash of the given {@link String}. | |
426 | * | |
427 | * @param input | |
428 | * the input data | |
429 | * | |
430 | * @return the hash | |
431 | */ | |
b771aed5 | 432 | static public String getMd5Hash(String input) { |
ec1f3444 NR |
433 | try { |
434 | MessageDigest md = MessageDigest.getInstance("MD5"); | |
f8147a0e | 435 | md.update(getBytes(input)); |
ec1f3444 NR |
436 | byte byteData[] = md.digest(); |
437 | ||
438 | StringBuffer hexString = new StringBuffer(); | |
439 | for (int i = 0; i < byteData.length; i++) { | |
440 | String hex = Integer.toHexString(0xff & byteData[i]); | |
441 | if (hex.length() == 1) | |
442 | hexString.append('0'); | |
443 | hexString.append(hex); | |
444 | } | |
445 | ||
446 | return hexString.toString(); | |
447 | } catch (NoSuchAlgorithmException e) { | |
448 | return input; | |
449 | } | |
450 | } | |
451 | ||
ec1f3444 NR |
452 | /** |
453 | * Remove the HTML content from the given input, and un-html-ize the rest. | |
454 | * | |
455 | * @param html | |
456 | * the HTML-encoded content | |
457 | * | |
458 | * @return the HTML-free equivalent content | |
459 | */ | |
460 | public static String unhtml(String html) { | |
461 | StringBuilder builder = new StringBuilder(); | |
462 | ||
463 | int inTag = 0; | |
464 | for (char car : html.toCharArray()) { | |
465 | if (car == '<') { | |
466 | inTag++; | |
467 | } else if (car == '>') { | |
468 | inTag--; | |
469 | } else if (inTag <= 0) { | |
470 | builder.append(car); | |
471 | } | |
472 | } | |
473 | ||
7ee9568b NR |
474 | char nbsp = ' '; // non-breakable space (a special char) |
475 | char space = ' '; | |
476 | return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space); | |
ec1f3444 NR |
477 | } |
478 | ||
479 | /** | |
480 | * Escape the given {@link String} so it can be used in XML, as content. | |
481 | * | |
482 | * @param input | |
483 | * the input {@link String} | |
484 | * | |
485 | * @return the escaped {@link String} | |
486 | */ | |
487 | public static String xmlEscape(String input) { | |
488 | if (input == null) { | |
489 | return ""; | |
490 | } | |
491 | ||
492 | return HtmlEscape.escapeHtml(input, | |
493 | HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA, | |
494 | HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT); | |
495 | } | |
496 | ||
497 | /** | |
498 | * Escape the given {@link String} so it can be used in XML, as text content | |
499 | * inside double-quotes. | |
500 | * | |
501 | * @param input | |
502 | * the input {@link String} | |
503 | * | |
504 | * @return the escaped {@link String} | |
505 | */ | |
506 | public static String xmlEscapeQuote(String input) { | |
507 | if (input == null) { | |
508 | return ""; | |
509 | } | |
510 | ||
511 | return HtmlEscape.escapeHtml(input, | |
512 | HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA, | |
513 | HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT); | |
514 | } | |
db31c358 | 515 | |
80500544 NR |
516 | /** |
517 | * Zip the data and then encode it into Base64. | |
518 | * | |
519 | * @param data | |
520 | * the data | |
521 | * | |
522 | * @return the Base64 zipped version | |
a6a73de3 NR |
523 | * |
524 | * @throws IOException | |
525 | * in case of I/O error | |
80500544 | 526 | */ |
a6a73de3 | 527 | public static String zip64s(String data) throws IOException { |
db31c358 | 528 | try { |
f8147a0e | 529 | return zip64(getBytes(data)); |
a6a73de3 NR |
530 | } catch (UnsupportedEncodingException e) { |
531 | // All conforming JVM are required to support UTF-8 | |
db31c358 NR |
532 | e.printStackTrace(); |
533 | return null; | |
534 | } | |
535 | } | |
536 | ||
80500544 | 537 | /** |
a6a73de3 | 538 | * Zip the data and then encode it into Base64. |
bb60bd13 | 539 | * |
80500544 | 540 | * @param data |
a6a73de3 | 541 | * the data |
80500544 | 542 | * |
a6a73de3 | 543 | * @return the Base64 zipped version |
80500544 NR |
544 | * |
545 | * @throws IOException | |
546 | * in case of I/O error | |
547 | */ | |
a6a73de3 NR |
548 | public static String zip64(byte[] data) throws IOException { |
549 | // 1. compress | |
550 | ByteArrayOutputStream bout = new ByteArrayOutputStream(); | |
551 | try { | |
552 | OutputStream out = new GZIPOutputStream(bout); | |
553 | try { | |
554 | out.write(data); | |
555 | } finally { | |
556 | out.close(); | |
557 | } | |
558 | } finally { | |
559 | data = bout.toByteArray(); | |
560 | bout.close(); | |
561 | } | |
562 | ||
563 | // 2. base64 | |
564 | InputStream in = new ByteArrayInputStream(data); | |
565 | try { | |
566 | in = new Base64InputStream(in, true); | |
567 | return new String(IOUtils.toByteArray(in), "UTF-8"); | |
568 | } finally { | |
569 | in.close(); | |
570 | } | |
db31c358 | 571 | } |
e8aa5bf9 | 572 | |
a359464f | 573 | /** |
a6a73de3 NR |
574 | * Unconvert from Base64 then unzip the content, which is assumed to be a |
575 | * String. | |
a359464f NR |
576 | * |
577 | * @param data | |
a6a73de3 | 578 | * the data in Base64 format |
a359464f | 579 | * |
a6a73de3 | 580 | * @return the raw data |
a359464f NR |
581 | * |
582 | * @throws IOException | |
a6a73de3 | 583 | * in case of I/O error |
a359464f | 584 | */ |
a6a73de3 NR |
585 | public static String unzip64s(String data) throws IOException { |
586 | return new String(unzip64(data), "UTF-8"); | |
a359464f NR |
587 | } |
588 | ||
bb60bd13 | 589 | /** |
a6a73de3 | 590 | * Unconvert from Base64 then unzip the content. |
bb60bd13 NR |
591 | * |
592 | * @param data | |
a6a73de3 | 593 | * the data in Base64 format |
bb60bd13 | 594 | * |
a6a73de3 | 595 | * @return the raw data |
bb60bd13 NR |
596 | * |
597 | * @throws IOException | |
a6a73de3 | 598 | * in case of I/O error |
bb60bd13 | 599 | */ |
a6a73de3 NR |
600 | public static byte[] unzip64(String data) throws IOException { |
601 | InputStream in = new Base64InputStream(new ByteArrayInputStream( | |
f8147a0e | 602 | getBytes(data)), false); |
a6a73de3 NR |
603 | try { |
604 | in = new GZIPInputStream(in); | |
605 | return IOUtils.toByteArray(in); | |
606 | } finally { | |
607 | in.close(); | |
608 | } | |
bb60bd13 NR |
609 | } |
610 | ||
611 | /** | |
a359464f NR |
612 | * Convert the given data to Base64 format. |
613 | * | |
614 | * @param data | |
615 | * the data to convert | |
a359464f NR |
616 | * |
617 | * @return the Base64 {@link String} representation of the data | |
618 | * | |
619 | * @throws IOException | |
620 | * in case of I/O errors | |
621 | */ | |
a6a73de3 | 622 | public static String base64(String data) throws IOException { |
f8147a0e | 623 | return base64(getBytes(data)); |
a359464f NR |
624 | } |
625 | ||
626 | /** | |
627 | * Convert the given data to Base64 format. | |
628 | * | |
629 | * @param data | |
630 | * the data to convert | |
a359464f NR |
631 | * |
632 | * @return the Base64 {@link String} representation of the data | |
633 | * | |
634 | * @throws IOException | |
635 | * in case of I/O errors | |
636 | */ | |
a6a73de3 NR |
637 | public static String base64(byte[] data) throws IOException { |
638 | Base64InputStream in = new Base64InputStream(new ByteArrayInputStream( | |
639 | data), true); | |
640 | try { | |
641 | return new String(IOUtils.toByteArray(in), "UTF-8"); | |
642 | } finally { | |
643 | in.close(); | |
a359464f | 644 | } |
a359464f NR |
645 | } |
646 | ||
647 | /** | |
648 | * Unconvert the given data from Base64 format back to a raw array of bytes. | |
bb60bd13 NR |
649 | * |
650 | * @param data | |
651 | * the data to unconvert | |
bb60bd13 NR |
652 | * |
653 | * @return the raw data represented by the given Base64 {@link String}, | |
bb60bd13 NR |
654 | * |
655 | * @throws IOException | |
656 | * in case of I/O errors | |
657 | */ | |
a6a73de3 NR |
658 | public static byte[] unbase64(String data) throws IOException { |
659 | Base64InputStream in = new Base64InputStream(new ByteArrayInputStream( | |
f8147a0e | 660 | getBytes(data)), false); |
a6a73de3 NR |
661 | try { |
662 | return IOUtils.toByteArray(in); | |
663 | } finally { | |
664 | in.close(); | |
a359464f | 665 | } |
a359464f NR |
666 | } |
667 | ||
bb60bd13 NR |
668 | /** |
669 | * Unonvert the given data from Base64 format back to a {@link String}. | |
670 | * | |
671 | * @param data | |
672 | * the data to unconvert | |
bb60bd13 NR |
673 | * |
674 | * @return the {@link String} represented by the given Base64 {@link String} | |
bb60bd13 NR |
675 | * |
676 | * @throws IOException | |
677 | * in case of I/O errors | |
678 | */ | |
a6a73de3 NR |
679 | public static String unbase64s(String data) throws IOException { |
680 | return new String(unbase64(data), "UTF-8"); | |
bb60bd13 NR |
681 | } |
682 | ||
d1e63903 NR |
683 | /** |
684 | * Return a display {@link String} for the given value, which can be | |
685 | * suffixed with "k" or "M" depending upon the number, if it is big enough. | |
686 | * <p> | |
79961c53 NR |
687 | * <p> |
688 | * Examples: | |
d1e63903 | 689 | * <ul> |
79961c53 NR |
690 | * <li><tt>8 765</tt> becomes "8k"</li> |
691 | * <li><tt>998 765</tt> becomes "998k"</li> | |
692 | * <li><tt>12 987 364</tt> becomes "12M"</li> | |
693 | * <li><tt>5 534 333 221</tt> becomes "5G"</li> | |
d1e63903 NR |
694 | * </ul> |
695 | * | |
696 | * @param value | |
697 | * the value to convert | |
698 | * | |
699 | * @return the display value | |
700 | */ | |
701 | public static String formatNumber(long value) { | |
5b46737c | 702 | return formatNumber(value, 0); |
d1e63903 NR |
703 | } |
704 | ||
705 | /** | |
706 | * Return a display {@link String} for the given value, which can be | |
707 | * suffixed with "k" or "M" depending upon the number, if it is big enough. | |
708 | * <p> | |
79961c53 | 709 | * Examples (assuming decimalPositions = 1): |
d1e63903 | 710 | * <ul> |
79961c53 NR |
711 | * <li><tt>8 765</tt> becomes "8.7k"</li> |
712 | * <li><tt>998 765</tt> becomes "998.7k"</li> | |
713 | * <li><tt>12 987 364</tt> becomes "12.9M"</li> | |
714 | * <li><tt>5 534 333 221</tt> becomes "5.5G"</li> | |
d1e63903 NR |
715 | * </ul> |
716 | * | |
717 | * @param value | |
718 | * the value to convert | |
5b46737c NR |
719 | * @param decimalPositions |
720 | * the number of decimal positions to keep | |
d1e63903 NR |
721 | * |
722 | * @return the display value | |
723 | */ | |
5b46737c | 724 | public static String formatNumber(long value, int decimalPositions) { |
79961c53 | 725 | long userValue = value; |
5b46737c | 726 | String suffix = ""; |
79961c53 | 727 | long mult = 1; |
5b46737c | 728 | |
8758aebb | 729 | if (value >= 1000000000l) { |
79961c53 NR |
730 | mult = 1000000000l; |
731 | userValue = value / 1000000000l; | |
39d16a80 | 732 | suffix = " G"; |
8758aebb | 733 | } else if (value >= 1000000l) { |
79961c53 NR |
734 | mult = 1000000l; |
735 | userValue = value / 1000000l; | |
39d16a80 | 736 | suffix = " M"; |
5b46737c | 737 | } else if (value >= 1000l) { |
79961c53 NR |
738 | mult = 1000l; |
739 | userValue = value / 1000l; | |
39d16a80 | 740 | suffix = " k"; |
d1e63903 NR |
741 | } |
742 | ||
79961c53 | 743 | String deci = ""; |
5b46737c | 744 | if (decimalPositions > 0) { |
79961c53 NR |
745 | deci = Long.toString(value % mult); |
746 | int size = Long.toString(mult).length() - 1; | |
747 | while (deci.length() < size) { | |
748 | deci = "0" + deci; | |
749 | } | |
750 | ||
5b46737c NR |
751 | deci = deci.substring(0, Math.min(decimalPositions, deci.length())); |
752 | while (deci.length() < decimalPositions) { | |
753 | deci += "0"; | |
754 | } | |
79961c53 | 755 | |
5b46737c | 756 | deci = "." + deci; |
d1e63903 NR |
757 | } |
758 | ||
79961c53 | 759 | return Long.toString(userValue) + deci + suffix; |
d1e63903 NR |
760 | } |
761 | ||
60033478 NR |
762 | /** |
763 | * The reverse operation to {@link StringUtils#formatNumber(long)}: it will | |
764 | * read a "display" number that can contain a "M" or "k" suffix and return | |
765 | * the full value. | |
766 | * <p> | |
767 | * Of course, the conversion to and from display form is lossy (example: | |
5b46737c | 768 | * <tt>6870</tt> to "6.5k" to <tt>6500</tt>). |
60033478 NR |
769 | * |
770 | * @param value | |
771 | * the value in display form with possible "M" and "k" suffixes, | |
772 | * can be NULL | |
773 | * | |
774 | * @return the value as a number, or 0 if not possible to convert | |
775 | */ | |
776 | public static long toNumber(String value) { | |
777 | return toNumber(value, 0l); | |
778 | } | |
779 | ||
780 | /** | |
781 | * The reverse operation to {@link StringUtils#formatNumber(long)}: it will | |
782 | * read a "display" number that can contain a "M" or "k" suffix and return | |
783 | * the full value. | |
784 | * <p> | |
785 | * Of course, the conversion to and from display form is lossy (example: | |
5b46737c | 786 | * <tt>6870</tt> to "6.5k" to <tt>6500</tt>). |
60033478 NR |
787 | * |
788 | * @param value | |
789 | * the value in display form with possible "M" and "k" suffixes, | |
790 | * can be NULL | |
791 | * @param def | |
792 | * the default value if it is not possible to convert the given | |
793 | * value to a number | |
794 | * | |
795 | * @return the value as a number, or 0 if not possible to convert | |
796 | */ | |
797 | public static long toNumber(String value, long def) { | |
798 | long count = def; | |
799 | if (value != null) { | |
5b46737c | 800 | value = value.trim().toLowerCase(); |
60033478 | 801 | try { |
79961c53 NR |
802 | long mult = 1; |
803 | if (value.endsWith("g")) { | |
804 | value = value.substring(0, value.length() - 1).trim(); | |
805 | mult = 1000000000; | |
806 | } else if (value.endsWith("m")) { | |
5b46737c NR |
807 | value = value.substring(0, value.length() - 1).trim(); |
808 | mult = 1000000; | |
809 | } else if (value.endsWith("k")) { | |
810 | value = value.substring(0, value.length() - 1).trim(); | |
811 | mult = 1000; | |
812 | } | |
813 | ||
814 | long deci = 0; | |
815 | if (value.contains(".")) { | |
816 | String[] tab = value.split("\\."); | |
817 | if (tab.length != 2) { | |
818 | throw new NumberFormatException(value); | |
819 | } | |
820 | double decimal = Double.parseDouble("0." | |
821 | + tab[tab.length - 1]); | |
822 | deci = ((long) (mult * decimal)); | |
823 | value = tab[0]; | |
60033478 | 824 | } |
5b46737c NR |
825 | count = mult * Long.parseLong(value) + deci; |
826 | } catch (Exception e) { | |
60033478 NR |
827 | } |
828 | } | |
829 | ||
830 | return count; | |
831 | } | |
832 | ||
f8147a0e NR |
833 | /** |
834 | * Return the bytes array representation of the given {@link String} in | |
835 | * UTF-8. | |
836 | * | |
837 | * @param str | |
838 | * the {@link String} to transform into bytes | |
839 | * @return the content in bytes | |
840 | */ | |
841 | static public byte[] getBytes(String str) { | |
842 | try { | |
843 | return str.getBytes("UTF-8"); | |
844 | } catch (UnsupportedEncodingException e) { | |
845 | // All conforming JVM must support UTF-8 | |
846 | e.printStackTrace(); | |
847 | return null; | |
848 | } | |
849 | } | |
850 | ||
e8aa5bf9 NR |
851 | /** |
852 | * The "remove accents" pattern. | |
853 | * | |
854 | * @return the pattern, or NULL if a problem happens | |
855 | */ | |
856 | private static Pattern getMarks() { | |
857 | try { | |
858 | return Pattern | |
859 | .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+"); | |
860 | } catch (Exception e) { | |
861 | // Can fail on Android... | |
862 | return null; | |
863 | } | |
864 | } | |
dc22eb95 | 865 | |
bb60bd13 | 866 | // |
dc22eb95 | 867 | // justify List<String> related: |
bb60bd13 | 868 | // |
dc22eb95 | 869 | |
bb60bd13 NR |
870 | /** |
871 | * Check if this line ends as a complete line (ends with a "." or similar). | |
872 | * <p> | |
873 | * Note that we consider an empty line as full, and a line ending with | |
874 | * spaces as not complete. | |
875 | * | |
876 | * @param line | |
877 | * the line to check | |
878 | * | |
879 | * @return TRUE if it does | |
880 | */ | |
dc22eb95 | 881 | static private boolean isFullLine(StringBuilder line) { |
bb60bd13 NR |
882 | if (line.length() == 0) { |
883 | return true; | |
884 | } | |
885 | ||
886 | char lastCar = line.charAt(line.length() - 1); | |
887 | switch (lastCar) { | |
888 | case '.': // points | |
889 | case '?': | |
890 | case '!': | |
891 | ||
892 | case '\'': // quotes | |
893 | case '‘': | |
894 | case '’': | |
895 | ||
896 | case '"': // double quotes | |
897 | case '”': | |
898 | case '“': | |
899 | case '»': | |
900 | case '«': | |
901 | return true; | |
902 | default: | |
903 | return false; | |
904 | } | |
dc22eb95 NR |
905 | } |
906 | ||
bb60bd13 NR |
907 | /** |
908 | * Check if this line represent an item in a list or description (i.e., | |
909 | * check that the first non-space char is "-"). | |
910 | * | |
911 | * @param line | |
912 | * the line to check | |
913 | * | |
914 | * @return TRUE if it is | |
915 | */ | |
dc22eb95 NR |
916 | static private boolean isItemLine(String line) { |
917 | String spacing = getItemSpacing(line); | |
c0c091af NR |
918 | return spacing != null && !spacing.isEmpty() |
919 | && line.charAt(spacing.length()) == '-'; | |
dc22eb95 NR |
920 | } |
921 | ||
bb60bd13 NR |
922 | /** |
923 | * Return all the spaces that start this line (or Empty if none). | |
924 | * | |
925 | * @param line | |
926 | * the line to get the starting spaces from | |
927 | * | |
928 | * @return the left spacing | |
929 | */ | |
dc22eb95 NR |
930 | static private String getItemSpacing(String line) { |
931 | int i; | |
932 | for (i = 0; i < line.length(); i++) { | |
933 | if (line.charAt(i) != ' ') { | |
934 | return line.substring(0, i); | |
935 | } | |
936 | } | |
937 | ||
938 | return ""; | |
939 | } | |
c0c091af | 940 | |
bb60bd13 NR |
941 | /** |
942 | * This line is an horizontal spacer line. | |
943 | * | |
944 | * @param line | |
945 | * the line to test | |
946 | * | |
947 | * @return TRUE if it is | |
948 | */ | |
c0c091af NR |
949 | static private boolean isHrLine(CharSequence line) { |
950 | int count = 0; | |
951 | if (line != null) { | |
952 | for (int i = 0; i < line.length(); i++) { | |
953 | char car = line.charAt(i); | |
954 | if (car == ' ' || car == '\t' || car == '*' || car == '-' | |
955 | || car == '_' || car == '~' || car == '=' || car == '/' | |
956 | || car == '\\') { | |
957 | count++; | |
958 | } else { | |
959 | return false; | |
960 | } | |
961 | } | |
962 | } | |
963 | ||
964 | return count > 2; | |
965 | } | |
ec1f3444 | 966 | } |