Commit | Line | Data |
---|---|---|
ec1f3444 NR |
1 | package be.nikiroo.utils; |
2 | ||
a6a73de3 NR |
3 | import java.io.ByteArrayInputStream; |
4 | import java.io.ByteArrayOutputStream; | |
ec1f3444 | 5 | import java.io.IOException; |
a359464f NR |
6 | import java.io.InputStream; |
7 | import java.io.OutputStream; | |
3f8349b7 | 8 | import java.io.UnsupportedEncodingException; |
ec1f3444 NR |
9 | import java.security.MessageDigest; |
10 | import java.security.NoSuchAlgorithmException; | |
11 | import java.text.Normalizer; | |
12 | import java.text.Normalizer.Form; | |
13 | import java.text.ParseException; | |
14 | import java.text.SimpleDateFormat; | |
dc22eb95 NR |
15 | import java.util.AbstractMap; |
16 | import java.util.ArrayList; | |
5221cf7f | 17 | import java.util.Arrays; |
ec1f3444 | 18 | import java.util.Date; |
cc3e7291 | 19 | import java.util.List; |
dc22eb95 | 20 | import java.util.Map.Entry; |
ec1f3444 | 21 | import java.util.regex.Pattern; |
a6a73de3 NR |
22 | import java.util.zip.GZIPInputStream; |
23 | import java.util.zip.GZIPOutputStream; | |
ec1f3444 | 24 | |
ec1f3444 NR |
25 | import org.unbescape.html.HtmlEscape; |
26 | import org.unbescape.html.HtmlEscapeLevel; | |
27 | import org.unbescape.html.HtmlEscapeType; | |
28 | ||
f28a134e | 29 | import be.nikiroo.utils.streams.Base64InputStream; |
5221cf7f | 30 | import be.nikiroo.utils.streams.Base64OutputStream; |
f28a134e | 31 | |
ec1f3444 NR |
32 | /** |
33 | * This class offer some utilities based around {@link String}s. | |
34 | * | |
35 | * @author niki | |
36 | */ | |
37 | public class StringUtils { | |
/**
 * The way a {@link String} is aligned when it is padded or justified (the
 * alignment only applies if there is enough horizontal space for it).
 */
public enum Alignment {
    /** Aligned at left. */
    LEFT,
    /** Centered. */
    CENTER,
    /** Aligned at right. */
    RIGHT,
    /** Full justified (to both left and right). */
    JUSTIFY,

    // Old deprecated values, kept so existing callers still compile:

    /** DEPRECATED: please use LEFT. */
    @Deprecated
    Beginning,
    /** DEPRECATED: please use CENTER. */
    @Deprecated
    Center,
    /** DEPRECATED: please use RIGHT. */
    @Deprecated
    End;

    /**
     * Translate a deprecated value into its current equivalent; values that
     * are not deprecated are returned unchanged.
     *
     * @return the non-deprecated value
     */
    Alignment undeprecate() {
        switch (this) {
        case Beginning:
            return LEFT;
        case Center:
            return CENTER;
        case End:
            return RIGHT;
        default:
            return this;
        }
    }
}
81 | ||
e8aa5bf9 | 82 | static private Pattern marks = getMarks(); |
ec1f3444 NR |
83 | |
84 | /** | |
85 | * Fix the size of the given {@link String} either with space-padding or by | |
86 | * shortening it. | |
87 | * | |
88 | * @param text | |
89 | * the {@link String} to fix | |
90 | * @param width | |
91 | * the size of the resulting {@link String} or -1 for a noop | |
92 | * | |
93 | * @return the resulting {@link String} of size <i>size</i> | |
94 | */ | |
95 | static public String padString(String text, int width) { | |
451f434b | 96 | return padString(text, width, true, null); |
ec1f3444 NR |
97 | } |
98 | ||
99 | /** | |
100 | * Fix the size of the given {@link String} either with space-padding or by | |
101 | * optionally shortening it. | |
102 | * | |
103 | * @param text | |
104 | * the {@link String} to fix | |
105 | * @param width | |
106 | * the size of the resulting {@link String} if the text fits or | |
107 | * if cut is TRUE or -1 for a noop | |
108 | * @param cut | |
109 | * cut the {@link String} shorter if needed | |
110 | * @param align | |
111 | * align the {@link String} in this position if we have enough | |
451f434b | 112 | * space (default is Alignment.Beginning) |
ec1f3444 NR |
113 | * |
114 | * @return the resulting {@link String} of size <i>size</i> minimum | |
115 | */ | |
116 | static public String padString(String text, int width, boolean cut, | |
117 | Alignment align) { | |
118 | ||
451f434b | 119 | if (align == null) { |
cc3e7291 | 120 | align = Alignment.LEFT; |
451f434b NR |
121 | } |
122 | ||
cc3e7291 NR |
123 | align = align.undeprecate(); |
124 | ||
ec1f3444 NR |
125 | if (width >= 0) { |
126 | if (text == null) | |
127 | text = ""; | |
128 | ||
129 | int diff = width - text.length(); | |
130 | ||
131 | if (diff < 0) { | |
132 | if (cut) | |
133 | text = text.substring(0, width); | |
134 | } else if (diff > 0) { | |
cc3e7291 NR |
135 | if (diff < 2 && align != Alignment.RIGHT) |
136 | align = Alignment.LEFT; | |
ec1f3444 NR |
137 | |
138 | switch (align) { | |
cc3e7291 | 139 | case RIGHT: |
ec1f3444 NR |
140 | text = new String(new char[diff]).replace('\0', ' ') + text; |
141 | break; | |
cc3e7291 | 142 | case CENTER: |
ec1f3444 NR |
143 | int pad1 = (diff) / 2; |
144 | int pad2 = (diff + 1) / 2; | |
145 | text = new String(new char[pad1]).replace('\0', ' ') + text | |
146 | + new String(new char[pad2]).replace('\0', ' '); | |
147 | break; | |
cc3e7291 NR |
148 | case LEFT: |
149 | default: | |
150 | text = text + new String(new char[diff]).replace('\0', ' '); | |
151 | break; | |
ec1f3444 NR |
152 | } |
153 | } | |
154 | } | |
155 | ||
156 | return text; | |
157 | } | |
158 | ||
35425f12 NR |
159 | /** |
160 | * Justify a text into width-sized (at the maximum) lines and return all the | |
161 | * lines concatenated into a single '\\n'-separated line of text. | |
162 | * | |
163 | * @param text | |
164 | * the {@link String} to justify | |
165 | * @param width | |
166 | * the maximum size of the resulting lines | |
167 | * | |
168 | * @return a list of justified text lines concatenated into a single | |
169 | * '\\n'-separated line of text | |
170 | */ | |
171 | static public String justifyTexts(String text, int width) { | |
172 | StringBuilder builder = new StringBuilder(); | |
173 | for (String line : justifyText(text, width, null)) { | |
174 | if (builder.length() > 0) { | |
175 | builder.append('\n'); | |
176 | } | |
177 | builder.append(line); | |
178 | } | |
179 | ||
180 | return builder.toString(); | |
181 | } | |
182 | ||
cc3e7291 NR |
183 | /** |
184 | * Justify a text into width-sized (at the maximum) lines. | |
185 | * | |
186 | * @param text | |
187 | * the {@link String} to justify | |
188 | * @param width | |
189 | * the maximum size of the resulting lines | |
190 | * | |
191 | * @return a list of justified text lines | |
192 | */ | |
193 | static public List<String> justifyText(String text, int width) { | |
194 | return justifyText(text, width, null); | |
195 | } | |
196 | ||
197 | /** | |
198 | * Justify a text into width-sized (at the maximum) lines. | |
199 | * | |
200 | * @param text | |
201 | * the {@link String} to justify | |
202 | * @param width | |
203 | * the maximum size of the resulting lines | |
204 | * @param align | |
205 | * align the lines in this position (default is | |
206 | * Alignment.Beginning) | |
207 | * | |
208 | * @return a list of justified text lines | |
209 | */ | |
210 | static public List<String> justifyText(String text, int width, | |
211 | Alignment align) { | |
212 | if (align == null) { | |
213 | align = Alignment.LEFT; | |
214 | } | |
215 | ||
216 | align = align.undeprecate(); | |
217 | ||
218 | switch (align) { | |
219 | case CENTER: | |
220 | return StringJustifier.center(text, width); | |
221 | case RIGHT: | |
222 | return StringJustifier.right(text, width); | |
223 | case JUSTIFY: | |
224 | return StringJustifier.full(text, width); | |
225 | case LEFT: | |
226 | default: | |
227 | return StringJustifier.left(text, width); | |
228 | } | |
229 | } | |
230 | ||
dc22eb95 NR |
231 | /** |
232 | * Justify a text into width-sized (at the maximum) lines. | |
233 | * | |
234 | * @param text | |
235 | * the {@link String} to justify | |
236 | * @param width | |
237 | * the maximum size of the resulting lines | |
238 | * | |
239 | * @return a list of justified text lines | |
240 | */ | |
241 | static public List<String> justifyText(List<String> text, int width) { | |
242 | return justifyText(text, width, null); | |
243 | } | |
244 | ||
245 | /** | |
246 | * Justify a text into width-sized (at the maximum) lines. | |
247 | * | |
248 | * @param text | |
249 | * the {@link String} to justify | |
250 | * @param width | |
251 | * the maximum size of the resulting lines | |
252 | * @param align | |
253 | * align the lines in this position (default is | |
254 | * Alignment.Beginning) | |
255 | * | |
256 | * @return a list of justified text lines | |
257 | */ | |
258 | static public List<String> justifyText(List<String> text, int width, | |
259 | Alignment align) { | |
260 | List<String> result = new ArrayList<String>(); | |
261 | ||
262 | // Content <-> Bullet spacing (null = no spacing) | |
263 | List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>(); | |
264 | StringBuilder previous = null; | |
265 | StringBuilder tmp = new StringBuilder(); | |
266 | String previousItemBulletSpacing = null; | |
267 | String itemBulletSpacing = null; | |
268 | for (String inputLine : text) { | |
269 | boolean previousLineComplete = true; | |
270 | ||
271 | String current = inputLine.replace("\t", " "); | |
272 | itemBulletSpacing = getItemSpacing(current); | |
273 | boolean bullet = isItemLine(current); | |
274 | if ((previousItemBulletSpacing == null || itemBulletSpacing | |
275 | .length() <= previousItemBulletSpacing.length()) && !bullet) { | |
276 | itemBulletSpacing = null; | |
277 | } | |
278 | ||
279 | if (itemBulletSpacing != null) { | |
280 | current = current.trim(); | |
281 | if (!current.isEmpty() && bullet) { | |
282 | current = current.substring(1); | |
283 | } | |
284 | current = current.trim(); | |
285 | previousLineComplete = bullet; | |
286 | } else { | |
287 | tmp.setLength(0); | |
288 | for (String word : current.split(" ")) { | |
289 | if (word.isEmpty()) { | |
290 | continue; | |
291 | } | |
292 | ||
293 | if (tmp.length() > 0) { | |
294 | tmp.append(' '); | |
295 | } | |
296 | tmp.append(word.trim()); | |
297 | } | |
298 | current = tmp.toString(); | |
299 | ||
300 | previousLineComplete = current.isEmpty() | |
301 | || previousItemBulletSpacing != null | |
c0c091af NR |
302 | || (previous != null && isFullLine(previous)) |
303 | || isHrLine(current) || isHrLine(previous); | |
dc22eb95 NR |
304 | } |
305 | ||
306 | if (previous == null) { | |
307 | previous = new StringBuilder(); | |
308 | } else { | |
309 | if (previousLineComplete) { | |
310 | lines.add(new AbstractMap.SimpleEntry<String, String>( | |
311 | previous.toString(), previousItemBulletSpacing)); | |
312 | previous.setLength(0); | |
313 | previousItemBulletSpacing = itemBulletSpacing; | |
314 | } else { | |
315 | previous.append(' '); | |
316 | } | |
317 | } | |
318 | ||
319 | previous.append(current); | |
320 | ||
321 | } | |
322 | ||
323 | if (previous != null) { | |
324 | lines.add(new AbstractMap.SimpleEntry<String, String>(previous | |
325 | .toString(), previousItemBulletSpacing)); | |
326 | } | |
327 | ||
328 | for (Entry<String, String> line : lines) { | |
329 | String content = line.getKey(); | |
330 | String spacing = line.getValue(); | |
331 | ||
332 | String bullet = "- "; | |
333 | if (spacing == null) { | |
334 | bullet = ""; | |
335 | spacing = ""; | |
336 | } | |
337 | ||
338 | if (spacing.length() > width + 3) { | |
339 | spacing = ""; | |
340 | } | |
341 | ||
342 | for (String subline : StringUtils.justifyText(content, width | |
343 | - (spacing.length() + bullet.length()), align)) { | |
344 | result.add(spacing + bullet + subline); | |
345 | if (!bullet.isEmpty()) { | |
346 | bullet = " "; | |
347 | } | |
348 | } | |
349 | } | |
350 | ||
351 | return result; | |
352 | } | |
353 | ||
ec1f3444 NR |
354 | /** |
355 | * Sanitise the given input to make it more Terminal-friendly by removing | |
356 | * combining characters. | |
357 | * | |
358 | * @param input | |
359 | * the input to sanitise | |
360 | * @param allowUnicode | |
361 | * allow Unicode or only allow ASCII Latin characters | |
362 | * | |
363 | * @return the sanitised {@link String} | |
364 | */ | |
365 | static public String sanitize(String input, boolean allowUnicode) { | |
366 | return sanitize(input, allowUnicode, !allowUnicode); | |
367 | } | |
368 | ||
369 | /** | |
370 | * Sanitise the given input to make it more Terminal-friendly by removing | |
371 | * combining characters. | |
372 | * | |
373 | * @param input | |
374 | * the input to sanitise | |
375 | * @param allowUnicode | |
376 | * allow Unicode or only allow ASCII Latin characters | |
377 | * @param removeAllAccents | |
378 | * TRUE to replace all accentuated characters by their non | |
379 | * accentuated counter-parts | |
380 | * | |
381 | * @return the sanitised {@link String} | |
382 | */ | |
383 | static public String sanitize(String input, boolean allowUnicode, | |
384 | boolean removeAllAccents) { | |
385 | ||
386 | if (removeAllAccents) { | |
387 | input = Normalizer.normalize(input, Form.NFKD); | |
e8aa5bf9 NR |
388 | if (marks != null) { |
389 | input = marks.matcher(input).replaceAll(""); | |
390 | } | |
ec1f3444 NR |
391 | } |
392 | ||
393 | input = Normalizer.normalize(input, Form.NFKC); | |
394 | ||
395 | if (!allowUnicode) { | |
396 | StringBuilder builder = new StringBuilder(); | |
397 | for (int index = 0; index < input.length(); index++) { | |
398 | char car = input.charAt(index); | |
399 | // displayable chars in ASCII are in the range 32<->255, | |
400 | // except DEL (127) | |
401 | if (car >= 32 && car <= 255 && car != 127) { | |
402 | builder.append(car); | |
403 | } | |
404 | } | |
405 | input = builder.toString(); | |
406 | } | |
407 | ||
408 | return input; | |
409 | } | |
410 | ||
411 | /** | |
451f434b NR |
412 | * Convert between the time in milliseconds to a {@link String} in a "fixed" |
413 | * way (to exchange data over the wire, for instance). | |
414 | * <p> | |
415 | * Precise to the second. | |
ec1f3444 NR |
416 | * |
417 | * @param time | |
451f434b NR |
418 | * the specified number of milliseconds since the standard base |
419 | * time known as "the epoch", namely January 1, 1970, 00:00:00 | |
420 | * GMT | |
ec1f3444 NR |
421 | * |
422 | * @return the time as a {@link String} | |
423 | */ | |
424 | static public String fromTime(long time) { | |
425 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); | |
426 | return sdf.format(new Date(time)); | |
427 | } | |
428 | ||
429 | /** | |
451f434b | 430 | * Convert between the time as a {@link String} to milliseconds in a "fixed" |
ec1f3444 | 431 | * way (to exchange data over the wire, for instance). |
451f434b NR |
432 | * <p> |
433 | * Precise to the second. | |
ec1f3444 | 434 | * |
db31c358 | 435 | * @param displayTime |
ec1f3444 NR |
436 | * the time as a {@link String} |
437 | * | |
451f434b | 438 | * @return the number of milliseconds since the standard base time known as |
e8aa5bf9 NR |
439 | * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case |
440 | * of error | |
441 | * | |
442 | * @throws ParseException | |
443 | * in case of parse error | |
ec1f3444 | 444 | */ |
e8aa5bf9 | 445 | static public long toTime(String displayTime) throws ParseException { |
ec1f3444 | 446 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
e8aa5bf9 | 447 | return sdf.parse(displayTime).getTime(); |
ec1f3444 NR |
448 | } |
449 | ||
ec1f3444 NR |
450 | /** |
451 | * Return a hash of the given {@link String}. | |
452 | * | |
453 | * @param input | |
454 | * the input data | |
455 | * | |
456 | * @return the hash | |
091c6af2 NR |
457 | * |
458 | * @deprecated please use {@link HashUtils} | |
ec1f3444 | 459 | */ |
091c6af2 | 460 | @Deprecated |
b771aed5 | 461 | static public String getMd5Hash(String input) { |
ec1f3444 NR |
462 | try { |
463 | MessageDigest md = MessageDigest.getInstance("MD5"); | |
f8147a0e | 464 | md.update(getBytes(input)); |
ec1f3444 NR |
465 | byte byteData[] = md.digest(); |
466 | ||
467 | StringBuffer hexString = new StringBuffer(); | |
468 | for (int i = 0; i < byteData.length; i++) { | |
469 | String hex = Integer.toHexString(0xff & byteData[i]); | |
470 | if (hex.length() == 1) | |
471 | hexString.append('0'); | |
472 | hexString.append(hex); | |
473 | } | |
474 | ||
475 | return hexString.toString(); | |
476 | } catch (NoSuchAlgorithmException e) { | |
477 | return input; | |
478 | } | |
479 | } | |
480 | ||
ec1f3444 NR |
481 | /** |
482 | * Remove the HTML content from the given input, and un-html-ize the rest. | |
483 | * | |
484 | * @param html | |
485 | * the HTML-encoded content | |
486 | * | |
487 | * @return the HTML-free equivalent content | |
488 | */ | |
489 | public static String unhtml(String html) { | |
490 | StringBuilder builder = new StringBuilder(); | |
491 | ||
492 | int inTag = 0; | |
493 | for (char car : html.toCharArray()) { | |
494 | if (car == '<') { | |
495 | inTag++; | |
496 | } else if (car == '>') { | |
497 | inTag--; | |
498 | } else if (inTag <= 0) { | |
499 | builder.append(car); | |
500 | } | |
501 | } | |
502 | ||
7ee9568b NR |
503 | char nbsp = ' '; // non-breakable space (a special char) |
504 | char space = ' '; | |
505 | return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space); | |
ec1f3444 NR |
506 | } |
507 | ||
508 | /** | |
509 | * Escape the given {@link String} so it can be used in XML, as content. | |
510 | * | |
511 | * @param input | |
512 | * the input {@link String} | |
513 | * | |
514 | * @return the escaped {@link String} | |
515 | */ | |
516 | public static String xmlEscape(String input) { | |
517 | if (input == null) { | |
518 | return ""; | |
519 | } | |
520 | ||
521 | return HtmlEscape.escapeHtml(input, | |
522 | HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA, | |
523 | HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT); | |
524 | } | |
525 | ||
526 | /** | |
527 | * Escape the given {@link String} so it can be used in XML, as text content | |
528 | * inside double-quotes. | |
529 | * | |
530 | * @param input | |
531 | * the input {@link String} | |
532 | * | |
533 | * @return the escaped {@link String} | |
534 | */ | |
535 | public static String xmlEscapeQuote(String input) { | |
536 | if (input == null) { | |
537 | return ""; | |
538 | } | |
539 | ||
540 | return HtmlEscape.escapeHtml(input, | |
541 | HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA, | |
542 | HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT); | |
543 | } | |
db31c358 | 544 | |
80500544 NR |
545 | /** |
546 | * Zip the data and then encode it into Base64. | |
547 | * | |
548 | * @param data | |
549 | * the data | |
550 | * | |
551 | * @return the Base64 zipped version | |
a6a73de3 NR |
552 | * |
553 | * @throws IOException | |
554 | * in case of I/O error | |
80500544 | 555 | */ |
5221cf7f | 556 | public static String zip64(String data) throws IOException { |
db31c358 | 557 | try { |
f8147a0e | 558 | return zip64(getBytes(data)); |
a6a73de3 NR |
559 | } catch (UnsupportedEncodingException e) { |
560 | // All conforming JVM are required to support UTF-8 | |
db31c358 NR |
561 | e.printStackTrace(); |
562 | return null; | |
563 | } | |
564 | } | |
565 | ||
80500544 | 566 | /** |
a6a73de3 | 567 | * Zip the data and then encode it into Base64. |
bb60bd13 | 568 | * |
80500544 | 569 | * @param data |
a6a73de3 | 570 | * the data |
80500544 | 571 | * |
a6a73de3 | 572 | * @return the Base64 zipped version |
80500544 NR |
573 | * |
574 | * @throws IOException | |
575 | * in case of I/O error | |
576 | */ | |
a6a73de3 NR |
577 | public static String zip64(byte[] data) throws IOException { |
578 | // 1. compress | |
579 | ByteArrayOutputStream bout = new ByteArrayOutputStream(); | |
580 | try { | |
581 | OutputStream out = new GZIPOutputStream(bout); | |
582 | try { | |
583 | out.write(data); | |
584 | } finally { | |
585 | out.close(); | |
586 | } | |
587 | } finally { | |
588 | data = bout.toByteArray(); | |
589 | bout.close(); | |
590 | } | |
591 | ||
592 | // 2. base64 | |
593 | InputStream in = new ByteArrayInputStream(data); | |
594 | try { | |
595 | in = new Base64InputStream(in, true); | |
596 | return new String(IOUtils.toByteArray(in), "UTF-8"); | |
597 | } finally { | |
598 | in.close(); | |
599 | } | |
db31c358 | 600 | } |
e8aa5bf9 | 601 | |
a359464f | 602 | /** |
a6a73de3 NR |
603 | * Unconvert from Base64 then unzip the content, which is assumed to be a |
604 | * String. | |
a359464f NR |
605 | * |
606 | * @param data | |
a6a73de3 | 607 | * the data in Base64 format |
a359464f | 608 | * |
a6a73de3 | 609 | * @return the raw data |
a359464f NR |
610 | * |
611 | * @throws IOException | |
a6a73de3 | 612 | * in case of I/O error |
a359464f | 613 | */ |
a6a73de3 NR |
614 | public static String unzip64s(String data) throws IOException { |
615 | return new String(unzip64(data), "UTF-8"); | |
a359464f NR |
616 | } |
617 | ||
bb60bd13 | 618 | /** |
a6a73de3 | 619 | * Unconvert from Base64 then unzip the content. |
bb60bd13 NR |
620 | * |
621 | * @param data | |
a6a73de3 | 622 | * the data in Base64 format |
bb60bd13 | 623 | * |
a6a73de3 | 624 | * @return the raw data |
bb60bd13 NR |
625 | * |
626 | * @throws IOException | |
a6a73de3 | 627 | * in case of I/O error |
bb60bd13 | 628 | */ |
a6a73de3 NR |
629 | public static byte[] unzip64(String data) throws IOException { |
630 | InputStream in = new Base64InputStream(new ByteArrayInputStream( | |
f8147a0e | 631 | getBytes(data)), false); |
a6a73de3 NR |
632 | try { |
633 | in = new GZIPInputStream(in); | |
634 | return IOUtils.toByteArray(in); | |
635 | } finally { | |
636 | in.close(); | |
637 | } | |
bb60bd13 NR |
638 | } |
639 | ||
640 | /** | |
a359464f NR |
641 | * Convert the given data to Base64 format. |
642 | * | |
643 | * @param data | |
644 | * the data to convert | |
a359464f NR |
645 | * |
646 | * @return the Base64 {@link String} representation of the data | |
647 | * | |
648 | * @throws IOException | |
649 | * in case of I/O errors | |
650 | */ | |
a6a73de3 | 651 | public static String base64(String data) throws IOException { |
f8147a0e | 652 | return base64(getBytes(data)); |
a359464f NR |
653 | } |
654 | ||
655 | /** | |
656 | * Convert the given data to Base64 format. | |
657 | * | |
658 | * @param data | |
659 | * the data to convert | |
a359464f NR |
660 | * |
661 | * @return the Base64 {@link String} representation of the data | |
662 | * | |
663 | * @throws IOException | |
664 | * in case of I/O errors | |
665 | */ | |
a6a73de3 NR |
666 | public static String base64(byte[] data) throws IOException { |
667 | Base64InputStream in = new Base64InputStream(new ByteArrayInputStream( | |
668 | data), true); | |
669 | try { | |
670 | return new String(IOUtils.toByteArray(in), "UTF-8"); | |
671 | } finally { | |
672 | in.close(); | |
a359464f | 673 | } |
a359464f NR |
674 | } |
675 | ||
676 | /** | |
677 | * Unconvert the given data from Base64 format back to a raw array of bytes. | |
bb60bd13 NR |
678 | * |
679 | * @param data | |
680 | * the data to unconvert | |
bb60bd13 NR |
681 | * |
682 | * @return the raw data represented by the given Base64 {@link String}, | |
bb60bd13 NR |
683 | * |
684 | * @throws IOException | |
685 | * in case of I/O errors | |
686 | */ | |
a6a73de3 NR |
687 | public static byte[] unbase64(String data) throws IOException { |
688 | Base64InputStream in = new Base64InputStream(new ByteArrayInputStream( | |
f8147a0e | 689 | getBytes(data)), false); |
a6a73de3 NR |
690 | try { |
691 | return IOUtils.toByteArray(in); | |
692 | } finally { | |
693 | in.close(); | |
a359464f | 694 | } |
a359464f NR |
695 | } |
696 | ||
bb60bd13 NR |
697 | /** |
698 | * Unonvert the given data from Base64 format back to a {@link String}. | |
699 | * | |
700 | * @param data | |
701 | * the data to unconvert | |
bb60bd13 NR |
702 | * |
703 | * @return the {@link String} represented by the given Base64 {@link String} | |
bb60bd13 NR |
704 | * |
705 | * @throws IOException | |
706 | * in case of I/O errors | |
707 | */ | |
a6a73de3 NR |
708 | public static String unbase64s(String data) throws IOException { |
709 | return new String(unbase64(data), "UTF-8"); | |
bb60bd13 NR |
710 | } |
711 | ||
d1e63903 NR |
712 | /** |
713 | * Return a display {@link String} for the given value, which can be | |
714 | * suffixed with "k" or "M" depending upon the number, if it is big enough. | |
715 | * <p> | |
79961c53 NR |
716 | * <p> |
717 | * Examples: | |
d1e63903 | 718 | * <ul> |
59654e2a NR |
719 | * <li><tt>8 765</tt> becomes "8 k"</li> |
720 | * <li><tt>998 765</tt> becomes "998 k"</li> | |
721 | * <li><tt>12 987 364</tt> becomes "12 M"</li> | |
722 | * <li><tt>5 534 333 221</tt> becomes "5 G"</li> | |
d1e63903 NR |
723 | * </ul> |
724 | * | |
725 | * @param value | |
726 | * the value to convert | |
727 | * | |
728 | * @return the display value | |
729 | */ | |
730 | public static String formatNumber(long value) { | |
5b46737c | 731 | return formatNumber(value, 0); |
d1e63903 NR |
732 | } |
733 | ||
734 | /** | |
735 | * Return a display {@link String} for the given value, which can be | |
736 | * suffixed with "k" or "M" depending upon the number, if it is big enough. | |
737 | * <p> | |
79961c53 | 738 | * Examples (assuming decimalPositions = 1): |
d1e63903 | 739 | * <ul> |
59654e2a NR |
740 | * <li><tt>8 765</tt> becomes "8.7 k"</li> |
741 | * <li><tt>998 765</tt> becomes "998.7 k"</li> | |
742 | * <li><tt>12 987 364</tt> becomes "12.9 M"</li> | |
743 | * <li><tt>5 534 333 221</tt> becomes "5.5 G"</li> | |
d1e63903 NR |
744 | * </ul> |
745 | * | |
746 | * @param value | |
747 | * the value to convert | |
5b46737c NR |
748 | * @param decimalPositions |
749 | * the number of decimal positions to keep | |
d1e63903 NR |
750 | * |
751 | * @return the display value | |
752 | */ | |
5b46737c | 753 | public static String formatNumber(long value, int decimalPositions) { |
79961c53 | 754 | long userValue = value; |
59654e2a | 755 | String suffix = " "; |
79961c53 | 756 | long mult = 1; |
5b46737c | 757 | |
8758aebb | 758 | if (value >= 1000000000l) { |
79961c53 NR |
759 | mult = 1000000000l; |
760 | userValue = value / 1000000000l; | |
39d16a80 | 761 | suffix = " G"; |
8758aebb | 762 | } else if (value >= 1000000l) { |
79961c53 NR |
763 | mult = 1000000l; |
764 | userValue = value / 1000000l; | |
39d16a80 | 765 | suffix = " M"; |
5b46737c | 766 | } else if (value >= 1000l) { |
79961c53 NR |
767 | mult = 1000l; |
768 | userValue = value / 1000l; | |
39d16a80 | 769 | suffix = " k"; |
d1e63903 NR |
770 | } |
771 | ||
79961c53 | 772 | String deci = ""; |
5b46737c | 773 | if (decimalPositions > 0) { |
79961c53 NR |
774 | deci = Long.toString(value % mult); |
775 | int size = Long.toString(mult).length() - 1; | |
776 | while (deci.length() < size) { | |
777 | deci = "0" + deci; | |
778 | } | |
779 | ||
5b46737c NR |
780 | deci = deci.substring(0, Math.min(decimalPositions, deci.length())); |
781 | while (deci.length() < decimalPositions) { | |
782 | deci += "0"; | |
783 | } | |
79961c53 | 784 | |
5b46737c | 785 | deci = "." + deci; |
d1e63903 NR |
786 | } |
787 | ||
79961c53 | 788 | return Long.toString(userValue) + deci + suffix; |
d1e63903 NR |
789 | } |
790 | ||
60033478 NR |
791 | /** |
792 | * The reverse operation to {@link StringUtils#formatNumber(long)}: it will | |
793 | * read a "display" number that can contain a "M" or "k" suffix and return | |
794 | * the full value. | |
795 | * <p> | |
796 | * Of course, the conversion to and from display form is lossy (example: | |
5b46737c | 797 | * <tt>6870</tt> to "6.5k" to <tt>6500</tt>). |
60033478 NR |
798 | * |
799 | * @param value | |
800 | * the value in display form with possible "M" and "k" suffixes, | |
801 | * can be NULL | |
802 | * | |
803 | * @return the value as a number, or 0 if not possible to convert | |
804 | */ | |
805 | public static long toNumber(String value) { | |
806 | return toNumber(value, 0l); | |
807 | } | |
808 | ||
809 | /** | |
810 | * The reverse operation to {@link StringUtils#formatNumber(long)}: it will | |
811 | * read a "display" number that can contain a "M" or "k" suffix and return | |
812 | * the full value. | |
813 | * <p> | |
814 | * Of course, the conversion to and from display form is lossy (example: | |
5b46737c | 815 | * <tt>6870</tt> to "6.5k" to <tt>6500</tt>). |
60033478 NR |
816 | * |
817 | * @param value | |
818 | * the value in display form with possible "M" and "k" suffixes, | |
819 | * can be NULL | |
820 | * @param def | |
821 | * the default value if it is not possible to convert the given | |
822 | * value to a number | |
823 | * | |
824 | * @return the value as a number, or 0 if not possible to convert | |
825 | */ | |
826 | public static long toNumber(String value, long def) { | |
827 | long count = def; | |
828 | if (value != null) { | |
5b46737c | 829 | value = value.trim().toLowerCase(); |
60033478 | 830 | try { |
79961c53 NR |
831 | long mult = 1; |
832 | if (value.endsWith("g")) { | |
833 | value = value.substring(0, value.length() - 1).trim(); | |
834 | mult = 1000000000; | |
835 | } else if (value.endsWith("m")) { | |
5b46737c NR |
836 | value = value.substring(0, value.length() - 1).trim(); |
837 | mult = 1000000; | |
838 | } else if (value.endsWith("k")) { | |
839 | value = value.substring(0, value.length() - 1).trim(); | |
840 | mult = 1000; | |
841 | } | |
842 | ||
843 | long deci = 0; | |
844 | if (value.contains(".")) { | |
845 | String[] tab = value.split("\\."); | |
846 | if (tab.length != 2) { | |
847 | throw new NumberFormatException(value); | |
848 | } | |
849 | double decimal = Double.parseDouble("0." | |
850 | + tab[tab.length - 1]); | |
851 | deci = ((long) (mult * decimal)); | |
852 | value = tab[0]; | |
60033478 | 853 | } |
5b46737c NR |
854 | count = mult * Long.parseLong(value) + deci; |
855 | } catch (Exception e) { | |
60033478 NR |
856 | } |
857 | } | |
858 | ||
859 | return count; | |
860 | } | |
861 | ||
f8147a0e NR |
862 | /** |
863 | * Return the bytes array representation of the given {@link String} in | |
864 | * UTF-8. | |
865 | * | |
866 | * @param str | |
867 | * the {@link String} to transform into bytes | |
868 | * @return the content in bytes | |
869 | */ | |
870 | static public byte[] getBytes(String str) { | |
871 | try { | |
872 | return str.getBytes("UTF-8"); | |
873 | } catch (UnsupportedEncodingException e) { | |
874 | // All conforming JVM must support UTF-8 | |
875 | e.printStackTrace(); | |
876 | return null; | |
877 | } | |
878 | } | |
879 | ||
e8aa5bf9 NR |
880 | /** |
881 | * The "remove accents" pattern. | |
882 | * | |
883 | * @return the pattern, or NULL if a problem happens | |
884 | */ | |
885 | private static Pattern getMarks() { | |
886 | try { | |
887 | return Pattern | |
888 | .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+"); | |
889 | } catch (Exception e) { | |
890 | // Can fail on Android... | |
891 | return null; | |
892 | } | |
893 | } | |
dc22eb95 | 894 | |
bb60bd13 | 895 | // |
dc22eb95 | 896 | // justify List<String> related: |
bb60bd13 | 897 | // |
dc22eb95 | 898 | |
bb60bd13 NR |
899 | /** |
900 | * Check if this line ends as a complete line (ends with a "." or similar). | |
901 | * <p> | |
902 | * Note that we consider an empty line as full, and a line ending with | |
903 | * spaces as not complete. | |
904 | * | |
905 | * @param line | |
906 | * the line to check | |
907 | * | |
908 | * @return TRUE if it does | |
909 | */ | |
dc22eb95 | 910 | static private boolean isFullLine(StringBuilder line) { |
bb60bd13 NR |
911 | if (line.length() == 0) { |
912 | return true; | |
913 | } | |
914 | ||
915 | char lastCar = line.charAt(line.length() - 1); | |
916 | switch (lastCar) { | |
917 | case '.': // points | |
918 | case '?': | |
919 | case '!': | |
920 | ||
921 | case '\'': // quotes | |
922 | case '‘': | |
923 | case '’': | |
924 | ||
925 | case '"': // double quotes | |
926 | case '”': | |
927 | case '“': | |
928 | case '»': | |
929 | case '«': | |
930 | return true; | |
931 | default: | |
932 | return false; | |
933 | } | |
dc22eb95 NR |
934 | } |
935 | ||
bb60bd13 NR |
936 | /** |
937 | * Check if this line represent an item in a list or description (i.e., | |
938 | * check that the first non-space char is "-"). | |
939 | * | |
940 | * @param line | |
941 | * the line to check | |
942 | * | |
943 | * @return TRUE if it is | |
944 | */ | |
dc22eb95 NR |
945 | static private boolean isItemLine(String line) { |
946 | String spacing = getItemSpacing(line); | |
c0c091af NR |
947 | return spacing != null && !spacing.isEmpty() |
948 | && line.charAt(spacing.length()) == '-'; | |
dc22eb95 NR |
949 | } |
950 | ||
bb60bd13 NR |
951 | /** |
952 | * Return all the spaces that start this line (or Empty if none). | |
953 | * | |
954 | * @param line | |
955 | * the line to get the starting spaces from | |
956 | * | |
957 | * @return the left spacing | |
958 | */ | |
dc22eb95 NR |
959 | static private String getItemSpacing(String line) { |
960 | int i; | |
961 | for (i = 0; i < line.length(); i++) { | |
962 | if (line.charAt(i) != ' ') { | |
963 | return line.substring(0, i); | |
964 | } | |
965 | } | |
966 | ||
967 | return ""; | |
968 | } | |
c0c091af | 969 | |
bb60bd13 NR |
970 | /** |
971 | * This line is an horizontal spacer line. | |
972 | * | |
973 | * @param line | |
974 | * the line to test | |
975 | * | |
976 | * @return TRUE if it is | |
977 | */ | |
c0c091af NR |
978 | static private boolean isHrLine(CharSequence line) { |
979 | int count = 0; | |
980 | if (line != null) { | |
981 | for (int i = 0; i < line.length(); i++) { | |
982 | char car = line.charAt(i); | |
983 | if (car == ' ' || car == '\t' || car == '*' || car == '-' | |
984 | || car == '_' || car == '~' || car == '=' || car == '/' | |
985 | || car == '\\') { | |
986 | count++; | |
987 | } else { | |
988 | return false; | |
989 | } | |
990 | } | |
991 | } | |
992 | ||
993 | return count > 2; | |
994 | } | |
5221cf7f NR |
995 | |
996 | // Deprecated functions, please do not use // | |
997 | ||
998 | /** | |
999 | * @deprecated please use {@link StringUtils#zip64(byte[])} or | |
1000 | * {@link StringUtils#base64(byte[])} instead. | |
1001 | * | |
1002 | * @param data | |
1003 | * the data to encode | |
1004 | * @param zip | |
1005 | * TRUE to zip it before Base64 encoding it, FALSE for Base64 | |
1006 | * encoding only | |
1007 | * | |
1008 | * @return the encoded data | |
1009 | * | |
1010 | * @throws IOException | |
1011 | * in case of I/O error | |
1012 | */ | |
1013 | @Deprecated | |
1014 | public static String base64(String data, boolean zip) throws IOException { | |
1015 | return base64(getBytes(data), zip); | |
1016 | } | |
1017 | ||
1018 | /** | |
1019 | * @deprecated please use {@link StringUtils#zip64(String)} or | |
1020 | * {@link StringUtils#base64(String)} instead. | |
1021 | * | |
1022 | * @param data | |
1023 | * the data to encode | |
1024 | * @param zip | |
1025 | * TRUE to zip it before Base64 encoding it, FALSE for Base64 | |
1026 | * encoding only | |
1027 | * | |
1028 | * @return the encoded data | |
1029 | * | |
1030 | * @throws IOException | |
1031 | * in case of I/O error | |
1032 | */ | |
1033 | @Deprecated | |
1034 | public static String base64(byte[] data, boolean zip) throws IOException { | |
1035 | if (zip) { | |
1036 | return zip64(data); | |
1037 | } | |
1038 | ||
1039 | Base64InputStream b64 = new Base64InputStream(new ByteArrayInputStream( | |
1040 | data), true); | |
1041 | try { | |
1042 | return IOUtils.readSmallStream(b64); | |
1043 | } finally { | |
1044 | b64.close(); | |
1045 | } | |
1046 | } | |
1047 | ||
1048 | /** | |
1049 | * @deprecated please use {@link Base64OutputStream} and | |
1050 | * {@link GZIPOutputStream} instead. | |
1051 | * | |
1052 | * @param breakLines | |
1053 | * NOT USED ANYMORE, it is always considered FALSE now | |
1054 | */ | |
1055 | @Deprecated | |
1056 | public static OutputStream base64(OutputStream data, boolean zip, | |
1057 | boolean breakLines) throws IOException { | |
1058 | OutputStream out = new Base64OutputStream(data); | |
1059 | if (zip) { | |
1060 | out = new java.util.zip.GZIPOutputStream(out); | |
1061 | } | |
1062 | ||
1063 | return out; | |
1064 | } | |
1065 | ||
1066 | /** | |
1067 | * Unconvert the given data from Base64 format back to a raw array of bytes. | |
1068 | * <p> | |
1069 | * Will automatically detect zipped data and also uncompress it before | |
1070 | * returning, unless ZIP is false. | |
1071 | * | |
1072 | * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped) | |
1073 | * | |
1074 | * @param data | |
1075 | * the data to unconvert | |
1076 | * @param zip | |
1077 | * TRUE to also uncompress the data from a GZIP format | |
1078 | * automatically; if set to FALSE, zipped data can be returned | |
1079 | * | |
1080 | * @return the raw data represented by the given Base64 {@link String}, | |
1081 | * optionally compressed with GZIP | |
1082 | * | |
1083 | * @throws IOException | |
1084 | * in case of I/O errors | |
1085 | */ | |
1086 | @Deprecated | |
1087 | public static byte[] unbase64(String data, boolean zip) throws IOException { | |
1088 | byte[] buffer = unbase64(data); | |
1089 | if (!zip) { | |
1090 | return buffer; | |
1091 | } | |
1092 | ||
1093 | try { | |
1094 | GZIPInputStream zipped = new GZIPInputStream( | |
1095 | new ByteArrayInputStream(buffer)); | |
1096 | try { | |
1097 | ByteArrayOutputStream out = new ByteArrayOutputStream(); | |
1098 | try { | |
1099 | IOUtils.write(zipped, out); | |
1100 | return out.toByteArray(); | |
1101 | } finally { | |
1102 | out.close(); | |
1103 | } | |
1104 | } finally { | |
1105 | zipped.close(); | |
1106 | } | |
1107 | } catch (Exception e) { | |
1108 | return buffer; | |
1109 | } | |
1110 | } | |
1111 | ||
1112 | /** | |
1113 | * Unconvert the given data from Base64 format back to a raw array of bytes. | |
1114 | * <p> | |
1115 | * Will automatically detect zipped data and also uncompress it before | |
1116 | * returning, unless ZIP is false. | |
1117 | * | |
1118 | * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped) | |
1119 | * | |
1120 | * @param data | |
1121 | * the data to unconvert | |
1122 | * @param zip | |
1123 | * TRUE to also uncompress the data from a GZIP format | |
1124 | * automatically; if set to FALSE, zipped data can be returned | |
1125 | * | |
1126 | * @return the raw data represented by the given Base64 {@link String}, | |
1127 | * optionally compressed with GZIP | |
1128 | * | |
1129 | * @throws IOException | |
1130 | * in case of I/O errors | |
1131 | */ | |
1132 | @Deprecated | |
1133 | public static InputStream unbase64(InputStream data, boolean zip) | |
1134 | throws IOException { | |
1135 | return new ByteArrayInputStream(unbase64(IOUtils.readSmallStream(data), | |
1136 | zip)); | |
1137 | } | |
1138 | ||
1139 | /** | |
1140 | * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped) | |
1141 | */ | |
1142 | @Deprecated | |
1143 | public static byte[] unbase64(byte[] data, int offset, int count, | |
1144 | boolean zip) throws IOException { | |
1145 | byte[] dataPart = Arrays.copyOfRange(data, offset, offset + count); | |
1146 | return unbase64(new String(dataPart, "UTF-8"), zip); | |
1147 | } | |
1148 | ||
1149 | /** | |
1150 | * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped) | |
1151 | */ | |
1152 | @Deprecated | |
1153 | public static String unbase64s(String data, boolean zip) throws IOException { | |
1154 | return new String(unbase64(data, zip), "UTF-8"); | |
1155 | } | |
1156 | ||
1157 | /** | |
1158 | * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped) | |
1159 | */ | |
1160 | @Deprecated | |
1161 | public static String unbase64s(byte[] data, int offset, int count, | |
1162 | boolean zip) throws IOException { | |
1163 | return new String(unbase64(data, offset, count, zip), "UTF-8"); | |
1164 | } | |
ec1f3444 | 1165 | } |