bugfixes
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
CommitLineData
ec1f3444
NR
1package be.nikiroo.utils;
2
ec1f3444 3import java.io.IOException;
a359464f
NR
4import java.io.InputStream;
5import java.io.OutputStream;
3f8349b7 6import java.io.UnsupportedEncodingException;
ec1f3444
NR
7import java.security.MessageDigest;
8import java.security.NoSuchAlgorithmException;
9import java.text.Normalizer;
10import java.text.Normalizer.Form;
11import java.text.ParseException;
12import java.text.SimpleDateFormat;
dc22eb95
NR
13import java.util.AbstractMap;
14import java.util.ArrayList;
ec1f3444 15import java.util.Date;
cc3e7291 16import java.util.List;
dc22eb95 17import java.util.Map.Entry;
ec1f3444
NR
18import java.util.regex.Pattern;
19
ec1f3444
NR
20import org.unbescape.html.HtmlEscape;
21import org.unbescape.html.HtmlEscapeLevel;
22import org.unbescape.html.HtmlEscapeType;
23
24/**
25 * This class offer some utilities based around {@link String}s.
26 *
27 * @author niki
28 */
29public class StringUtils {
/**
 * This enum type decides the alignment of a {@link String} when padding or
 * justification is applied (if there is enough horizontal space for it to
 * be aligned).
 */
public enum Alignment {
    /** Aligned at left. */
    LEFT,
    /** Centered. */
    CENTER,
    /** Aligned at right. */
    RIGHT,
    /** Full justified (to both left and right). */
    JUSTIFY,

    // Old Deprecated values:

    /** DEPRECATED: please use LEFT. */
    @Deprecated
    Beginning,
    /** DEPRECATED: please use CENTER. */
    @Deprecated
    Center,
    /** DEPRECATED: please use RIGHT. */
    @Deprecated
    End;

    /**
     * Return the non-deprecated version of this enum if needed (or return
     * self if not).
     *
     * @return the non-deprecated value
     */
    Alignment undeprecate() {
        if (this == Beginning) {
            return LEFT;
        }
        if (this == Center) {
            return CENTER;
        }
        if (this == End) {
            return RIGHT;
        }
        return this;
    }
}
73
e8aa5bf9 74 static private Pattern marks = getMarks();
ec1f3444
NR
75
76 /**
77 * Fix the size of the given {@link String} either with space-padding or by
78 * shortening it.
79 *
80 * @param text
81 * the {@link String} to fix
82 * @param width
83 * the size of the resulting {@link String} or -1 for a noop
84 *
85 * @return the resulting {@link String} of size <i>size</i>
86 */
87 static public String padString(String text, int width) {
451f434b 88 return padString(text, width, true, null);
ec1f3444
NR
89 }
90
91 /**
92 * Fix the size of the given {@link String} either with space-padding or by
93 * optionally shortening it.
94 *
95 * @param text
96 * the {@link String} to fix
97 * @param width
98 * the size of the resulting {@link String} if the text fits or
99 * if cut is TRUE or -1 for a noop
100 * @param cut
101 * cut the {@link String} shorter if needed
102 * @param align
103 * align the {@link String} in this position if we have enough
451f434b 104 * space (default is Alignment.Beginning)
ec1f3444
NR
105 *
106 * @return the resulting {@link String} of size <i>size</i> minimum
107 */
108 static public String padString(String text, int width, boolean cut,
109 Alignment align) {
110
451f434b 111 if (align == null) {
cc3e7291 112 align = Alignment.LEFT;
451f434b
NR
113 }
114
cc3e7291
NR
115 align = align.undeprecate();
116
ec1f3444
NR
117 if (width >= 0) {
118 if (text == null)
119 text = "";
120
121 int diff = width - text.length();
122
123 if (diff < 0) {
124 if (cut)
125 text = text.substring(0, width);
126 } else if (diff > 0) {
cc3e7291
NR
127 if (diff < 2 && align != Alignment.RIGHT)
128 align = Alignment.LEFT;
ec1f3444
NR
129
130 switch (align) {
cc3e7291 131 case RIGHT:
ec1f3444
NR
132 text = new String(new char[diff]).replace('\0', ' ') + text;
133 break;
cc3e7291 134 case CENTER:
ec1f3444
NR
135 int pad1 = (diff) / 2;
136 int pad2 = (diff + 1) / 2;
137 text = new String(new char[pad1]).replace('\0', ' ') + text
138 + new String(new char[pad2]).replace('\0', ' ');
139 break;
cc3e7291
NR
140 case LEFT:
141 default:
142 text = text + new String(new char[diff]).replace('\0', ' ');
143 break;
ec1f3444
NR
144 }
145 }
146 }
147
148 return text;
149 }
150
cc3e7291
NR
151 /**
152 * Justify a text into width-sized (at the maximum) lines.
153 *
154 * @param text
155 * the {@link String} to justify
156 * @param width
157 * the maximum size of the resulting lines
158 *
159 * @return a list of justified text lines
160 */
161 static public List<String> justifyText(String text, int width) {
162 return justifyText(text, width, null);
163 }
164
165 /**
166 * Justify a text into width-sized (at the maximum) lines.
167 *
168 * @param text
169 * the {@link String} to justify
170 * @param width
171 * the maximum size of the resulting lines
172 * @param align
173 * align the lines in this position (default is
174 * Alignment.Beginning)
175 *
176 * @return a list of justified text lines
177 */
178 static public List<String> justifyText(String text, int width,
179 Alignment align) {
180 if (align == null) {
181 align = Alignment.LEFT;
182 }
183
184 align = align.undeprecate();
185
186 switch (align) {
187 case CENTER:
188 return StringJustifier.center(text, width);
189 case RIGHT:
190 return StringJustifier.right(text, width);
191 case JUSTIFY:
192 return StringJustifier.full(text, width);
193 case LEFT:
194 default:
195 return StringJustifier.left(text, width);
196 }
197 }
198
dc22eb95
NR
199 /**
200 * Justify a text into width-sized (at the maximum) lines.
201 *
202 * @param text
203 * the {@link String} to justify
204 * @param width
205 * the maximum size of the resulting lines
206 *
207 * @return a list of justified text lines
208 */
209 static public List<String> justifyText(List<String> text, int width) {
210 return justifyText(text, width, null);
211 }
212
213 /**
214 * Justify a text into width-sized (at the maximum) lines.
215 *
216 * @param text
217 * the {@link String} to justify
218 * @param width
219 * the maximum size of the resulting lines
220 * @param align
221 * align the lines in this position (default is
222 * Alignment.Beginning)
223 *
224 * @return a list of justified text lines
225 */
226 static public List<String> justifyText(List<String> text, int width,
227 Alignment align) {
228 List<String> result = new ArrayList<String>();
229
230 // Content <-> Bullet spacing (null = no spacing)
231 List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
232 StringBuilder previous = null;
233 StringBuilder tmp = new StringBuilder();
234 String previousItemBulletSpacing = null;
235 String itemBulletSpacing = null;
236 for (String inputLine : text) {
237 boolean previousLineComplete = true;
238
239 String current = inputLine.replace("\t", " ");
240 itemBulletSpacing = getItemSpacing(current);
241 boolean bullet = isItemLine(current);
242 if ((previousItemBulletSpacing == null || itemBulletSpacing
243 .length() <= previousItemBulletSpacing.length()) && !bullet) {
244 itemBulletSpacing = null;
245 }
246
247 if (itemBulletSpacing != null) {
248 current = current.trim();
249 if (!current.isEmpty() && bullet) {
250 current = current.substring(1);
251 }
252 current = current.trim();
253 previousLineComplete = bullet;
254 } else {
255 tmp.setLength(0);
256 for (String word : current.split(" ")) {
257 if (word.isEmpty()) {
258 continue;
259 }
260
261 if (tmp.length() > 0) {
262 tmp.append(' ');
263 }
264 tmp.append(word.trim());
265 }
266 current = tmp.toString();
267
268 previousLineComplete = current.isEmpty()
269 || previousItemBulletSpacing != null
c0c091af
NR
270 || (previous != null && isFullLine(previous))
271 || isHrLine(current) || isHrLine(previous);
dc22eb95
NR
272 }
273
274 if (previous == null) {
275 previous = new StringBuilder();
276 } else {
277 if (previousLineComplete) {
278 lines.add(new AbstractMap.SimpleEntry<String, String>(
279 previous.toString(), previousItemBulletSpacing));
280 previous.setLength(0);
281 previousItemBulletSpacing = itemBulletSpacing;
282 } else {
283 previous.append(' ');
284 }
285 }
286
287 previous.append(current);
288
289 }
290
291 if (previous != null) {
292 lines.add(new AbstractMap.SimpleEntry<String, String>(previous
293 .toString(), previousItemBulletSpacing));
294 }
295
296 for (Entry<String, String> line : lines) {
297 String content = line.getKey();
298 String spacing = line.getValue();
299
300 String bullet = "- ";
301 if (spacing == null) {
302 bullet = "";
303 spacing = "";
304 }
305
306 if (spacing.length() > width + 3) {
307 spacing = "";
308 }
309
310 for (String subline : StringUtils.justifyText(content, width
311 - (spacing.length() + bullet.length()), align)) {
312 result.add(spacing + bullet + subline);
313 if (!bullet.isEmpty()) {
314 bullet = " ";
315 }
316 }
317 }
318
319 return result;
320 }
321
ec1f3444
NR
322 /**
323 * Sanitise the given input to make it more Terminal-friendly by removing
324 * combining characters.
325 *
326 * @param input
327 * the input to sanitise
328 * @param allowUnicode
329 * allow Unicode or only allow ASCII Latin characters
330 *
331 * @return the sanitised {@link String}
332 */
333 static public String sanitize(String input, boolean allowUnicode) {
334 return sanitize(input, allowUnicode, !allowUnicode);
335 }
336
337 /**
338 * Sanitise the given input to make it more Terminal-friendly by removing
339 * combining characters.
340 *
341 * @param input
342 * the input to sanitise
343 * @param allowUnicode
344 * allow Unicode or only allow ASCII Latin characters
345 * @param removeAllAccents
346 * TRUE to replace all accentuated characters by their non
347 * accentuated counter-parts
348 *
349 * @return the sanitised {@link String}
350 */
351 static public String sanitize(String input, boolean allowUnicode,
352 boolean removeAllAccents) {
353
354 if (removeAllAccents) {
355 input = Normalizer.normalize(input, Form.NFKD);
e8aa5bf9
NR
356 if (marks != null) {
357 input = marks.matcher(input).replaceAll("");
358 }
ec1f3444
NR
359 }
360
361 input = Normalizer.normalize(input, Form.NFKC);
362
363 if (!allowUnicode) {
364 StringBuilder builder = new StringBuilder();
365 for (int index = 0; index < input.length(); index++) {
366 char car = input.charAt(index);
367 // displayable chars in ASCII are in the range 32<->255,
368 // except DEL (127)
369 if (car >= 32 && car <= 255 && car != 127) {
370 builder.append(car);
371 }
372 }
373 input = builder.toString();
374 }
375
376 return input;
377 }
378
379 /**
451f434b
NR
380 * Convert between the time in milliseconds to a {@link String} in a "fixed"
381 * way (to exchange data over the wire, for instance).
382 * <p>
383 * Precise to the second.
ec1f3444
NR
384 *
385 * @param time
451f434b
NR
386 * the specified number of milliseconds since the standard base
387 * time known as "the epoch", namely January 1, 1970, 00:00:00
388 * GMT
ec1f3444
NR
389 *
390 * @return the time as a {@link String}
391 */
392 static public String fromTime(long time) {
393 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
394 return sdf.format(new Date(time));
395 }
396
397 /**
451f434b 398 * Convert between the time as a {@link String} to milliseconds in a "fixed"
ec1f3444 399 * way (to exchange data over the wire, for instance).
451f434b
NR
400 * <p>
401 * Precise to the second.
ec1f3444 402 *
db31c358 403 * @param displayTime
ec1f3444
NR
404 * the time as a {@link String}
405 *
451f434b 406 * @return the number of milliseconds since the standard base time known as
e8aa5bf9
NR
407 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
408 * of error
409 *
410 * @throws ParseException
411 * in case of parse error
ec1f3444 412 */
e8aa5bf9 413 static public long toTime(String displayTime) throws ParseException {
ec1f3444 414 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
e8aa5bf9 415 return sdf.parse(displayTime).getTime();
ec1f3444
NR
416 }
417
ec1f3444
NR
418 /**
419 * Return a hash of the given {@link String}.
420 *
421 * @param input
422 * the input data
423 *
424 * @return the hash
425 */
b771aed5 426 static public String getMd5Hash(String input) {
ec1f3444
NR
427 try {
428 MessageDigest md = MessageDigest.getInstance("MD5");
3f8349b7 429 md.update(input.getBytes("UTF-8"));
ec1f3444
NR
430 byte byteData[] = md.digest();
431
432 StringBuffer hexString = new StringBuffer();
433 for (int i = 0; i < byteData.length; i++) {
434 String hex = Integer.toHexString(0xff & byteData[i]);
435 if (hex.length() == 1)
436 hexString.append('0');
437 hexString.append(hex);
438 }
439
440 return hexString.toString();
441 } catch (NoSuchAlgorithmException e) {
442 return input;
3f8349b7
NR
443 } catch (UnsupportedEncodingException e) {
444 return input;
ec1f3444
NR
445 }
446 }
447
ec1f3444
NR
448 /**
449 * Remove the HTML content from the given input, and un-html-ize the rest.
450 *
451 * @param html
452 * the HTML-encoded content
453 *
454 * @return the HTML-free equivalent content
455 */
456 public static String unhtml(String html) {
457 StringBuilder builder = new StringBuilder();
458
459 int inTag = 0;
460 for (char car : html.toCharArray()) {
461 if (car == '<') {
462 inTag++;
463 } else if (car == '>') {
464 inTag--;
465 } else if (inTag <= 0) {
466 builder.append(car);
467 }
468 }
469
7ee9568b
NR
470 char nbsp = ' '; // non-breakable space (a special char)
471 char space = ' ';
472 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
ec1f3444
NR
473 }
474
475 /**
476 * Escape the given {@link String} so it can be used in XML, as content.
477 *
478 * @param input
479 * the input {@link String}
480 *
481 * @return the escaped {@link String}
482 */
483 public static String xmlEscape(String input) {
484 if (input == null) {
485 return "";
486 }
487
488 return HtmlEscape.escapeHtml(input,
489 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
490 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
491 }
492
493 /**
494 * Escape the given {@link String} so it can be used in XML, as text content
495 * inside double-quotes.
496 *
497 * @param input
498 * the input {@link String}
499 *
500 * @return the escaped {@link String}
501 */
502 public static String xmlEscapeQuote(String input) {
503 if (input == null) {
504 return "";
505 }
506
507 return HtmlEscape.escapeHtml(input,
508 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
509 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
510 }
db31c358 511
80500544
NR
512 /**
513 * Zip the data and then encode it into Base64.
514 *
bb60bd13
NR
515 * @deprecated use {@link StringUtils#base64(byte[], boolean)} with the
516 * correct parameter instead
517 *
80500544
NR
518 * @param data
519 * the data
520 *
521 * @return the Base64 zipped version
522 */
bb60bd13 523 @Deprecated
db31c358
NR
524 public static String zip64(String data) {
525 try {
a359464f 526 return Base64.encodeBytes(data.getBytes("UTF-8"), Base64.GZIP);
db31c358
NR
527 } catch (IOException e) {
528 e.printStackTrace();
529 return null;
530 }
531 }
532
80500544
NR
533 /**
534 * Unconvert from Base64 then unzip the content.
535 *
bb60bd13
NR
536 * @deprecated use {@link StringUtils#unbase64s(String, boolean)} with the
537 * correct parameter instead
538 *
80500544
NR
539 * @param data
540 * the data in Base64 format
541 *
542 * @return the raw data
543 *
544 * @throws IOException
545 * in case of I/O error
546 */
bb60bd13 547 @Deprecated
db31c358 548 public static String unzip64(String data) throws IOException {
b6200792 549 return new String(Base64.decode(data, Base64.GZIP), "UTF-8");
db31c358 550 }
e8aa5bf9 551
a359464f
NR
552 /**
553 * Convert the given data to Base64 format.
554 *
555 * @param data
556 * the data to convert
557 * @param zip
558 * TRUE to also compress the data in GZIP format; remember that
559 * compressed and not-compressed content are different; you need
560 * to know which is which when decoding
561 *
562 * @return the Base64 {@link String} representation of the data
563 *
564 * @throws IOException
565 * in case of I/O errors
566 */
567 public static String base64(String data, boolean zip) throws IOException {
568 return base64(data.getBytes("UTF-8"), zip);
569 }
570
bb60bd13
NR
571 /**
572 * Convert the given data to Base64 format.
573 *
574 * @param data
575 * the data to convert
576 * @param zip
577 * TRUE to also compress the data in GZIP format; remember that
578 * compressed and not-compressed content are different; you need
579 * to know which is which when decoding
580 *
581 * @return the Base64 {@link String} representation of the data
582 *
583 * @throws IOException
584 * in case of I/O errors
585 */
586 public static String base64(byte[] data, boolean zip) throws IOException {
587 return Base64.encodeBytes(data, zip ? Base64.GZIP : Base64.NO_OPTIONS);
588 }
589
590 /**
a359464f
NR
591 * Convert the given data to Base64 format.
592 *
593 * @param data
594 * the data to convert
595 * @param zip
596 * TRUE to also uncompress the data from a GZIP format; take care
597 * about this flag, as it could easily cause errors in the
598 * returned content or an {@link IOException}
599 * @param breakLines
600 * TRUE to break lines on every 76th character
601 *
602 * @return the Base64 {@link String} representation of the data
603 *
604 * @throws IOException
605 * in case of I/O errors
606 */
607 public static OutputStream base64(OutputStream data, boolean zip,
608 boolean breakLines) throws IOException {
609 OutputStream out = new Base64.OutputStream(data,
610 breakLines ? Base64.DO_BREAK_LINES & Base64.ENCODE
611 : Base64.ENCODE);
612
613 if (zip) {
614 out = new java.util.zip.GZIPOutputStream(out);
615 }
616
617 return out;
618 }
619
620 /**
621 * Convert the given data to Base64 format.
622 *
623 * @param data
624 * the data to convert
625 * @param zip
626 * TRUE to also uncompress the data from a GZIP format; take care
627 * about this flag, as it could easily cause errors in the
628 * returned content or an {@link IOException}
629 * @param breakLines
630 * TRUE to break lines on every 76th character
631 *
632 * @return the Base64 {@link String} representation of the data
633 *
634 * @throws IOException
635 * in case of I/O errors
636 */
637 public static InputStream base64(InputStream data, boolean zip,
638 boolean breakLines) throws IOException {
639 if (zip) {
640 data = new java.util.zip.GZIPInputStream(data);
641 }
642
643 return new Base64.InputStream(data, breakLines ? Base64.DO_BREAK_LINES
644 & Base64.ENCODE : Base64.ENCODE);
645 }
646
647 /**
648 * Unconvert the given data from Base64 format back to a raw array of bytes.
b6200792
NR
649 * <p>
650 * Will automatically detect zipped data and also uncompress it before
651 * returning, unless ZIP is false.
bb60bd13
NR
652 *
653 * @param data
654 * the data to unconvert
655 * @param zip
b6200792
NR
656 * TRUE to also uncompress the data from a GZIP format
657 * automatically; if set to FALSE, zipped data can be returned
bb60bd13
NR
658 *
659 * @return the raw data represented by the given Base64 {@link String},
660 * optionally compressed with GZIP
661 *
662 * @throws IOException
663 * in case of I/O errors
664 */
665 public static byte[] unbase64(String data, boolean zip) throws IOException {
b6200792
NR
666 return Base64
667 .decode(data, zip ? Base64.NO_OPTIONS : Base64.DONT_GUNZIP);
bb60bd13
NR
668 }
669
a359464f
NR
670 /**
671 * Unconvert the given data from Base64 format back to a raw array of bytes.
672 *
673 * @param data
674 * the data to unconvert
675 * @param zip
676 * TRUE to also uncompress the data from a GZIP format; take care
677 * about this flag, as it could easily cause errors in the
678 * returned content or an {@link IOException}
a359464f
NR
679 *
680 * @return the raw data represented by the given Base64 {@link String}
681 *
682 * @throws IOException
683 * in case of I/O errors
684 */
b6200792
NR
685 public static OutputStream unbase64(OutputStream data, boolean zip)
686 throws IOException {
687 OutputStream out = new Base64.OutputStream(data, Base64.DECODE);
a359464f
NR
688
689 if (zip) {
690 out = new java.util.zip.GZIPOutputStream(out);
691 }
692
693 return out;
694 }
695
696 /**
697 * Unconvert the given data from Base64 format back to a raw array of bytes.
698 *
699 * @param data
700 * the data to unconvert
701 * @param zip
702 * TRUE to also uncompress the data from a GZIP format; take care
703 * about this flag, as it could easily cause errors in the
704 * returned content or an {@link IOException}
a359464f
NR
705 *
706 * @return the raw data represented by the given Base64 {@link String}
707 *
708 * @throws IOException
709 * in case of I/O errors
710 */
b6200792
NR
711 public static InputStream unbase64(InputStream data, boolean zip)
712 throws IOException {
a359464f
NR
713 if (zip) {
714 data = new java.util.zip.GZIPInputStream(data);
715 }
716
b6200792
NR
717 return new Base64.InputStream(data, Base64.DECODE);
718 }
719
720 /**
721 * Unconvert the given data from Base64 format back to a raw array of bytes.
722 * <p>
723 * Will automatically detect zipped data and also uncompress it before
724 * returning, unless ZIP is false.
725 *
726 * @param data
727 * the data to unconvert
728 * @param offset
729 * the offset at which to start taking the data (do not take the
730 * data before it into account)
731 * @param count
732 * the number of bytes to take into account (do not process after
733 * this number of bytes has been processed)
734 * @param zip
735 * TRUE to also uncompress the data from a GZIP format
736 * automatically; if set to FALSE, zipped data can be returned
737 *
738 * @return the raw data represented by the given Base64 {@link String}
739 *
740 * @throws IOException
741 * in case of I/O errors
742 */
743 public static byte[] unbase64(byte[] data, int offset, int count,
744 boolean zip) throws IOException {
745 return Base64.niki_decode(data, offset, count, zip ? Base64.NO_OPTIONS
746 : Base64.DONT_GUNZIP);
a359464f
NR
747 }
748
bb60bd13
NR
749 /**
750 * Unonvert the given data from Base64 format back to a {@link String}.
b6200792
NR
751 * <p>
752 * Will automatically detect zipped data and also uncompress it before
753 * returning, unless ZIP is false.
754 *
755 * @param data
756 * the data to unconvert
757 * @param zip
758 * TRUE to also uncompress the data from a GZIP format
759 * automatically; if set to FALSE, zipped data can be returned
760 *
761 * @return the {@link String} represented by the given Base64 {@link String}
762 * , optionally compressed with GZIP
763 *
764 * @throws IOException
765 * in case of I/O errors
766 */
767 public static String unbase64s(String data, boolean zip) throws IOException {
768 return new String(unbase64(data, zip), "UTF-8");
769 }
770
771 /**
772 * Unconvert the given data from Base64 format back into a {@link String}.
bb60bd13
NR
773 *
774 * @param data
775 * the data to unconvert
b6200792
NR
776 * @param offset
777 * the offset at which to start taking the data (do not take the
778 * data before it into account)
779 * @param count
780 * the number of bytes to take into account (do not process after
781 * this number of bytes has been processed)
bb60bd13
NR
782 * @param zip
783 * TRUE to also uncompress the data from a GZIP format; take care
784 * about this flag, as it could easily cause errors in the
785 * returned content or an {@link IOException}
786 *
787 * @return the {@link String} represented by the given Base64 {@link String}
788 * , optionally compressed with GZIP
789 *
790 * @throws IOException
791 * in case of I/O errors
792 */
b6200792
NR
793 public static String unbase64s(byte[] data, int offset, int count,
794 boolean zip) throws IOException {
795 return new String(unbase64(data, offset, count, zip), "UTF-8");
bb60bd13
NR
796 }
797
d1e63903
NR
798 /**
799 * Return a display {@link String} for the given value, which can be
800 * suffixed with "k" or "M" depending upon the number, if it is big enough.
801 * <p>
79961c53
NR
802 * <p>
803 * Examples:
d1e63903 804 * <ul>
79961c53
NR
805 * <li><tt>8 765</tt> becomes "8k"</li>
806 * <li><tt>998 765</tt> becomes "998k"</li>
807 * <li><tt>12 987 364</tt> becomes "12M"</li>
808 * <li><tt>5 534 333 221</tt> becomes "5G"</li>
d1e63903
NR
809 * </ul>
810 *
811 * @param value
812 * the value to convert
813 *
814 * @return the display value
815 */
816 public static String formatNumber(long value) {
5b46737c 817 return formatNumber(value, 0);
d1e63903
NR
818 }
819
820 /**
821 * Return a display {@link String} for the given value, which can be
822 * suffixed with "k" or "M" depending upon the number, if it is big enough.
823 * <p>
79961c53 824 * Examples (assuming decimalPositions = 1):
d1e63903 825 * <ul>
79961c53
NR
826 * <li><tt>8 765</tt> becomes "8.7k"</li>
827 * <li><tt>998 765</tt> becomes "998.7k"</li>
828 * <li><tt>12 987 364</tt> becomes "12.9M"</li>
829 * <li><tt>5 534 333 221</tt> becomes "5.5G"</li>
d1e63903
NR
830 * </ul>
831 *
832 * @param value
833 * the value to convert
5b46737c
NR
834 * @param decimalPositions
835 * the number of decimal positions to keep
d1e63903
NR
836 *
837 * @return the display value
838 */
5b46737c 839 public static String formatNumber(long value, int decimalPositions) {
79961c53 840 long userValue = value;
5b46737c 841 String suffix = "";
79961c53 842 long mult = 1;
5b46737c 843
8758aebb 844 if (value >= 1000000000l) {
79961c53
NR
845 mult = 1000000000l;
846 userValue = value / 1000000000l;
39d16a80 847 suffix = " G";
8758aebb 848 } else if (value >= 1000000l) {
79961c53
NR
849 mult = 1000000l;
850 userValue = value / 1000000l;
39d16a80 851 suffix = " M";
5b46737c 852 } else if (value >= 1000l) {
79961c53
NR
853 mult = 1000l;
854 userValue = value / 1000l;
39d16a80 855 suffix = " k";
d1e63903
NR
856 }
857
79961c53 858 String deci = "";
5b46737c 859 if (decimalPositions > 0) {
79961c53
NR
860 deci = Long.toString(value % mult);
861 int size = Long.toString(mult).length() - 1;
862 while (deci.length() < size) {
863 deci = "0" + deci;
864 }
865
5b46737c
NR
866 deci = deci.substring(0, Math.min(decimalPositions, deci.length()));
867 while (deci.length() < decimalPositions) {
868 deci += "0";
869 }
79961c53 870
5b46737c 871 deci = "." + deci;
d1e63903
NR
872 }
873
79961c53 874 return Long.toString(userValue) + deci + suffix;
d1e63903
NR
875 }
876
60033478
NR
877 /**
878 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
879 * read a "display" number that can contain a "M" or "k" suffix and return
880 * the full value.
881 * <p>
882 * Of course, the conversion to and from display form is lossy (example:
5b46737c 883 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
60033478
NR
884 *
885 * @param value
886 * the value in display form with possible "M" and "k" suffixes,
887 * can be NULL
888 *
889 * @return the value as a number, or 0 if not possible to convert
890 */
891 public static long toNumber(String value) {
892 return toNumber(value, 0l);
893 }
894
895 /**
896 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
897 * read a "display" number that can contain a "M" or "k" suffix and return
898 * the full value.
899 * <p>
900 * Of course, the conversion to and from display form is lossy (example:
5b46737c 901 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
60033478
NR
902 *
903 * @param value
904 * the value in display form with possible "M" and "k" suffixes,
905 * can be NULL
906 * @param def
907 * the default value if it is not possible to convert the given
908 * value to a number
909 *
910 * @return the value as a number, or 0 if not possible to convert
911 */
912 public static long toNumber(String value, long def) {
913 long count = def;
914 if (value != null) {
5b46737c 915 value = value.trim().toLowerCase();
60033478 916 try {
79961c53
NR
917 long mult = 1;
918 if (value.endsWith("g")) {
919 value = value.substring(0, value.length() - 1).trim();
920 mult = 1000000000;
921 } else if (value.endsWith("m")) {
5b46737c
NR
922 value = value.substring(0, value.length() - 1).trim();
923 mult = 1000000;
924 } else if (value.endsWith("k")) {
925 value = value.substring(0, value.length() - 1).trim();
926 mult = 1000;
927 }
928
929 long deci = 0;
930 if (value.contains(".")) {
931 String[] tab = value.split("\\.");
932 if (tab.length != 2) {
933 throw new NumberFormatException(value);
934 }
935 double decimal = Double.parseDouble("0."
936 + tab[tab.length - 1]);
937 deci = ((long) (mult * decimal));
938 value = tab[0];
60033478 939 }
5b46737c
NR
940 count = mult * Long.parseLong(value) + deci;
941 } catch (Exception e) {
60033478
NR
942 }
943 }
944
945 return count;
946 }
947
e8aa5bf9
NR
948 /**
949 * The "remove accents" pattern.
950 *
951 * @return the pattern, or NULL if a problem happens
952 */
953 private static Pattern getMarks() {
954 try {
955 return Pattern
956 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
957 } catch (Exception e) {
958 // Can fail on Android...
959 return null;
960 }
961 }
dc22eb95 962
bb60bd13 963 //
dc22eb95 964 // justify List<String> related:
bb60bd13 965 //
dc22eb95 966
bb60bd13
NR
967 /**
968 * Check if this line ends as a complete line (ends with a "." or similar).
969 * <p>
970 * Note that we consider an empty line as full, and a line ending with
971 * spaces as not complete.
972 *
973 * @param line
974 * the line to check
975 *
976 * @return TRUE if it does
977 */
dc22eb95 978 static private boolean isFullLine(StringBuilder line) {
bb60bd13
NR
979 if (line.length() == 0) {
980 return true;
981 }
982
983 char lastCar = line.charAt(line.length() - 1);
984 switch (lastCar) {
985 case '.': // points
986 case '?':
987 case '!':
988
989 case '\'': // quotes
990 case '‘':
991 case '’':
992
993 case '"': // double quotes
994 case '”':
995 case '“':
996 case '»':
997 case '«':
998 return true;
999 default:
1000 return false;
1001 }
dc22eb95
NR
1002 }
1003
bb60bd13
NR
1004 /**
1005 * Check if this line represent an item in a list or description (i.e.,
1006 * check that the first non-space char is "-").
1007 *
1008 * @param line
1009 * the line to check
1010 *
1011 * @return TRUE if it is
1012 */
dc22eb95
NR
1013 static private boolean isItemLine(String line) {
1014 String spacing = getItemSpacing(line);
c0c091af
NR
1015 return spacing != null && !spacing.isEmpty()
1016 && line.charAt(spacing.length()) == '-';
dc22eb95
NR
1017 }
1018
bb60bd13
NR
1019 /**
1020 * Return all the spaces that start this line (or Empty if none).
1021 *
1022 * @param line
1023 * the line to get the starting spaces from
1024 *
1025 * @return the left spacing
1026 */
dc22eb95
NR
1027 static private String getItemSpacing(String line) {
1028 int i;
1029 for (i = 0; i < line.length(); i++) {
1030 if (line.charAt(i) != ' ') {
1031 return line.substring(0, i);
1032 }
1033 }
1034
1035 return "";
1036 }
c0c091af 1037
bb60bd13
NR
1038 /**
1039 * This line is an horizontal spacer line.
1040 *
1041 * @param line
1042 * the line to test
1043 *
1044 * @return TRUE if it is
1045 */
c0c091af
NR
1046 static private boolean isHrLine(CharSequence line) {
1047 int count = 0;
1048 if (line != null) {
1049 for (int i = 0; i < line.length(); i++) {
1050 char car = line.charAt(i);
1051 if (car == ' ' || car == '\t' || car == '*' || car == '-'
1052 || car == '_' || car == '~' || car == '=' || car == '/'
1053 || car == '\\') {
1054 count++;
1055 } else {
1056 return false;
1057 }
1058 }
1059 }
1060
1061 return count > 2;
1062 }
ec1f3444 1063}