VERSION
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
CommitLineData
ec1f3444
NR
1package be.nikiroo.utils;
2
ec1f3444 3import java.io.IOException;
a359464f
NR
4import java.io.InputStream;
5import java.io.OutputStream;
3f8349b7 6import java.io.UnsupportedEncodingException;
ec1f3444
NR
7import java.security.MessageDigest;
8import java.security.NoSuchAlgorithmException;
9import java.text.Normalizer;
10import java.text.Normalizer.Form;
11import java.text.ParseException;
12import java.text.SimpleDateFormat;
dc22eb95
NR
13import java.util.AbstractMap;
14import java.util.ArrayList;
ec1f3444 15import java.util.Date;
cc3e7291 16import java.util.List;
dc22eb95 17import java.util.Map.Entry;
ec1f3444
NR
18import java.util.regex.Pattern;
19
ec1f3444
NR
20import org.unbescape.html.HtmlEscape;
21import org.unbescape.html.HtmlEscapeLevel;
22import org.unbescape.html.HtmlEscapeType;
23
f28a134e
NR
24import be.nikiroo.utils.streams.Base64InputStream;
25import be.nikiroo.utils.streams.Base64OutputStream;
26
ec1f3444
NR
27/**
28 * This class offers some utilities based around {@link String}s.
29 *
30 * @author niki
31 */
32public class StringUtils {
/**
 * Describes where a {@link String} is placed inside the available
 * horizontal space when padding or justification is applied (it only
 * matters when there is enough room left to align the text).
 */
public enum Alignment {
	/** Aligned at left. */
	LEFT,
	/** Centered. */
	CENTER,
	/** Aligned at right. */
	RIGHT,
	/** Full justified (to both left and right). */
	JUSTIFY,

	// Legacy values, only kept so that older callers still compile:

	/** DEPRECATED: please use LEFT. */
	@Deprecated
	Beginning,
	/** DEPRECATED: please use CENTER. */
	@Deprecated
	Center,
	/** DEPRECATED: please use RIGHT. */
	@Deprecated
	End;

	/**
	 * Translate a deprecated value into its current equivalent; the
	 * non-deprecated values are returned unchanged.
	 *
	 * @return the non-deprecated value
	 */
	Alignment undeprecate() {
		switch (this) {
		case Beginning:
			return LEFT;
		case Center:
			return CENTER;
		case End:
			return RIGHT;
		default:
			return this;
		}
	}
}
76
e8aa5bf9 77 static private Pattern marks = getMarks();
ec1f3444
NR
78
79 /**
80 * Fix the size of the given {@link String} either with space-padding or by
81 * shortening it.
82 *
83 * @param text
84 * the {@link String} to fix
85 * @param width
86 * the size of the resulting {@link String} or -1 for a noop
87 *
88 * @return the resulting {@link String} of size <i>size</i>
89 */
90 static public String padString(String text, int width) {
451f434b 91 return padString(text, width, true, null);
ec1f3444
NR
92 }
93
94 /**
95 * Fix the size of the given {@link String} either with space-padding or by
96 * optionally shortening it.
97 *
98 * @param text
99 * the {@link String} to fix
100 * @param width
101 * the size of the resulting {@link String} if the text fits or
102 * if cut is TRUE or -1 for a noop
103 * @param cut
104 * cut the {@link String} shorter if needed
105 * @param align
106 * align the {@link String} in this position if we have enough
451f434b 107 * space (default is Alignment.Beginning)
ec1f3444
NR
108 *
109 * @return the resulting {@link String} of size <i>size</i> minimum
110 */
111 static public String padString(String text, int width, boolean cut,
112 Alignment align) {
113
451f434b 114 if (align == null) {
cc3e7291 115 align = Alignment.LEFT;
451f434b
NR
116 }
117
cc3e7291
NR
118 align = align.undeprecate();
119
ec1f3444
NR
120 if (width >= 0) {
121 if (text == null)
122 text = "";
123
124 int diff = width - text.length();
125
126 if (diff < 0) {
127 if (cut)
128 text = text.substring(0, width);
129 } else if (diff > 0) {
cc3e7291
NR
130 if (diff < 2 && align != Alignment.RIGHT)
131 align = Alignment.LEFT;
ec1f3444
NR
132
133 switch (align) {
cc3e7291 134 case RIGHT:
ec1f3444
NR
135 text = new String(new char[diff]).replace('\0', ' ') + text;
136 break;
cc3e7291 137 case CENTER:
ec1f3444
NR
138 int pad1 = (diff) / 2;
139 int pad2 = (diff + 1) / 2;
140 text = new String(new char[pad1]).replace('\0', ' ') + text
141 + new String(new char[pad2]).replace('\0', ' ');
142 break;
cc3e7291
NR
143 case LEFT:
144 default:
145 text = text + new String(new char[diff]).replace('\0', ' ');
146 break;
ec1f3444
NR
147 }
148 }
149 }
150
151 return text;
152 }
153
cc3e7291
NR
154 /**
155 * Justify a text into width-sized (at the maximum) lines.
156 *
157 * @param text
158 * the {@link String} to justify
159 * @param width
160 * the maximum size of the resulting lines
161 *
162 * @return a list of justified text lines
163 */
164 static public List<String> justifyText(String text, int width) {
165 return justifyText(text, width, null);
166 }
167
168 /**
169 * Justify a text into width-sized (at the maximum) lines.
170 *
171 * @param text
172 * the {@link String} to justify
173 * @param width
174 * the maximum size of the resulting lines
175 * @param align
176 * align the lines in this position (default is
177 * Alignment.Beginning)
178 *
179 * @return a list of justified text lines
180 */
181 static public List<String> justifyText(String text, int width,
182 Alignment align) {
183 if (align == null) {
184 align = Alignment.LEFT;
185 }
186
187 align = align.undeprecate();
188
189 switch (align) {
190 case CENTER:
191 return StringJustifier.center(text, width);
192 case RIGHT:
193 return StringJustifier.right(text, width);
194 case JUSTIFY:
195 return StringJustifier.full(text, width);
196 case LEFT:
197 default:
198 return StringJustifier.left(text, width);
199 }
200 }
201
dc22eb95
NR
202 /**
203 * Justify a text into width-sized (at the maximum) lines.
204 *
205 * @param text
206 * the {@link String} to justify
207 * @param width
208 * the maximum size of the resulting lines
209 *
210 * @return a list of justified text lines
211 */
212 static public List<String> justifyText(List<String> text, int width) {
213 return justifyText(text, width, null);
214 }
215
216 /**
217 * Justify a text into width-sized (at the maximum) lines.
218 *
219 * @param text
220 * the {@link String} to justify
221 * @param width
222 * the maximum size of the resulting lines
223 * @param align
224 * align the lines in this position (default is
225 * Alignment.Beginning)
226 *
227 * @return a list of justified text lines
228 */
229 static public List<String> justifyText(List<String> text, int width,
230 Alignment align) {
231 List<String> result = new ArrayList<String>();
232
233 // Content <-> Bullet spacing (null = no spacing)
234 List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
235 StringBuilder previous = null;
236 StringBuilder tmp = new StringBuilder();
237 String previousItemBulletSpacing = null;
238 String itemBulletSpacing = null;
239 for (String inputLine : text) {
240 boolean previousLineComplete = true;
241
242 String current = inputLine.replace("\t", " ");
243 itemBulletSpacing = getItemSpacing(current);
244 boolean bullet = isItemLine(current);
245 if ((previousItemBulletSpacing == null || itemBulletSpacing
246 .length() <= previousItemBulletSpacing.length()) && !bullet) {
247 itemBulletSpacing = null;
248 }
249
250 if (itemBulletSpacing != null) {
251 current = current.trim();
252 if (!current.isEmpty() && bullet) {
253 current = current.substring(1);
254 }
255 current = current.trim();
256 previousLineComplete = bullet;
257 } else {
258 tmp.setLength(0);
259 for (String word : current.split(" ")) {
260 if (word.isEmpty()) {
261 continue;
262 }
263
264 if (tmp.length() > 0) {
265 tmp.append(' ');
266 }
267 tmp.append(word.trim());
268 }
269 current = tmp.toString();
270
271 previousLineComplete = current.isEmpty()
272 || previousItemBulletSpacing != null
c0c091af
NR
273 || (previous != null && isFullLine(previous))
274 || isHrLine(current) || isHrLine(previous);
dc22eb95
NR
275 }
276
277 if (previous == null) {
278 previous = new StringBuilder();
279 } else {
280 if (previousLineComplete) {
281 lines.add(new AbstractMap.SimpleEntry<String, String>(
282 previous.toString(), previousItemBulletSpacing));
283 previous.setLength(0);
284 previousItemBulletSpacing = itemBulletSpacing;
285 } else {
286 previous.append(' ');
287 }
288 }
289
290 previous.append(current);
291
292 }
293
294 if (previous != null) {
295 lines.add(new AbstractMap.SimpleEntry<String, String>(previous
296 .toString(), previousItemBulletSpacing));
297 }
298
299 for (Entry<String, String> line : lines) {
300 String content = line.getKey();
301 String spacing = line.getValue();
302
303 String bullet = "- ";
304 if (spacing == null) {
305 bullet = "";
306 spacing = "";
307 }
308
309 if (spacing.length() > width + 3) {
310 spacing = "";
311 }
312
313 for (String subline : StringUtils.justifyText(content, width
314 - (spacing.length() + bullet.length()), align)) {
315 result.add(spacing + bullet + subline);
316 if (!bullet.isEmpty()) {
317 bullet = " ";
318 }
319 }
320 }
321
322 return result;
323 }
324
ec1f3444
NR
325 /**
326 * Sanitise the given input to make it more Terminal-friendly by removing
327 * combining characters.
328 *
329 * @param input
330 * the input to sanitise
331 * @param allowUnicode
332 * allow Unicode or only allow ASCII Latin characters
333 *
334 * @return the sanitised {@link String}
335 */
336 static public String sanitize(String input, boolean allowUnicode) {
337 return sanitize(input, allowUnicode, !allowUnicode);
338 }
339
340 /**
341 * Sanitise the given input to make it more Terminal-friendly by removing
342 * combining characters.
343 *
344 * @param input
345 * the input to sanitise
346 * @param allowUnicode
347 * allow Unicode or only allow ASCII Latin characters
348 * @param removeAllAccents
349 * TRUE to replace all accentuated characters by their non
350 * accentuated counter-parts
351 *
352 * @return the sanitised {@link String}
353 */
354 static public String sanitize(String input, boolean allowUnicode,
355 boolean removeAllAccents) {
356
357 if (removeAllAccents) {
358 input = Normalizer.normalize(input, Form.NFKD);
e8aa5bf9
NR
359 if (marks != null) {
360 input = marks.matcher(input).replaceAll("");
361 }
ec1f3444
NR
362 }
363
364 input = Normalizer.normalize(input, Form.NFKC);
365
366 if (!allowUnicode) {
367 StringBuilder builder = new StringBuilder();
368 for (int index = 0; index < input.length(); index++) {
369 char car = input.charAt(index);
370 // displayable chars in ASCII are in the range 32<->255,
371 // except DEL (127)
372 if (car >= 32 && car <= 255 && car != 127) {
373 builder.append(car);
374 }
375 }
376 input = builder.toString();
377 }
378
379 return input;
380 }
381
382 /**
451f434b
NR
383 * Convert between the time in milliseconds to a {@link String} in a "fixed"
384 * way (to exchange data over the wire, for instance).
385 * <p>
386 * Precise to the second.
ec1f3444
NR
387 *
388 * @param time
451f434b
NR
389 * the specified number of milliseconds since the standard base
390 * time known as "the epoch", namely January 1, 1970, 00:00:00
391 * GMT
ec1f3444
NR
392 *
393 * @return the time as a {@link String}
394 */
395 static public String fromTime(long time) {
396 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
397 return sdf.format(new Date(time));
398 }
399
400 /**
451f434b 401 * Convert between the time as a {@link String} to milliseconds in a "fixed"
ec1f3444 402 * way (to exchange data over the wire, for instance).
451f434b
NR
403 * <p>
404 * Precise to the second.
ec1f3444 405 *
db31c358 406 * @param displayTime
ec1f3444
NR
407 * the time as a {@link String}
408 *
451f434b 409 * @return the number of milliseconds since the standard base time known as
e8aa5bf9
NR
410 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
411 * of error
412 *
413 * @throws ParseException
414 * in case of parse error
ec1f3444 415 */
e8aa5bf9 416 static public long toTime(String displayTime) throws ParseException {
ec1f3444 417 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
e8aa5bf9 418 return sdf.parse(displayTime).getTime();
ec1f3444
NR
419 }
420
ec1f3444
NR
421 /**
422 * Return a hash of the given {@link String}.
423 *
424 * @param input
425 * the input data
426 *
427 * @return the hash
428 */
b771aed5 429 static public String getMd5Hash(String input) {
ec1f3444
NR
430 try {
431 MessageDigest md = MessageDigest.getInstance("MD5");
3f8349b7 432 md.update(input.getBytes("UTF-8"));
ec1f3444
NR
433 byte byteData[] = md.digest();
434
435 StringBuffer hexString = new StringBuffer();
436 for (int i = 0; i < byteData.length; i++) {
437 String hex = Integer.toHexString(0xff & byteData[i]);
438 if (hex.length() == 1)
439 hexString.append('0');
440 hexString.append(hex);
441 }
442
443 return hexString.toString();
444 } catch (NoSuchAlgorithmException e) {
445 return input;
3f8349b7
NR
446 } catch (UnsupportedEncodingException e) {
447 return input;
ec1f3444
NR
448 }
449 }
450
ec1f3444
NR
451 /**
452 * Remove the HTML content from the given input, and un-html-ize the rest.
453 *
454 * @param html
455 * the HTML-encoded content
456 *
457 * @return the HTML-free equivalent content
458 */
459 public static String unhtml(String html) {
460 StringBuilder builder = new StringBuilder();
461
462 int inTag = 0;
463 for (char car : html.toCharArray()) {
464 if (car == '<') {
465 inTag++;
466 } else if (car == '>') {
467 inTag--;
468 } else if (inTag <= 0) {
469 builder.append(car);
470 }
471 }
472
7ee9568b
NR
473 char nbsp = ' '; // non-breakable space (a special char)
474 char space = ' ';
475 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
ec1f3444
NR
476 }
477
478 /**
479 * Escape the given {@link String} so it can be used in XML, as content.
480 *
481 * @param input
482 * the input {@link String}
483 *
484 * @return the escaped {@link String}
485 */
486 public static String xmlEscape(String input) {
487 if (input == null) {
488 return "";
489 }
490
491 return HtmlEscape.escapeHtml(input,
492 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
493 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
494 }
495
496 /**
497 * Escape the given {@link String} so it can be used in XML, as text content
498 * inside double-quotes.
499 *
500 * @param input
501 * the input {@link String}
502 *
503 * @return the escaped {@link String}
504 */
505 public static String xmlEscapeQuote(String input) {
506 if (input == null) {
507 return "";
508 }
509
510 return HtmlEscape.escapeHtml(input,
511 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
512 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
513 }
db31c358 514
80500544
NR
515 /**
516 * Zip the data and then encode it into Base64.
517 *
bb60bd13
NR
518 * @deprecated use {@link StringUtils#base64(byte[], boolean)} with the
519 * correct parameter instead
520 *
80500544
NR
521 * @param data
522 * the data
523 *
524 * @return the Base64 zipped version
525 */
bb60bd13 526 @Deprecated
db31c358
NR
527 public static String zip64(String data) {
528 try {
a359464f 529 return Base64.encodeBytes(data.getBytes("UTF-8"), Base64.GZIP);
db31c358
NR
530 } catch (IOException e) {
531 e.printStackTrace();
532 return null;
533 }
534 }
535
80500544
NR
536 /**
537 * Unconvert from Base64 then unzip the content.
538 *
bb60bd13
NR
539 * @deprecated use {@link StringUtils#unbase64s(String, boolean)} with the
540 * correct parameter instead
541 *
80500544
NR
542 * @param data
543 * the data in Base64 format
544 *
545 * @return the raw data
546 *
547 * @throws IOException
548 * in case of I/O error
549 */
bb60bd13 550 @Deprecated
db31c358 551 public static String unzip64(String data) throws IOException {
b6200792 552 return new String(Base64.decode(data, Base64.GZIP), "UTF-8");
db31c358 553 }
e8aa5bf9 554
a359464f
NR
555 /**
556 * Convert the given data to Base64 format.
557 *
558 * @param data
559 * the data to convert
560 * @param zip
561 * TRUE to also compress the data in GZIP format; remember that
562 * compressed and not-compressed content are different; you need
563 * to know which is which when decoding
564 *
565 * @return the Base64 {@link String} representation of the data
566 *
567 * @throws IOException
568 * in case of I/O errors
569 */
570 public static String base64(String data, boolean zip) throws IOException {
571 return base64(data.getBytes("UTF-8"), zip);
572 }
573
bb60bd13
NR
574 /**
575 * Convert the given data to Base64 format.
576 *
577 * @param data
578 * the data to convert
579 * @param zip
580 * TRUE to also compress the data in GZIP format; remember that
581 * compressed and not-compressed content are different; you need
582 * to know which is which when decoding
583 *
584 * @return the Base64 {@link String} representation of the data
585 *
586 * @throws IOException
587 * in case of I/O errors
588 */
589 public static String base64(byte[] data, boolean zip) throws IOException {
590 return Base64.encodeBytes(data, zip ? Base64.GZIP : Base64.NO_OPTIONS);
591 }
592
593 /**
a359464f
NR
594 * Convert the given data to Base64 format.
595 *
596 * @param data
597 * the data to convert
598 * @param zip
599 * TRUE to also uncompress the data from a GZIP format; take care
600 * about this flag, as it could easily cause errors in the
601 * returned content or an {@link IOException}
602 * @param breakLines
603 * TRUE to break lines on every 76th character
604 *
605 * @return the Base64 {@link String} representation of the data
606 *
607 * @throws IOException
608 * in case of I/O errors
609 */
f28a134e 610 @Deprecated
a359464f
NR
611 public static OutputStream base64(OutputStream data, boolean zip,
612 boolean breakLines) throws IOException {
f28a134e
NR
613 OutputStream out = new Base64OutputStream(data, true);
614
a359464f
NR
615 if (zip) {
616 out = new java.util.zip.GZIPOutputStream(out);
617 }
618
619 return out;
620 }
621
622 /**
623 * Convert the given data to Base64 format.
624 *
625 * @param data
626 * the data to convert
627 * @param zip
628 * TRUE to also uncompress the data from a GZIP format; take care
629 * about this flag, as it could easily cause errors in the
630 * returned content or an {@link IOException}
631 * @param breakLines
632 * TRUE to break lines on every 76th character
633 *
634 * @return the Base64 {@link String} representation of the data
635 *
636 * @throws IOException
637 * in case of I/O errors
638 */
f28a134e 639 @Deprecated
a359464f
NR
640 public static InputStream base64(InputStream data, boolean zip,
641 boolean breakLines) throws IOException {
642 if (zip) {
643 data = new java.util.zip.GZIPInputStream(data);
644 }
645
f28a134e 646 return new Base64InputStream(data, true);
a359464f
NR
647 }
648
649 /**
650 * Unconvert the given data from Base64 format back to a raw array of bytes.
b6200792
NR
651 * <p>
652 * Will automatically detect zipped data and also uncompress it before
653 * returning, unless ZIP is false.
bb60bd13
NR
654 *
655 * @param data
656 * the data to unconvert
657 * @param zip
b6200792
NR
658 * TRUE to also uncompress the data from a GZIP format
659 * automatically; if set to FALSE, zipped data can be returned
bb60bd13
NR
660 *
661 * @return the raw data represented by the given Base64 {@link String},
662 * optionally compressed with GZIP
663 *
664 * @throws IOException
665 * in case of I/O errors
666 */
667 public static byte[] unbase64(String data, boolean zip) throws IOException {
b6200792
NR
668 return Base64
669 .decode(data, zip ? Base64.NO_OPTIONS : Base64.DONT_GUNZIP);
bb60bd13
NR
670 }
671
a359464f
NR
672 /**
673 * Unconvert the given data from Base64 format back to a raw array of bytes.
674 *
675 * @param data
676 * the data to unconvert
677 * @param zip
678 * TRUE to also uncompress the data from a GZIP format; take care
679 * about this flag, as it could easily cause errors in the
680 * returned content or an {@link IOException}
a359464f
NR
681 *
682 * @return the raw data represented by the given Base64 {@link String}
683 *
684 * @throws IOException
685 * in case of I/O errors
686 */
f28a134e 687 @Deprecated
b6200792
NR
688 public static OutputStream unbase64(OutputStream data, boolean zip)
689 throws IOException {
f28a134e
NR
690 OutputStream out = new Base64OutputStream(data, false);
691
a359464f
NR
692
693 if (zip) {
694 out = new java.util.zip.GZIPOutputStream(out);
695 }
696
697 return out;
698 }
699
700 /**
701 * Unconvert the given data from Base64 format back to a raw array of bytes.
702 *
703 * @param data
704 * the data to unconvert
705 * @param zip
706 * TRUE to also uncompress the data from a GZIP format; take care
707 * about this flag, as it could easily cause errors in the
708 * returned content or an {@link IOException}
a359464f
NR
709 *
710 * @return the raw data represented by the given Base64 {@link String}
711 *
712 * @throws IOException
713 * in case of I/O errors
714 */
f28a134e 715 @Deprecated
b6200792
NR
716 public static InputStream unbase64(InputStream data, boolean zip)
717 throws IOException {
a359464f
NR
718 if (zip) {
719 data = new java.util.zip.GZIPInputStream(data);
720 }
721
f28a134e 722 return new Base64InputStream(data, false);
b6200792
NR
723 }
724
725 /**
726 * Unconvert the given data from Base64 format back to a raw array of bytes.
727 * <p>
728 * Will automatically detect zipped data and also uncompress it before
729 * returning, unless ZIP is false.
730 *
731 * @param data
732 * the data to unconvert
733 * @param offset
734 * the offset at which to start taking the data (do not take the
735 * data before it into account)
736 * @param count
737 * the number of bytes to take into account (do not process after
738 * this number of bytes has been processed)
739 * @param zip
740 * TRUE to also uncompress the data from a GZIP format
741 * automatically; if set to FALSE, zipped data can be returned
742 *
743 * @return the raw data represented by the given Base64 {@link String}
744 *
745 * @throws IOException
746 * in case of I/O errors
747 */
748 public static byte[] unbase64(byte[] data, int offset, int count,
749 boolean zip) throws IOException {
750 return Base64.niki_decode(data, offset, count, zip ? Base64.NO_OPTIONS
751 : Base64.DONT_GUNZIP);
a359464f
NR
752 }
753
bb60bd13
NR
754 /**
755 * Unonvert the given data from Base64 format back to a {@link String}.
b6200792
NR
756 * <p>
757 * Will automatically detect zipped data and also uncompress it before
758 * returning, unless ZIP is false.
759 *
760 * @param data
761 * the data to unconvert
762 * @param zip
763 * TRUE to also uncompress the data from a GZIP format
764 * automatically; if set to FALSE, zipped data can be returned
765 *
766 * @return the {@link String} represented by the given Base64 {@link String}
767 * , optionally compressed with GZIP
768 *
769 * @throws IOException
770 * in case of I/O errors
771 */
772 public static String unbase64s(String data, boolean zip) throws IOException {
773 return new String(unbase64(data, zip), "UTF-8");
774 }
775
776 /**
777 * Unconvert the given data from Base64 format back into a {@link String}.
bb60bd13
NR
778 *
779 * @param data
780 * the data to unconvert
b6200792
NR
781 * @param offset
782 * the offset at which to start taking the data (do not take the
783 * data before it into account)
784 * @param count
785 * the number of bytes to take into account (do not process after
786 * this number of bytes has been processed)
bb60bd13
NR
787 * @param zip
788 * TRUE to also uncompress the data from a GZIP format; take care
789 * about this flag, as it could easily cause errors in the
790 * returned content or an {@link IOException}
791 *
792 * @return the {@link String} represented by the given Base64 {@link String}
793 * , optionally compressed with GZIP
794 *
795 * @throws IOException
796 * in case of I/O errors
797 */
b6200792
NR
798 public static String unbase64s(byte[] data, int offset, int count,
799 boolean zip) throws IOException {
800 return new String(unbase64(data, offset, count, zip), "UTF-8");
bb60bd13
NR
801 }
802
d1e63903
NR
803 /**
804 * Return a display {@link String} for the given value, which can be
805 * suffixed with "k" or "M" depending upon the number, if it is big enough.
806 * <p>
79961c53
NR
807 * <p>
808 * Examples:
d1e63903 809 * <ul>
79961c53
NR
810 * <li><tt>8 765</tt> becomes "8k"</li>
811 * <li><tt>998 765</tt> becomes "998k"</li>
812 * <li><tt>12 987 364</tt> becomes "12M"</li>
813 * <li><tt>5 534 333 221</tt> becomes "5G"</li>
d1e63903
NR
814 * </ul>
815 *
816 * @param value
817 * the value to convert
818 *
819 * @return the display value
820 */
821 public static String formatNumber(long value) {
5b46737c 822 return formatNumber(value, 0);
d1e63903
NR
823 }
824
825 /**
826 * Return a display {@link String} for the given value, which can be
827 * suffixed with "k" or "M" depending upon the number, if it is big enough.
828 * <p>
79961c53 829 * Examples (assuming decimalPositions = 1):
d1e63903 830 * <ul>
79961c53
NR
831 * <li><tt>8 765</tt> becomes "8.7k"</li>
832 * <li><tt>998 765</tt> becomes "998.7k"</li>
833 * <li><tt>12 987 364</tt> becomes "12.9M"</li>
834 * <li><tt>5 534 333 221</tt> becomes "5.5G"</li>
d1e63903
NR
835 * </ul>
836 *
837 * @param value
838 * the value to convert
5b46737c
NR
839 * @param decimalPositions
840 * the number of decimal positions to keep
d1e63903
NR
841 *
842 * @return the display value
843 */
5b46737c 844 public static String formatNumber(long value, int decimalPositions) {
79961c53 845 long userValue = value;
5b46737c 846 String suffix = "";
79961c53 847 long mult = 1;
5b46737c 848
8758aebb 849 if (value >= 1000000000l) {
79961c53
NR
850 mult = 1000000000l;
851 userValue = value / 1000000000l;
39d16a80 852 suffix = " G";
8758aebb 853 } else if (value >= 1000000l) {
79961c53
NR
854 mult = 1000000l;
855 userValue = value / 1000000l;
39d16a80 856 suffix = " M";
5b46737c 857 } else if (value >= 1000l) {
79961c53
NR
858 mult = 1000l;
859 userValue = value / 1000l;
39d16a80 860 suffix = " k";
d1e63903
NR
861 }
862
79961c53 863 String deci = "";
5b46737c 864 if (decimalPositions > 0) {
79961c53
NR
865 deci = Long.toString(value % mult);
866 int size = Long.toString(mult).length() - 1;
867 while (deci.length() < size) {
868 deci = "0" + deci;
869 }
870
5b46737c
NR
871 deci = deci.substring(0, Math.min(decimalPositions, deci.length()));
872 while (deci.length() < decimalPositions) {
873 deci += "0";
874 }
79961c53 875
5b46737c 876 deci = "." + deci;
d1e63903
NR
877 }
878
79961c53 879 return Long.toString(userValue) + deci + suffix;
d1e63903
NR
880 }
881
60033478
NR
882 /**
883 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
884 * read a "display" number that can contain a "M" or "k" suffix and return
885 * the full value.
886 * <p>
887 * Of course, the conversion to and from display form is lossy (example:
5b46737c 888 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
60033478
NR
889 *
890 * @param value
891 * the value in display form with possible "M" and "k" suffixes,
892 * can be NULL
893 *
894 * @return the value as a number, or 0 if not possible to convert
895 */
896 public static long toNumber(String value) {
897 return toNumber(value, 0l);
898 }
899
900 /**
901 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
902 * read a "display" number that can contain a "M" or "k" suffix and return
903 * the full value.
904 * <p>
905 * Of course, the conversion to and from display form is lossy (example:
5b46737c 906 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
60033478
NR
907 *
908 * @param value
909 * the value in display form with possible "M" and "k" suffixes,
910 * can be NULL
911 * @param def
912 * the default value if it is not possible to convert the given
913 * value to a number
914 *
915 * @return the value as a number, or 0 if not possible to convert
916 */
917 public static long toNumber(String value, long def) {
918 long count = def;
919 if (value != null) {
5b46737c 920 value = value.trim().toLowerCase();
60033478 921 try {
79961c53
NR
922 long mult = 1;
923 if (value.endsWith("g")) {
924 value = value.substring(0, value.length() - 1).trim();
925 mult = 1000000000;
926 } else if (value.endsWith("m")) {
5b46737c
NR
927 value = value.substring(0, value.length() - 1).trim();
928 mult = 1000000;
929 } else if (value.endsWith("k")) {
930 value = value.substring(0, value.length() - 1).trim();
931 mult = 1000;
932 }
933
934 long deci = 0;
935 if (value.contains(".")) {
936 String[] tab = value.split("\\.");
937 if (tab.length != 2) {
938 throw new NumberFormatException(value);
939 }
940 double decimal = Double.parseDouble("0."
941 + tab[tab.length - 1]);
942 deci = ((long) (mult * decimal));
943 value = tab[0];
60033478 944 }
5b46737c
NR
945 count = mult * Long.parseLong(value) + deci;
946 } catch (Exception e) {
60033478
NR
947 }
948 }
949
950 return count;
951 }
952
e8aa5bf9
NR
953 /**
954 * The "remove accents" pattern.
955 *
956 * @return the pattern, or NULL if a problem happens
957 */
958 private static Pattern getMarks() {
959 try {
960 return Pattern
961 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
962 } catch (Exception e) {
963 // Can fail on Android...
964 return null;
965 }
966 }
dc22eb95 967
bb60bd13 968 //
dc22eb95 969 // justify List<String> related:
bb60bd13 970 //
dc22eb95 971
bb60bd13
NR
972 /**
973 * Check if this line ends as a complete line (ends with a "." or similar).
974 * <p>
975 * Note that we consider an empty line as full, and a line ending with
976 * spaces as not complete.
977 *
978 * @param line
979 * the line to check
980 *
981 * @return TRUE if it does
982 */
dc22eb95 983 static private boolean isFullLine(StringBuilder line) {
bb60bd13
NR
984 if (line.length() == 0) {
985 return true;
986 }
987
988 char lastCar = line.charAt(line.length() - 1);
989 switch (lastCar) {
990 case '.': // points
991 case '?':
992 case '!':
993
994 case '\'': // quotes
995 case '‘':
996 case '’':
997
998 case '"': // double quotes
999 case '”':
1000 case '“':
1001 case '»':
1002 case '«':
1003 return true;
1004 default:
1005 return false;
1006 }
dc22eb95
NR
1007 }
1008
bb60bd13
NR
1009 /**
1010 * Check if this line represent an item in a list or description (i.e.,
1011 * check that the first non-space char is "-").
1012 *
1013 * @param line
1014 * the line to check
1015 *
1016 * @return TRUE if it is
1017 */
dc22eb95
NR
1018 static private boolean isItemLine(String line) {
1019 String spacing = getItemSpacing(line);
c0c091af
NR
1020 return spacing != null && !spacing.isEmpty()
1021 && line.charAt(spacing.length()) == '-';
dc22eb95
NR
1022 }
1023
bb60bd13
NR
1024 /**
1025 * Return all the spaces that start this line (or Empty if none).
1026 *
1027 * @param line
1028 * the line to get the starting spaces from
1029 *
1030 * @return the left spacing
1031 */
dc22eb95
NR
1032 static private String getItemSpacing(String line) {
1033 int i;
1034 for (i = 0; i < line.length(); i++) {
1035 if (line.charAt(i) != ' ') {
1036 return line.substring(0, i);
1037 }
1038 }
1039
1040 return "";
1041 }
c0c091af 1042
bb60bd13
NR
1043 /**
1044 * This line is an horizontal spacer line.
1045 *
1046 * @param line
1047 * the line to test
1048 *
1049 * @return TRUE if it is
1050 */
c0c091af
NR
1051 static private boolean isHrLine(CharSequence line) {
1052 int count = 0;
1053 if (line != null) {
1054 for (int i = 0; i < line.length(); i++) {
1055 char car = line.charAt(i);
1056 if (car == ' ' || car == '\t' || car == '*' || car == '-'
1057 || car == '_' || car == '~' || car == '=' || car == '/'
1058 || car == '\\') {
1059 count++;
1060 } else {
1061 return false;
1062 }
1063 }
1064 }
1065
1066 return count > 2;
1067 }
ec1f3444 1068}