Merge commit 'edcd53bbbba9f94e21f43fd03d3a2febcc2b1564'
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
1 package be.nikiroo.utils;
2
3 import java.io.ByteArrayInputStream;
4 import java.io.ByteArrayOutputStream;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.io.OutputStream;
8 import java.io.UnsupportedEncodingException;
9 import java.security.MessageDigest;
10 import java.security.NoSuchAlgorithmException;
11 import java.text.Normalizer;
12 import java.text.Normalizer.Form;
13 import java.text.ParseException;
14 import java.text.SimpleDateFormat;
15 import java.util.AbstractMap;
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Date;
19 import java.util.List;
20 import java.util.Map.Entry;
21 import java.util.regex.Pattern;
22 import java.util.zip.GZIPInputStream;
23 import java.util.zip.GZIPOutputStream;
24
25 import org.unbescape.html.HtmlEscape;
26 import org.unbescape.html.HtmlEscapeLevel;
27 import org.unbescape.html.HtmlEscapeType;
28
29 import be.nikiroo.utils.streams.Base64InputStream;
30 import be.nikiroo.utils.streams.Base64OutputStream;
31
32 /**
33 * This class offer some utilities based around {@link String}s.
34 *
35 * @author niki
36 */
37 public class StringUtils {
/**
 * Horizontal placement of a {@link String} when it is padded or justified
 * (only meaningful when there is spare horizontal room).
 */
public enum Alignment {
	/** Text sticks to the left side. */
	LEFT,
	/** Text is centered. */
	CENTER,
	/** Text sticks to the right side. */
	RIGHT,
	/** Text is fully justified (flush on both left and right). */
	JUSTIFY,

	// Legacy values, only kept for backward compatibility:

	/** DEPRECATED: please use LEFT. */
	@Deprecated
	Beginning,
	/** DEPRECATED: please use CENTER. */
	@Deprecated
	Center,
	/** DEPRECATED: please use RIGHT. */
	@Deprecated
	End;

	/**
	 * Translate a legacy (deprecated) value into its current equivalent;
	 * current values are returned unchanged.
	 * 
	 * @return the non-deprecated value
	 */
	Alignment undeprecate() {
		switch (this) {
		case Beginning:
			return LEFT;
		case Center:
			return CENTER;
		case End:
			return RIGHT;
		default:
			return this;
		}
	}
}
81
82 static private Pattern marks = getMarks();
83
84 /**
85 * Fix the size of the given {@link String} either with space-padding or by
86 * shortening it.
87 *
88 * @param text
89 * the {@link String} to fix
90 * @param width
91 * the size of the resulting {@link String} or -1 for a noop
92 *
93 * @return the resulting {@link String} of size <i>size</i>
94 */
95 static public String padString(String text, int width) {
96 return padString(text, width, true, null);
97 }
98
99 /**
100 * Fix the size of the given {@link String} either with space-padding or by
101 * optionally shortening it.
102 *
103 * @param text
104 * the {@link String} to fix
105 * @param width
106 * the size of the resulting {@link String} if the text fits or
107 * if cut is TRUE or -1 for a noop
108 * @param cut
109 * cut the {@link String} shorter if needed
110 * @param align
111 * align the {@link String} in this position if we have enough
112 * space (default is Alignment.Beginning)
113 *
114 * @return the resulting {@link String} of size <i>size</i> minimum
115 */
116 static public String padString(String text, int width, boolean cut,
117 Alignment align) {
118
119 if (align == null) {
120 align = Alignment.LEFT;
121 }
122
123 align = align.undeprecate();
124
125 if (width >= 0) {
126 if (text == null)
127 text = "";
128
129 int diff = width - text.length();
130
131 if (diff < 0) {
132 if (cut)
133 text = text.substring(0, width);
134 } else if (diff > 0) {
135 if (diff < 2 && align != Alignment.RIGHT)
136 align = Alignment.LEFT;
137
138 switch (align) {
139 case RIGHT:
140 text = new String(new char[diff]).replace('\0', ' ') + text;
141 break;
142 case CENTER:
143 int pad1 = (diff) / 2;
144 int pad2 = (diff + 1) / 2;
145 text = new String(new char[pad1]).replace('\0', ' ') + text
146 + new String(new char[pad2]).replace('\0', ' ');
147 break;
148 case LEFT:
149 default:
150 text = text + new String(new char[diff]).replace('\0', ' ');
151 break;
152 }
153 }
154 }
155
156 return text;
157 }
158
159 /**
160 * Justify a text into width-sized (at the maximum) lines and return all the
161 * lines concatenated into a single '\\n'-separated line of text.
162 *
163 * @param text
164 * the {@link String} to justify
165 * @param width
166 * the maximum size of the resulting lines
167 *
168 * @return a list of justified text lines concatenated into a single
169 * '\\n'-separated line of text
170 */
171 static public String justifyTexts(String text, int width) {
172 StringBuilder builder = new StringBuilder();
173 for (String line : justifyText(text, width, null)) {
174 if (builder.length() > 0) {
175 builder.append('\n');
176 }
177 builder.append(line);
178 }
179
180 return builder.toString();
181 }
182
183 /**
184 * Justify a text into width-sized (at the maximum) lines.
185 *
186 * @param text
187 * the {@link String} to justify
188 * @param width
189 * the maximum size of the resulting lines
190 *
191 * @return a list of justified text lines
192 */
193 static public List<String> justifyText(String text, int width) {
194 return justifyText(text, width, null);
195 }
196
197 /**
198 * Justify a text into width-sized (at the maximum) lines.
199 *
200 * @param text
201 * the {@link String} to justify
202 * @param width
203 * the maximum size of the resulting lines
204 * @param align
205 * align the lines in this position (default is
206 * Alignment.Beginning)
207 *
208 * @return a list of justified text lines
209 */
210 static public List<String> justifyText(String text, int width,
211 Alignment align) {
212 if (align == null) {
213 align = Alignment.LEFT;
214 }
215
216 align = align.undeprecate();
217
218 switch (align) {
219 case CENTER:
220 return StringJustifier.center(text, width);
221 case RIGHT:
222 return StringJustifier.right(text, width);
223 case JUSTIFY:
224 return StringJustifier.full(text, width);
225 case LEFT:
226 default:
227 return StringJustifier.left(text, width);
228 }
229 }
230
231 /**
232 * Justify a text into width-sized (at the maximum) lines.
233 *
234 * @param text
235 * the {@link String} to justify
236 * @param width
237 * the maximum size of the resulting lines
238 *
239 * @return a list of justified text lines
240 */
241 static public List<String> justifyText(List<String> text, int width) {
242 return justifyText(text, width, null);
243 }
244
245 /**
246 * Justify a text into width-sized (at the maximum) lines.
247 *
248 * @param text
249 * the {@link String} to justify
250 * @param width
251 * the maximum size of the resulting lines
252 * @param align
253 * align the lines in this position (default is
254 * Alignment.Beginning)
255 *
256 * @return a list of justified text lines
257 */
258 static public List<String> justifyText(List<String> text, int width,
259 Alignment align) {
260 List<String> result = new ArrayList<String>();
261
262 // Content <-> Bullet spacing (null = no spacing)
263 List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
264 StringBuilder previous = null;
265 StringBuilder tmp = new StringBuilder();
266 String previousItemBulletSpacing = null;
267 String itemBulletSpacing = null;
268 for (String inputLine : text) {
269 boolean previousLineComplete = true;
270
271 String current = inputLine.replace("\t", " ");
272 itemBulletSpacing = getItemSpacing(current);
273 boolean bullet = isItemLine(current);
274 if ((previousItemBulletSpacing == null || itemBulletSpacing
275 .length() <= previousItemBulletSpacing.length()) && !bullet) {
276 itemBulletSpacing = null;
277 }
278
279 if (itemBulletSpacing != null) {
280 current = current.trim();
281 if (!current.isEmpty() && bullet) {
282 current = current.substring(1);
283 }
284 current = current.trim();
285 previousLineComplete = bullet;
286 } else {
287 tmp.setLength(0);
288 for (String word : current.split(" ")) {
289 if (word.isEmpty()) {
290 continue;
291 }
292
293 if (tmp.length() > 0) {
294 tmp.append(' ');
295 }
296 tmp.append(word.trim());
297 }
298 current = tmp.toString();
299
300 previousLineComplete = current.isEmpty()
301 || previousItemBulletSpacing != null
302 || (previous != null && isFullLine(previous))
303 || isHrLine(current) || isHrLine(previous);
304 }
305
306 if (previous == null) {
307 previous = new StringBuilder();
308 } else {
309 if (previousLineComplete) {
310 lines.add(new AbstractMap.SimpleEntry<String, String>(
311 previous.toString(), previousItemBulletSpacing));
312 previous.setLength(0);
313 previousItemBulletSpacing = itemBulletSpacing;
314 } else {
315 previous.append(' ');
316 }
317 }
318
319 previous.append(current);
320
321 }
322
323 if (previous != null) {
324 lines.add(new AbstractMap.SimpleEntry<String, String>(previous
325 .toString(), previousItemBulletSpacing));
326 }
327
328 for (Entry<String, String> line : lines) {
329 String content = line.getKey();
330 String spacing = line.getValue();
331
332 String bullet = "- ";
333 if (spacing == null) {
334 bullet = "";
335 spacing = "";
336 }
337
338 if (spacing.length() > width + 3) {
339 spacing = "";
340 }
341
342 for (String subline : StringUtils.justifyText(content, width
343 - (spacing.length() + bullet.length()), align)) {
344 result.add(spacing + bullet + subline);
345 if (!bullet.isEmpty()) {
346 bullet = " ";
347 }
348 }
349 }
350
351 return result;
352 }
353
354 /**
355 * Sanitise the given input to make it more Terminal-friendly by removing
356 * combining characters.
357 *
358 * @param input
359 * the input to sanitise
360 * @param allowUnicode
361 * allow Unicode or only allow ASCII Latin characters
362 *
363 * @return the sanitised {@link String}
364 */
365 static public String sanitize(String input, boolean allowUnicode) {
366 return sanitize(input, allowUnicode, !allowUnicode);
367 }
368
369 /**
370 * Sanitise the given input to make it more Terminal-friendly by removing
371 * combining characters.
372 *
373 * @param input
374 * the input to sanitise
375 * @param allowUnicode
376 * allow Unicode or only allow ASCII Latin characters
377 * @param removeAllAccents
378 * TRUE to replace all accentuated characters by their non
379 * accentuated counter-parts
380 *
381 * @return the sanitised {@link String}
382 */
383 static public String sanitize(String input, boolean allowUnicode,
384 boolean removeAllAccents) {
385
386 if (removeAllAccents) {
387 input = Normalizer.normalize(input, Form.NFKD);
388 if (marks != null) {
389 input = marks.matcher(input).replaceAll("");
390 }
391 }
392
393 input = Normalizer.normalize(input, Form.NFKC);
394
395 if (!allowUnicode) {
396 StringBuilder builder = new StringBuilder();
397 for (int index = 0; index < input.length(); index++) {
398 char car = input.charAt(index);
399 // displayable chars in ASCII are in the range 32<->255,
400 // except DEL (127)
401 if (car >= 32 && car <= 255 && car != 127) {
402 builder.append(car);
403 }
404 }
405 input = builder.toString();
406 }
407
408 return input;
409 }
410
411 /**
412 * Convert between the time in milliseconds to a {@link String} in a "fixed"
413 * way (to exchange data over the wire, for instance).
414 * <p>
415 * Precise to the second.
416 *
417 * @param time
418 * the specified number of milliseconds since the standard base
419 * time known as "the epoch", namely January 1, 1970, 00:00:00
420 * GMT
421 *
422 * @return the time as a {@link String}
423 */
424 static public String fromTime(long time) {
425 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
426 return sdf.format(new Date(time));
427 }
428
429 /**
430 * Convert between the time as a {@link String} to milliseconds in a "fixed"
431 * way (to exchange data over the wire, for instance).
432 * <p>
433 * Precise to the second.
434 *
435 * @param displayTime
436 * the time as a {@link String}
437 *
438 * @return the number of milliseconds since the standard base time known as
439 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
440 * of error
441 *
442 * @throws ParseException
443 * in case of parse error
444 */
445 static public long toTime(String displayTime) throws ParseException {
446 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
447 return sdf.parse(displayTime).getTime();
448 }
449
450 /**
451 * Return a hash of the given {@link String}.
452 *
453 * @param input
454 * the input data
455 *
456 * @return the hash
457 */
458 static public String getMd5Hash(String input) {
459 try {
460 MessageDigest md = MessageDigest.getInstance("MD5");
461 md.update(getBytes(input));
462 byte byteData[] = md.digest();
463
464 StringBuffer hexString = new StringBuffer();
465 for (int i = 0; i < byteData.length; i++) {
466 String hex = Integer.toHexString(0xff & byteData[i]);
467 if (hex.length() == 1)
468 hexString.append('0');
469 hexString.append(hex);
470 }
471
472 return hexString.toString();
473 } catch (NoSuchAlgorithmException e) {
474 return input;
475 }
476 }
477
478 /**
479 * Remove the HTML content from the given input, and un-html-ize the rest.
480 *
481 * @param html
482 * the HTML-encoded content
483 *
484 * @return the HTML-free equivalent content
485 */
486 public static String unhtml(String html) {
487 StringBuilder builder = new StringBuilder();
488
489 int inTag = 0;
490 for (char car : html.toCharArray()) {
491 if (car == '<') {
492 inTag++;
493 } else if (car == '>') {
494 inTag--;
495 } else if (inTag <= 0) {
496 builder.append(car);
497 }
498 }
499
500 char nbsp = ' '; // non-breakable space (a special char)
501 char space = ' ';
502 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
503 }
504
505 /**
506 * Escape the given {@link String} so it can be used in XML, as content.
507 *
508 * @param input
509 * the input {@link String}
510 *
511 * @return the escaped {@link String}
512 */
513 public static String xmlEscape(String input) {
514 if (input == null) {
515 return "";
516 }
517
518 return HtmlEscape.escapeHtml(input,
519 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
520 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
521 }
522
523 /**
524 * Escape the given {@link String} so it can be used in XML, as text content
525 * inside double-quotes.
526 *
527 * @param input
528 * the input {@link String}
529 *
530 * @return the escaped {@link String}
531 */
532 public static String xmlEscapeQuote(String input) {
533 if (input == null) {
534 return "";
535 }
536
537 return HtmlEscape.escapeHtml(input,
538 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
539 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
540 }
541
542 /**
543 * Zip the data and then encode it into Base64.
544 *
545 * @param data
546 * the data
547 *
548 * @return the Base64 zipped version
549 *
550 * @throws IOException
551 * in case of I/O error
552 */
553 public static String zip64(String data) throws IOException {
554 try {
555 return zip64(getBytes(data));
556 } catch (UnsupportedEncodingException e) {
557 // All conforming JVM are required to support UTF-8
558 e.printStackTrace();
559 return null;
560 }
561 }
562
563 /**
564 * Zip the data and then encode it into Base64.
565 *
566 * @param data
567 * the data
568 *
569 * @return the Base64 zipped version
570 *
571 * @throws IOException
572 * in case of I/O error
573 */
574 public static String zip64(byte[] data) throws IOException {
575 // 1. compress
576 ByteArrayOutputStream bout = new ByteArrayOutputStream();
577 try {
578 OutputStream out = new GZIPOutputStream(bout);
579 try {
580 out.write(data);
581 } finally {
582 out.close();
583 }
584 } finally {
585 data = bout.toByteArray();
586 bout.close();
587 }
588
589 // 2. base64
590 InputStream in = new ByteArrayInputStream(data);
591 try {
592 in = new Base64InputStream(in, true);
593 return new String(IOUtils.toByteArray(in), "UTF-8");
594 } finally {
595 in.close();
596 }
597 }
598
599 /**
600 * Unconvert from Base64 then unzip the content, which is assumed to be a
601 * String.
602 *
603 * @param data
604 * the data in Base64 format
605 *
606 * @return the raw data
607 *
608 * @throws IOException
609 * in case of I/O error
610 */
611 public static String unzip64s(String data) throws IOException {
612 return new String(unzip64(data), "UTF-8");
613 }
614
615 /**
616 * Unconvert from Base64 then unzip the content.
617 *
618 * @param data
619 * the data in Base64 format
620 *
621 * @return the raw data
622 *
623 * @throws IOException
624 * in case of I/O error
625 */
626 public static byte[] unzip64(String data) throws IOException {
627 InputStream in = new Base64InputStream(new ByteArrayInputStream(
628 getBytes(data)), false);
629 try {
630 in = new GZIPInputStream(in);
631 return IOUtils.toByteArray(in);
632 } finally {
633 in.close();
634 }
635 }
636
637 /**
638 * Convert the given data to Base64 format.
639 *
640 * @param data
641 * the data to convert
642 *
643 * @return the Base64 {@link String} representation of the data
644 *
645 * @throws IOException
646 * in case of I/O errors
647 */
648 public static String base64(String data) throws IOException {
649 return base64(getBytes(data));
650 }
651
652 /**
653 * Convert the given data to Base64 format.
654 *
655 * @param data
656 * the data to convert
657 *
658 * @return the Base64 {@link String} representation of the data
659 *
660 * @throws IOException
661 * in case of I/O errors
662 */
663 public static String base64(byte[] data) throws IOException {
664 Base64InputStream in = new Base64InputStream(new ByteArrayInputStream(
665 data), true);
666 try {
667 return new String(IOUtils.toByteArray(in), "UTF-8");
668 } finally {
669 in.close();
670 }
671 }
672
673 /**
674 * Unconvert the given data from Base64 format back to a raw array of bytes.
675 *
676 * @param data
677 * the data to unconvert
678 *
679 * @return the raw data represented by the given Base64 {@link String},
680 *
681 * @throws IOException
682 * in case of I/O errors
683 */
684 public static byte[] unbase64(String data) throws IOException {
685 Base64InputStream in = new Base64InputStream(new ByteArrayInputStream(
686 getBytes(data)), false);
687 try {
688 return IOUtils.toByteArray(in);
689 } finally {
690 in.close();
691 }
692 }
693
694 /**
695 * Unonvert the given data from Base64 format back to a {@link String}.
696 *
697 * @param data
698 * the data to unconvert
699 *
700 * @return the {@link String} represented by the given Base64 {@link String}
701 *
702 * @throws IOException
703 * in case of I/O errors
704 */
705 public static String unbase64s(String data) throws IOException {
706 return new String(unbase64(data), "UTF-8");
707 }
708
709 /**
710 * Return a display {@link String} for the given value, which can be
711 * suffixed with "k" or "M" depending upon the number, if it is big enough.
712 * <p>
713 * <p>
714 * Examples:
715 * <ul>
716 * <li><tt>8 765</tt> becomes "8 k"</li>
717 * <li><tt>998 765</tt> becomes "998 k"</li>
718 * <li><tt>12 987 364</tt> becomes "12 M"</li>
719 * <li><tt>5 534 333 221</tt> becomes "5 G"</li>
720 * </ul>
721 *
722 * @param value
723 * the value to convert
724 *
725 * @return the display value
726 */
727 public static String formatNumber(long value) {
728 return formatNumber(value, 0);
729 }
730
731 /**
732 * Return a display {@link String} for the given value, which can be
733 * suffixed with "k" or "M" depending upon the number, if it is big enough.
734 * <p>
735 * Examples (assuming decimalPositions = 1):
736 * <ul>
737 * <li><tt>8 765</tt> becomes "8.7 k"</li>
738 * <li><tt>998 765</tt> becomes "998.7 k"</li>
739 * <li><tt>12 987 364</tt> becomes "12.9 M"</li>
740 * <li><tt>5 534 333 221</tt> becomes "5.5 G"</li>
741 * </ul>
742 *
743 * @param value
744 * the value to convert
745 * @param decimalPositions
746 * the number of decimal positions to keep
747 *
748 * @return the display value
749 */
750 public static String formatNumber(long value, int decimalPositions) {
751 long userValue = value;
752 String suffix = " ";
753 long mult = 1;
754
755 if (value >= 1000000000l) {
756 mult = 1000000000l;
757 userValue = value / 1000000000l;
758 suffix = " G";
759 } else if (value >= 1000000l) {
760 mult = 1000000l;
761 userValue = value / 1000000l;
762 suffix = " M";
763 } else if (value >= 1000l) {
764 mult = 1000l;
765 userValue = value / 1000l;
766 suffix = " k";
767 }
768
769 String deci = "";
770 if (decimalPositions > 0) {
771 deci = Long.toString(value % mult);
772 int size = Long.toString(mult).length() - 1;
773 while (deci.length() < size) {
774 deci = "0" + deci;
775 }
776
777 deci = deci.substring(0, Math.min(decimalPositions, deci.length()));
778 while (deci.length() < decimalPositions) {
779 deci += "0";
780 }
781
782 deci = "." + deci;
783 }
784
785 return Long.toString(userValue) + deci + suffix;
786 }
787
788 /**
789 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
790 * read a "display" number that can contain a "M" or "k" suffix and return
791 * the full value.
792 * <p>
793 * Of course, the conversion to and from display form is lossy (example:
794 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
795 *
796 * @param value
797 * the value in display form with possible "M" and "k" suffixes,
798 * can be NULL
799 *
800 * @return the value as a number, or 0 if not possible to convert
801 */
802 public static long toNumber(String value) {
803 return toNumber(value, 0l);
804 }
805
806 /**
807 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
808 * read a "display" number that can contain a "M" or "k" suffix and return
809 * the full value.
810 * <p>
811 * Of course, the conversion to and from display form is lossy (example:
812 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
813 *
814 * @param value
815 * the value in display form with possible "M" and "k" suffixes,
816 * can be NULL
817 * @param def
818 * the default value if it is not possible to convert the given
819 * value to a number
820 *
821 * @return the value as a number, or 0 if not possible to convert
822 */
823 public static long toNumber(String value, long def) {
824 long count = def;
825 if (value != null) {
826 value = value.trim().toLowerCase();
827 try {
828 long mult = 1;
829 if (value.endsWith("g")) {
830 value = value.substring(0, value.length() - 1).trim();
831 mult = 1000000000;
832 } else if (value.endsWith("m")) {
833 value = value.substring(0, value.length() - 1).trim();
834 mult = 1000000;
835 } else if (value.endsWith("k")) {
836 value = value.substring(0, value.length() - 1).trim();
837 mult = 1000;
838 }
839
840 long deci = 0;
841 if (value.contains(".")) {
842 String[] tab = value.split("\\.");
843 if (tab.length != 2) {
844 throw new NumberFormatException(value);
845 }
846 double decimal = Double.parseDouble("0."
847 + tab[tab.length - 1]);
848 deci = ((long) (mult * decimal));
849 value = tab[0];
850 }
851 count = mult * Long.parseLong(value) + deci;
852 } catch (Exception e) {
853 }
854 }
855
856 return count;
857 }
858
859 /**
860 * Return the bytes array representation of the given {@link String} in
861 * UTF-8.
862 *
863 * @param str
864 * the {@link String} to transform into bytes
865 * @return the content in bytes
866 */
867 static public byte[] getBytes(String str) {
868 try {
869 return str.getBytes("UTF-8");
870 } catch (UnsupportedEncodingException e) {
871 // All conforming JVM must support UTF-8
872 e.printStackTrace();
873 return null;
874 }
875 }
876
877 /**
878 * The "remove accents" pattern.
879 *
880 * @return the pattern, or NULL if a problem happens
881 */
882 private static Pattern getMarks() {
883 try {
884 return Pattern
885 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
886 } catch (Exception e) {
887 // Can fail on Android...
888 return null;
889 }
890 }
891
892 //
893 // justify List<String> related:
894 //
895
896 /**
897 * Check if this line ends as a complete line (ends with a "." or similar).
898 * <p>
899 * Note that we consider an empty line as full, and a line ending with
900 * spaces as not complete.
901 *
902 * @param line
903 * the line to check
904 *
905 * @return TRUE if it does
906 */
907 static private boolean isFullLine(StringBuilder line) {
908 if (line.length() == 0) {
909 return true;
910 }
911
912 char lastCar = line.charAt(line.length() - 1);
913 switch (lastCar) {
914 case '.': // points
915 case '?':
916 case '!':
917
918 case '\'': // quotes
919 case '‘':
920 case '’':
921
922 case '"': // double quotes
923 case '”':
924 case '“':
925 case '»':
926 case '«':
927 return true;
928 default:
929 return false;
930 }
931 }
932
933 /**
934 * Check if this line represent an item in a list or description (i.e.,
935 * check that the first non-space char is "-").
936 *
937 * @param line
938 * the line to check
939 *
940 * @return TRUE if it is
941 */
942 static private boolean isItemLine(String line) {
943 String spacing = getItemSpacing(line);
944 return spacing != null && !spacing.isEmpty()
945 && line.charAt(spacing.length()) == '-';
946 }
947
948 /**
949 * Return all the spaces that start this line (or Empty if none).
950 *
951 * @param line
952 * the line to get the starting spaces from
953 *
954 * @return the left spacing
955 */
956 static private String getItemSpacing(String line) {
957 int i;
958 for (i = 0; i < line.length(); i++) {
959 if (line.charAt(i) != ' ') {
960 return line.substring(0, i);
961 }
962 }
963
964 return "";
965 }
966
967 /**
968 * This line is an horizontal spacer line.
969 *
970 * @param line
971 * the line to test
972 *
973 * @return TRUE if it is
974 */
975 static private boolean isHrLine(CharSequence line) {
976 int count = 0;
977 if (line != null) {
978 for (int i = 0; i < line.length(); i++) {
979 char car = line.charAt(i);
980 if (car == ' ' || car == '\t' || car == '*' || car == '-'
981 || car == '_' || car == '~' || car == '=' || car == '/'
982 || car == '\\') {
983 count++;
984 } else {
985 return false;
986 }
987 }
988 }
989
990 return count > 2;
991 }
992
993 // Deprecated functions, please do not use //
994
995 /**
996 * @deprecated please use {@link StringUtils#zip64(byte[])} or
997 * {@link StringUtils#base64(byte[])} instead.
998 *
999 * @param data
1000 * the data to encode
1001 * @param zip
1002 * TRUE to zip it before Base64 encoding it, FALSE for Base64
1003 * encoding only
1004 *
1005 * @return the encoded data
1006 *
1007 * @throws IOException
1008 * in case of I/O error
1009 */
1010 @Deprecated
1011 public static String base64(String data, boolean zip) throws IOException {
1012 return base64(getBytes(data), zip);
1013 }
1014
1015 /**
1016 * @deprecated please use {@link StringUtils#zip64(String)} or
1017 * {@link StringUtils#base64(String)} instead.
1018 *
1019 * @param data
1020 * the data to encode
1021 * @param zip
1022 * TRUE to zip it before Base64 encoding it, FALSE for Base64
1023 * encoding only
1024 *
1025 * @return the encoded data
1026 *
1027 * @throws IOException
1028 * in case of I/O error
1029 */
1030 @Deprecated
1031 public static String base64(byte[] data, boolean zip) throws IOException {
1032 if (zip) {
1033 return zip64(data);
1034 }
1035
1036 Base64InputStream b64 = new Base64InputStream(new ByteArrayInputStream(
1037 data), true);
1038 try {
1039 return IOUtils.readSmallStream(b64);
1040 } finally {
1041 b64.close();
1042 }
1043 }
1044
1045 /**
1046 * @deprecated please use {@link Base64OutputStream} and
1047 * {@link GZIPOutputStream} instead.
1048 *
1049 * @param breakLines
1050 * NOT USED ANYMORE, it is always considered FALSE now
1051 */
1052 @Deprecated
1053 public static OutputStream base64(OutputStream data, boolean zip,
1054 boolean breakLines) throws IOException {
1055 OutputStream out = new Base64OutputStream(data);
1056 if (zip) {
1057 out = new java.util.zip.GZIPOutputStream(out);
1058 }
1059
1060 return out;
1061 }
1062
1063 /**
1064 * Unconvert the given data from Base64 format back to a raw array of bytes.
1065 * <p>
1066 * Will automatically detect zipped data and also uncompress it before
1067 * returning, unless ZIP is false.
1068 *
1069 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1070 *
1071 * @param data
1072 * the data to unconvert
1073 * @param zip
1074 * TRUE to also uncompress the data from a GZIP format
1075 * automatically; if set to FALSE, zipped data can be returned
1076 *
1077 * @return the raw data represented by the given Base64 {@link String},
1078 * optionally compressed with GZIP
1079 *
1080 * @throws IOException
1081 * in case of I/O errors
1082 */
1083 @Deprecated
1084 public static byte[] unbase64(String data, boolean zip) throws IOException {
1085 byte[] buffer = unbase64(data);
1086 if (!zip) {
1087 return buffer;
1088 }
1089
1090 try {
1091 GZIPInputStream zipped = new GZIPInputStream(
1092 new ByteArrayInputStream(buffer));
1093 try {
1094 ByteArrayOutputStream out = new ByteArrayOutputStream();
1095 try {
1096 IOUtils.write(zipped, out);
1097 return out.toByteArray();
1098 } finally {
1099 out.close();
1100 }
1101 } finally {
1102 zipped.close();
1103 }
1104 } catch (Exception e) {
1105 return buffer;
1106 }
1107 }
1108
1109 /**
1110 * Unconvert the given data from Base64 format back to a raw array of bytes.
1111 * <p>
1112 * Will automatically detect zipped data and also uncompress it before
1113 * returning, unless ZIP is false.
1114 *
1115 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1116 *
1117 * @param data
1118 * the data to unconvert
1119 * @param zip
1120 * TRUE to also uncompress the data from a GZIP format
1121 * automatically; if set to FALSE, zipped data can be returned
1122 *
1123 * @return the raw data represented by the given Base64 {@link String},
1124 * optionally compressed with GZIP
1125 *
1126 * @throws IOException
1127 * in case of I/O errors
1128 */
1129 @Deprecated
1130 public static InputStream unbase64(InputStream data, boolean zip)
1131 throws IOException {
1132 return new ByteArrayInputStream(unbase64(IOUtils.readSmallStream(data),
1133 zip));
1134 }
1135
1136 /**
1137 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1138 */
1139 @Deprecated
1140 public static byte[] unbase64(byte[] data, int offset, int count,
1141 boolean zip) throws IOException {
1142 byte[] dataPart = Arrays.copyOfRange(data, offset, offset + count);
1143 return unbase64(new String(dataPart, "UTF-8"), zip);
1144 }
1145
1146 /**
1147 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1148 */
1149 @Deprecated
1150 public static String unbase64s(String data, boolean zip) throws IOException {
1151 return new String(unbase64(data, zip), "UTF-8");
1152 }
1153
1154 /**
1155 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1156 */
1157 @Deprecated
1158 public static String unbase64s(byte[] data, int offset, int count,
1159 boolean zip) throws IOException {
1160 return new String(unbase64(data, offset, count, zip), "UTF-8");
1161 }
1162 }