Merge commit '712ddafb749aada41daab85c36ac12f657b2307e'
[nikiroo-utils.git] / StringUtils.java
1 package be.nikiroo.utils;
2
3 import java.io.ByteArrayInputStream;
4 import java.io.ByteArrayOutputStream;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.io.OutputStream;
8 import java.io.UnsupportedEncodingException;
9 import java.security.MessageDigest;
10 import java.security.NoSuchAlgorithmException;
11 import java.text.Normalizer;
12 import java.text.Normalizer.Form;
13 import java.text.ParseException;
14 import java.text.SimpleDateFormat;
15 import java.util.AbstractMap;
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Date;
19 import java.util.List;
20 import java.util.Map.Entry;
21 import java.util.regex.Pattern;
22 import java.util.zip.GZIPInputStream;
23 import java.util.zip.GZIPOutputStream;
24
25 import org.unbescape.html.HtmlEscape;
26 import org.unbescape.html.HtmlEscapeLevel;
27 import org.unbescape.html.HtmlEscapeType;
28
29 import be.nikiroo.utils.streams.Base64InputStream;
30 import be.nikiroo.utils.streams.Base64OutputStream;
31
32 /**
33 * This class offer some utilities based around {@link String}s.
34 *
35 * @author niki
36 */
37 public class StringUtils {
/**
 * Horizontal placement of a {@link String} when padding or justification is
 * applied (only meaningful when there is enough horizontal space to move the
 * text around).
 */
public enum Alignment {
    /** Aligned at left. */
    LEFT,
    /** Centered. */
    CENTER,
    /** Aligned at right. */
    RIGHT,
    /** Full justified (to both left and right). */
    JUSTIFY,

    // Legacy names, kept for backward compatibility only:

    /** DEPRECATED: please use LEFT. */
    @Deprecated
    Beginning,
    /** DEPRECATED: please use CENTER. */
    @Deprecated
    Center,
    /** DEPRECATED: please use RIGHT. */
    @Deprecated
    End;

    /**
     * Translate a legacy value into its current equivalent.
     *
     * @return the non-deprecated value (the value itself when it is not a
     *         deprecated one)
     */
    Alignment undeprecate() {
        switch (this) {
        case Beginning:
            return LEFT;
        case Center:
            return CENTER;
        case End:
            return RIGHT;
        default:
            return this;
        }
    }
}
81
82 static private Pattern marks = getMarks();
83
84 /**
85 * Fix the size of the given {@link String} either with space-padding or by
86 * shortening it.
87 *
88 * @param text
89 * the {@link String} to fix
90 * @param width
91 * the size of the resulting {@link String} or -1 for a noop
92 *
93 * @return the resulting {@link String} of size <i>size</i>
94 */
95 static public String padString(String text, int width) {
96 return padString(text, width, true, null);
97 }
98
99 /**
100 * Fix the size of the given {@link String} either with space-padding or by
101 * optionally shortening it.
102 *
103 * @param text
104 * the {@link String} to fix
105 * @param width
106 * the size of the resulting {@link String} if the text fits or
107 * if cut is TRUE or -1 for a noop
108 * @param cut
109 * cut the {@link String} shorter if needed
110 * @param align
111 * align the {@link String} in this position if we have enough
112 * space (default is Alignment.Beginning)
113 *
114 * @return the resulting {@link String} of size <i>size</i> minimum
115 */
116 static public String padString(String text, int width, boolean cut,
117 Alignment align) {
118
119 if (align == null) {
120 align = Alignment.LEFT;
121 }
122
123 align = align.undeprecate();
124
125 if (width >= 0) {
126 if (text == null)
127 text = "";
128
129 int diff = width - text.length();
130
131 if (diff < 0) {
132 if (cut)
133 text = text.substring(0, width);
134 } else if (diff > 0) {
135 if (diff < 2 && align != Alignment.RIGHT)
136 align = Alignment.LEFT;
137
138 switch (align) {
139 case RIGHT:
140 text = new String(new char[diff]).replace('\0', ' ') + text;
141 break;
142 case CENTER:
143 int pad1 = (diff) / 2;
144 int pad2 = (diff + 1) / 2;
145 text = new String(new char[pad1]).replace('\0', ' ') + text
146 + new String(new char[pad2]).replace('\0', ' ');
147 break;
148 case LEFT:
149 default:
150 text = text + new String(new char[diff]).replace('\0', ' ');
151 break;
152 }
153 }
154 }
155
156 return text;
157 }
158
159 /**
160 * Justify a text into width-sized (at the maximum) lines and return all the
161 * lines concatenated into a single '\\n'-separated line of text.
162 *
163 * @param text
164 * the {@link String} to justify
165 * @param width
166 * the maximum size of the resulting lines
167 *
168 * @return a list of justified text lines concatenated into a single
169 * '\\n'-separated line of text
170 */
171 static public String justifyTexts(String text, int width) {
172 StringBuilder builder = new StringBuilder();
173 for (String line : justifyText(text, width, null)) {
174 if (builder.length() > 0) {
175 builder.append('\n');
176 }
177 builder.append(line);
178 }
179
180 return builder.toString();
181 }
182
183 /**
184 * Justify a text into width-sized (at the maximum) lines.
185 *
186 * @param text
187 * the {@link String} to justify
188 * @param width
189 * the maximum size of the resulting lines
190 *
191 * @return a list of justified text lines
192 */
193 static public List<String> justifyText(String text, int width) {
194 return justifyText(text, width, null);
195 }
196
197 /**
198 * Justify a text into width-sized (at the maximum) lines.
199 *
200 * @param text
201 * the {@link String} to justify
202 * @param width
203 * the maximum size of the resulting lines
204 * @param align
205 * align the lines in this position (default is
206 * Alignment.Beginning)
207 *
208 * @return a list of justified text lines
209 */
210 static public List<String> justifyText(String text, int width,
211 Alignment align) {
212 if (align == null) {
213 align = Alignment.LEFT;
214 }
215
216 align = align.undeprecate();
217
218 switch (align) {
219 case CENTER:
220 return StringJustifier.center(text, width);
221 case RIGHT:
222 return StringJustifier.right(text, width);
223 case JUSTIFY:
224 return StringJustifier.full(text, width);
225 case LEFT:
226 default:
227 return StringJustifier.left(text, width);
228 }
229 }
230
231 /**
232 * Justify a text into width-sized (at the maximum) lines.
233 *
234 * @param text
235 * the {@link String} to justify
236 * @param width
237 * the maximum size of the resulting lines
238 *
239 * @return a list of justified text lines
240 */
241 static public List<String> justifyText(List<String> text, int width) {
242 return justifyText(text, width, null);
243 }
244
245 /**
246 * Justify a text into width-sized (at the maximum) lines.
247 *
248 * @param text
249 * the {@link String} to justify
250 * @param width
251 * the maximum size of the resulting lines
252 * @param align
253 * align the lines in this position (default is
254 * Alignment.Beginning)
255 *
256 * @return a list of justified text lines
257 */
258 static public List<String> justifyText(List<String> text, int width,
259 Alignment align) {
260 List<String> result = new ArrayList<String>();
261
262 // Content <-> Bullet spacing (null = no spacing)
263 List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
264 StringBuilder previous = null;
265 StringBuilder tmp = new StringBuilder();
266 String previousItemBulletSpacing = null;
267 String itemBulletSpacing = null;
268 for (String inputLine : text) {
269 boolean previousLineComplete = true;
270
271 String current = inputLine.replace("\t", " ");
272 itemBulletSpacing = getItemSpacing(current);
273 boolean bullet = isItemLine(current);
274 if ((previousItemBulletSpacing == null || itemBulletSpacing
275 .length() <= previousItemBulletSpacing.length()) && !bullet) {
276 itemBulletSpacing = null;
277 }
278
279 if (itemBulletSpacing != null) {
280 current = current.trim();
281 if (!current.isEmpty() && bullet) {
282 current = current.substring(1);
283 }
284 current = current.trim();
285 previousLineComplete = bullet;
286 } else {
287 tmp.setLength(0);
288 for (String word : current.split(" ")) {
289 if (word.isEmpty()) {
290 continue;
291 }
292
293 if (tmp.length() > 0) {
294 tmp.append(' ');
295 }
296 tmp.append(word.trim());
297 }
298 current = tmp.toString();
299
300 previousLineComplete = current.isEmpty()
301 || previousItemBulletSpacing != null
302 || (previous != null && isFullLine(previous))
303 || isHrLine(current) || isHrLine(previous);
304 }
305
306 if (previous == null) {
307 previous = new StringBuilder();
308 } else {
309 if (previousLineComplete) {
310 lines.add(new AbstractMap.SimpleEntry<String, String>(
311 previous.toString(), previousItemBulletSpacing));
312 previous.setLength(0);
313 previousItemBulletSpacing = itemBulletSpacing;
314 } else {
315 previous.append(' ');
316 }
317 }
318
319 previous.append(current);
320
321 }
322
323 if (previous != null) {
324 lines.add(new AbstractMap.SimpleEntry<String, String>(previous
325 .toString(), previousItemBulletSpacing));
326 }
327
328 for (Entry<String, String> line : lines) {
329 String content = line.getKey();
330 String spacing = line.getValue();
331
332 String bullet = "- ";
333 if (spacing == null) {
334 bullet = "";
335 spacing = "";
336 }
337
338 if (spacing.length() > width + 3) {
339 spacing = "";
340 }
341
342 for (String subline : StringUtils.justifyText(content, width
343 - (spacing.length() + bullet.length()), align)) {
344 result.add(spacing + bullet + subline);
345 if (!bullet.isEmpty()) {
346 bullet = " ";
347 }
348 }
349 }
350
351 return result;
352 }
353
354 /**
355 * Sanitise the given input to make it more Terminal-friendly by removing
356 * combining characters.
357 *
358 * @param input
359 * the input to sanitise
360 * @param allowUnicode
361 * allow Unicode or only allow ASCII Latin characters
362 *
363 * @return the sanitised {@link String}
364 */
365 static public String sanitize(String input, boolean allowUnicode) {
366 return sanitize(input, allowUnicode, !allowUnicode);
367 }
368
369 /**
370 * Sanitise the given input to make it more Terminal-friendly by removing
371 * combining characters.
372 *
373 * @param input
374 * the input to sanitise
375 * @param allowUnicode
376 * allow Unicode or only allow ASCII Latin characters
377 * @param removeAllAccents
378 * TRUE to replace all accentuated characters by their non
379 * accentuated counter-parts
380 *
381 * @return the sanitised {@link String}
382 */
383 static public String sanitize(String input, boolean allowUnicode,
384 boolean removeAllAccents) {
385
386 if (removeAllAccents) {
387 input = Normalizer.normalize(input, Form.NFKD);
388 if (marks != null) {
389 input = marks.matcher(input).replaceAll("");
390 }
391 }
392
393 input = Normalizer.normalize(input, Form.NFKC);
394
395 if (!allowUnicode) {
396 StringBuilder builder = new StringBuilder();
397 for (int index = 0; index < input.length(); index++) {
398 char car = input.charAt(index);
399 // displayable chars in ASCII are in the range 32<->255,
400 // except DEL (127)
401 if (car >= 32 && car <= 255 && car != 127) {
402 builder.append(car);
403 }
404 }
405 input = builder.toString();
406 }
407
408 return input;
409 }
410
411 /**
412 * Convert between the time in milliseconds to a {@link String} in a "fixed"
413 * way (to exchange data over the wire, for instance).
414 * <p>
415 * Precise to the second.
416 *
417 * @param time
418 * the specified number of milliseconds since the standard base
419 * time known as "the epoch", namely January 1, 1970, 00:00:00
420 * GMT
421 *
422 * @return the time as a {@link String}
423 */
424 static public String fromTime(long time) {
425 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
426 return sdf.format(new Date(time));
427 }
428
429 /**
430 * Convert between the time as a {@link String} to milliseconds in a "fixed"
431 * way (to exchange data over the wire, for instance).
432 * <p>
433 * Precise to the second.
434 *
435 * @param displayTime
436 * the time as a {@link String}
437 *
438 * @return the number of milliseconds since the standard base time known as
439 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
440 * of error
441 *
442 * @throws ParseException
443 * in case of parse error
444 */
445 static public long toTime(String displayTime) throws ParseException {
446 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
447 return sdf.parse(displayTime).getTime();
448 }
449
450 /**
451 * Return a hash of the given {@link String}.
452 *
453 * @param input
454 * the input data
455 *
456 * @return the hash
457 *
458 * @deprecated please use {@link HashUtils}
459 */
460 @Deprecated
461 static public String getMd5Hash(String input) {
462 try {
463 MessageDigest md = MessageDigest.getInstance("MD5");
464 md.update(getBytes(input));
465 byte byteData[] = md.digest();
466
467 StringBuffer hexString = new StringBuffer();
468 for (int i = 0; i < byteData.length; i++) {
469 String hex = Integer.toHexString(0xff & byteData[i]);
470 if (hex.length() == 1)
471 hexString.append('0');
472 hexString.append(hex);
473 }
474
475 return hexString.toString();
476 } catch (NoSuchAlgorithmException e) {
477 return input;
478 }
479 }
480
481 /**
482 * Remove the HTML content from the given input, and un-html-ize the rest.
483 *
484 * @param html
485 * the HTML-encoded content
486 *
487 * @return the HTML-free equivalent content
488 */
489 public static String unhtml(String html) {
490 StringBuilder builder = new StringBuilder();
491
492 int inTag = 0;
493 for (char car : html.toCharArray()) {
494 if (car == '<') {
495 inTag++;
496 } else if (car == '>') {
497 inTag--;
498 } else if (inTag <= 0) {
499 builder.append(car);
500 }
501 }
502
503 char nbsp = ' '; // non-breakable space (a special char)
504 char space = ' ';
505 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
506 }
507
508 /**
509 * Escape the given {@link String} so it can be used in XML, as content.
510 *
511 * @param input
512 * the input {@link String}
513 *
514 * @return the escaped {@link String}
515 */
516 public static String xmlEscape(String input) {
517 if (input == null) {
518 return "";
519 }
520
521 return HtmlEscape.escapeHtml(input,
522 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
523 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
524 }
525
526 /**
527 * Escape the given {@link String} so it can be used in XML, as text content
528 * inside double-quotes.
529 *
530 * @param input
531 * the input {@link String}
532 *
533 * @return the escaped {@link String}
534 */
535 public static String xmlEscapeQuote(String input) {
536 if (input == null) {
537 return "";
538 }
539
540 return HtmlEscape.escapeHtml(input,
541 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
542 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
543 }
544
545 /**
546 * Zip the data and then encode it into Base64.
547 *
548 * @param data
549 * the data
550 *
551 * @return the Base64 zipped version
552 *
553 * @throws IOException
554 * in case of I/O error
555 */
556 public static String zip64(String data) throws IOException {
557 try {
558 return zip64(getBytes(data));
559 } catch (UnsupportedEncodingException e) {
560 // All conforming JVM are required to support UTF-8
561 e.printStackTrace();
562 return null;
563 }
564 }
565
566 /**
567 * Zip the data and then encode it into Base64.
568 *
569 * @param data
570 * the data
571 *
572 * @return the Base64 zipped version
573 *
574 * @throws IOException
575 * in case of I/O error
576 */
577 public static String zip64(byte[] data) throws IOException {
578 // 1. compress
579 ByteArrayOutputStream bout = new ByteArrayOutputStream();
580 try {
581 OutputStream out = new GZIPOutputStream(bout);
582 try {
583 out.write(data);
584 } finally {
585 out.close();
586 }
587 } finally {
588 data = bout.toByteArray();
589 bout.close();
590 }
591
592 // 2. base64
593 InputStream in = new ByteArrayInputStream(data);
594 try {
595 in = new Base64InputStream(in, true);
596 return new String(IOUtils.toByteArray(in), "UTF-8");
597 } finally {
598 in.close();
599 }
600 }
601
602 /**
603 * Unconvert from Base64 then unzip the content, which is assumed to be a
604 * String.
605 *
606 * @param data
607 * the data in Base64 format
608 *
609 * @return the raw data
610 *
611 * @throws IOException
612 * in case of I/O error
613 */
614 public static String unzip64s(String data) throws IOException {
615 return new String(unzip64(data), "UTF-8");
616 }
617
618 /**
619 * Unconvert from Base64 then unzip the content.
620 *
621 * @param data
622 * the data in Base64 format
623 *
624 * @return the raw data
625 *
626 * @throws IOException
627 * in case of I/O error
628 */
629 public static byte[] unzip64(String data) throws IOException {
630 InputStream in = new Base64InputStream(new ByteArrayInputStream(
631 getBytes(data)), false);
632 try {
633 in = new GZIPInputStream(in);
634 return IOUtils.toByteArray(in);
635 } finally {
636 in.close();
637 }
638 }
639
640 /**
641 * Convert the given data to Base64 format.
642 *
643 * @param data
644 * the data to convert
645 *
646 * @return the Base64 {@link String} representation of the data
647 *
648 * @throws IOException
649 * in case of I/O errors
650 */
651 public static String base64(String data) throws IOException {
652 return base64(getBytes(data));
653 }
654
655 /**
656 * Convert the given data to Base64 format.
657 *
658 * @param data
659 * the data to convert
660 *
661 * @return the Base64 {@link String} representation of the data
662 *
663 * @throws IOException
664 * in case of I/O errors
665 */
666 public static String base64(byte[] data) throws IOException {
667 Base64InputStream in = new Base64InputStream(new ByteArrayInputStream(
668 data), true);
669 try {
670 return new String(IOUtils.toByteArray(in), "UTF-8");
671 } finally {
672 in.close();
673 }
674 }
675
676 /**
677 * Unconvert the given data from Base64 format back to a raw array of bytes.
678 *
679 * @param data
680 * the data to unconvert
681 *
682 * @return the raw data represented by the given Base64 {@link String},
683 *
684 * @throws IOException
685 * in case of I/O errors
686 */
687 public static byte[] unbase64(String data) throws IOException {
688 Base64InputStream in = new Base64InputStream(new ByteArrayInputStream(
689 getBytes(data)), false);
690 try {
691 return IOUtils.toByteArray(in);
692 } finally {
693 in.close();
694 }
695 }
696
697 /**
698 * Unonvert the given data from Base64 format back to a {@link String}.
699 *
700 * @param data
701 * the data to unconvert
702 *
703 * @return the {@link String} represented by the given Base64 {@link String}
704 *
705 * @throws IOException
706 * in case of I/O errors
707 */
708 public static String unbase64s(String data) throws IOException {
709 return new String(unbase64(data), "UTF-8");
710 }
711
712 /**
713 * Return a display {@link String} for the given value, which can be
714 * suffixed with "k" or "M" depending upon the number, if it is big enough.
715 * <p>
716 * <p>
717 * Examples:
718 * <ul>
719 * <li><tt>8 765</tt> becomes "8 k"</li>
720 * <li><tt>998 765</tt> becomes "998 k"</li>
721 * <li><tt>12 987 364</tt> becomes "12 M"</li>
722 * <li><tt>5 534 333 221</tt> becomes "5 G"</li>
723 * </ul>
724 *
725 * @param value
726 * the value to convert
727 *
728 * @return the display value
729 */
730 public static String formatNumber(long value) {
731 return formatNumber(value, 0);
732 }
733
734 /**
735 * Return a display {@link String} for the given value, which can be
736 * suffixed with "k" or "M" depending upon the number, if it is big enough.
737 * <p>
738 * Examples (assuming decimalPositions = 1):
739 * <ul>
740 * <li><tt>8 765</tt> becomes "8.7 k"</li>
741 * <li><tt>998 765</tt> becomes "998.7 k"</li>
742 * <li><tt>12 987 364</tt> becomes "12.9 M"</li>
743 * <li><tt>5 534 333 221</tt> becomes "5.5 G"</li>
744 * </ul>
745 *
746 * @param value
747 * the value to convert
748 * @param decimalPositions
749 * the number of decimal positions to keep
750 *
751 * @return the display value
752 */
753 public static String formatNumber(long value, int decimalPositions) {
754 long userValue = value;
755 String suffix = " ";
756 long mult = 1;
757
758 if (value >= 1000000000l) {
759 mult = 1000000000l;
760 userValue = value / 1000000000l;
761 suffix = " G";
762 } else if (value >= 1000000l) {
763 mult = 1000000l;
764 userValue = value / 1000000l;
765 suffix = " M";
766 } else if (value >= 1000l) {
767 mult = 1000l;
768 userValue = value / 1000l;
769 suffix = " k";
770 }
771
772 String deci = "";
773 if (decimalPositions > 0) {
774 deci = Long.toString(value % mult);
775 int size = Long.toString(mult).length() - 1;
776 while (deci.length() < size) {
777 deci = "0" + deci;
778 }
779
780 deci = deci.substring(0, Math.min(decimalPositions, deci.length()));
781 while (deci.length() < decimalPositions) {
782 deci += "0";
783 }
784
785 deci = "." + deci;
786 }
787
788 return Long.toString(userValue) + deci + suffix;
789 }
790
791 /**
792 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
793 * read a "display" number that can contain a "M" or "k" suffix and return
794 * the full value.
795 * <p>
796 * Of course, the conversion to and from display form is lossy (example:
797 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
798 *
799 * @param value
800 * the value in display form with possible "M" and "k" suffixes,
801 * can be NULL
802 *
803 * @return the value as a number, or 0 if not possible to convert
804 */
805 public static long toNumber(String value) {
806 return toNumber(value, 0l);
807 }
808
809 /**
810 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
811 * read a "display" number that can contain a "M" or "k" suffix and return
812 * the full value.
813 * <p>
814 * Of course, the conversion to and from display form is lossy (example:
815 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
816 *
817 * @param value
818 * the value in display form with possible "M" and "k" suffixes,
819 * can be NULL
820 * @param def
821 * the default value if it is not possible to convert the given
822 * value to a number
823 *
824 * @return the value as a number, or 0 if not possible to convert
825 */
826 public static long toNumber(String value, long def) {
827 long count = def;
828 if (value != null) {
829 value = value.trim().toLowerCase();
830 try {
831 long mult = 1;
832 if (value.endsWith("g")) {
833 value = value.substring(0, value.length() - 1).trim();
834 mult = 1000000000;
835 } else if (value.endsWith("m")) {
836 value = value.substring(0, value.length() - 1).trim();
837 mult = 1000000;
838 } else if (value.endsWith("k")) {
839 value = value.substring(0, value.length() - 1).trim();
840 mult = 1000;
841 }
842
843 long deci = 0;
844 if (value.contains(".")) {
845 String[] tab = value.split("\\.");
846 if (tab.length != 2) {
847 throw new NumberFormatException(value);
848 }
849 double decimal = Double.parseDouble("0."
850 + tab[tab.length - 1]);
851 deci = ((long) (mult * decimal));
852 value = tab[0];
853 }
854 count = mult * Long.parseLong(value) + deci;
855 } catch (Exception e) {
856 }
857 }
858
859 return count;
860 }
861
862 /**
863 * Return the bytes array representation of the given {@link String} in
864 * UTF-8.
865 *
866 * @param str
867 * the {@link String} to transform into bytes
868 * @return the content in bytes
869 */
870 static public byte[] getBytes(String str) {
871 try {
872 return str.getBytes("UTF-8");
873 } catch (UnsupportedEncodingException e) {
874 // All conforming JVM must support UTF-8
875 e.printStackTrace();
876 return null;
877 }
878 }
879
880 /**
881 * The "remove accents" pattern.
882 *
883 * @return the pattern, or NULL if a problem happens
884 */
885 private static Pattern getMarks() {
886 try {
887 return Pattern
888 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
889 } catch (Exception e) {
890 // Can fail on Android...
891 return null;
892 }
893 }
894
895 //
896 // justify List<String> related:
897 //
898
899 /**
900 * Check if this line ends as a complete line (ends with a "." or similar).
901 * <p>
902 * Note that we consider an empty line as full, and a line ending with
903 * spaces as not complete.
904 *
905 * @param line
906 * the line to check
907 *
908 * @return TRUE if it does
909 */
910 static private boolean isFullLine(StringBuilder line) {
911 if (line.length() == 0) {
912 return true;
913 }
914
915 char lastCar = line.charAt(line.length() - 1);
916 switch (lastCar) {
917 case '.': // points
918 case '?':
919 case '!':
920
921 case '\'': // quotes
922 case '‘':
923 case '’':
924
925 case '"': // double quotes
926 case '”':
927 case '“':
928 case '»':
929 case '«':
930 return true;
931 default:
932 return false;
933 }
934 }
935
936 /**
937 * Check if this line represent an item in a list or description (i.e.,
938 * check that the first non-space char is "-").
939 *
940 * @param line
941 * the line to check
942 *
943 * @return TRUE if it is
944 */
945 static private boolean isItemLine(String line) {
946 String spacing = getItemSpacing(line);
947 return spacing != null && !spacing.isEmpty()
948 && line.charAt(spacing.length()) == '-';
949 }
950
951 /**
952 * Return all the spaces that start this line (or Empty if none).
953 *
954 * @param line
955 * the line to get the starting spaces from
956 *
957 * @return the left spacing
958 */
959 static private String getItemSpacing(String line) {
960 int i;
961 for (i = 0; i < line.length(); i++) {
962 if (line.charAt(i) != ' ') {
963 return line.substring(0, i);
964 }
965 }
966
967 return "";
968 }
969
970 /**
971 * This line is an horizontal spacer line.
972 *
973 * @param line
974 * the line to test
975 *
976 * @return TRUE if it is
977 */
978 static private boolean isHrLine(CharSequence line) {
979 int count = 0;
980 if (line != null) {
981 for (int i = 0; i < line.length(); i++) {
982 char car = line.charAt(i);
983 if (car == ' ' || car == '\t' || car == '*' || car == '-'
984 || car == '_' || car == '~' || car == '=' || car == '/'
985 || car == '\\') {
986 count++;
987 } else {
988 return false;
989 }
990 }
991 }
992
993 return count > 2;
994 }
995
996 // Deprecated functions, please do not use //
997
998 /**
999 * @deprecated please use {@link StringUtils#zip64(byte[])} or
1000 * {@link StringUtils#base64(byte[])} instead.
1001 *
1002 * @param data
1003 * the data to encode
1004 * @param zip
1005 * TRUE to zip it before Base64 encoding it, FALSE for Base64
1006 * encoding only
1007 *
1008 * @return the encoded data
1009 *
1010 * @throws IOException
1011 * in case of I/O error
1012 */
1013 @Deprecated
1014 public static String base64(String data, boolean zip) throws IOException {
1015 return base64(getBytes(data), zip);
1016 }
1017
1018 /**
1019 * @deprecated please use {@link StringUtils#zip64(String)} or
1020 * {@link StringUtils#base64(String)} instead.
1021 *
1022 * @param data
1023 * the data to encode
1024 * @param zip
1025 * TRUE to zip it before Base64 encoding it, FALSE for Base64
1026 * encoding only
1027 *
1028 * @return the encoded data
1029 *
1030 * @throws IOException
1031 * in case of I/O error
1032 */
1033 @Deprecated
1034 public static String base64(byte[] data, boolean zip) throws IOException {
1035 if (zip) {
1036 return zip64(data);
1037 }
1038
1039 Base64InputStream b64 = new Base64InputStream(new ByteArrayInputStream(
1040 data), true);
1041 try {
1042 return IOUtils.readSmallStream(b64);
1043 } finally {
1044 b64.close();
1045 }
1046 }
1047
1048 /**
1049 * @deprecated please use {@link Base64OutputStream} and
1050 * {@link GZIPOutputStream} instead.
1051 *
1052 * @param breakLines
1053 * NOT USED ANYMORE, it is always considered FALSE now
1054 */
1055 @Deprecated
1056 public static OutputStream base64(OutputStream data, boolean zip,
1057 boolean breakLines) throws IOException {
1058 OutputStream out = new Base64OutputStream(data);
1059 if (zip) {
1060 out = new java.util.zip.GZIPOutputStream(out);
1061 }
1062
1063 return out;
1064 }
1065
1066 /**
1067 * Unconvert the given data from Base64 format back to a raw array of bytes.
1068 * <p>
1069 * Will automatically detect zipped data and also uncompress it before
1070 * returning, unless ZIP is false.
1071 *
1072 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1073 *
1074 * @param data
1075 * the data to unconvert
1076 * @param zip
1077 * TRUE to also uncompress the data from a GZIP format
1078 * automatically; if set to FALSE, zipped data can be returned
1079 *
1080 * @return the raw data represented by the given Base64 {@link String},
1081 * optionally compressed with GZIP
1082 *
1083 * @throws IOException
1084 * in case of I/O errors
1085 */
1086 @Deprecated
1087 public static byte[] unbase64(String data, boolean zip) throws IOException {
1088 byte[] buffer = unbase64(data);
1089 if (!zip) {
1090 return buffer;
1091 }
1092
1093 try {
1094 GZIPInputStream zipped = new GZIPInputStream(
1095 new ByteArrayInputStream(buffer));
1096 try {
1097 ByteArrayOutputStream out = new ByteArrayOutputStream();
1098 try {
1099 IOUtils.write(zipped, out);
1100 return out.toByteArray();
1101 } finally {
1102 out.close();
1103 }
1104 } finally {
1105 zipped.close();
1106 }
1107 } catch (Exception e) {
1108 return buffer;
1109 }
1110 }
1111
1112 /**
1113 * Unconvert the given data from Base64 format back to a raw array of bytes.
1114 * <p>
1115 * Will automatically detect zipped data and also uncompress it before
1116 * returning, unless ZIP is false.
1117 *
1118 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1119 *
1120 * @param data
1121 * the data to unconvert
1122 * @param zip
1123 * TRUE to also uncompress the data from a GZIP format
1124 * automatically; if set to FALSE, zipped data can be returned
1125 *
1126 * @return the raw data represented by the given Base64 {@link String},
1127 * optionally compressed with GZIP
1128 *
1129 * @throws IOException
1130 * in case of I/O errors
1131 */
1132 @Deprecated
1133 public static InputStream unbase64(InputStream data, boolean zip)
1134 throws IOException {
1135 return new ByteArrayInputStream(unbase64(IOUtils.readSmallStream(data),
1136 zip));
1137 }
1138
1139 /**
1140 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1141 */
1142 @Deprecated
1143 public static byte[] unbase64(byte[] data, int offset, int count,
1144 boolean zip) throws IOException {
1145 byte[] dataPart = Arrays.copyOfRange(data, offset, offset + count);
1146 return unbase64(new String(dataPart, "UTF-8"), zip);
1147 }
1148
1149 /**
1150 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1151 */
1152 @Deprecated
1153 public static String unbase64s(String data, boolean zip) throws IOException {
1154 return new String(unbase64(data, zip), "UTF-8");
1155 }
1156
1157 /**
1158 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1159 */
1160 @Deprecated
1161 public static String unbase64s(byte[] data, int offset, int count,
1162 boolean zip) throws IOException {
1163 return new String(unbase64(data, offset, count, zip), "UTF-8");
1164 }
1165 }