1 package be
.nikiroo
.utils
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.OutputStream
;
6 import java
.io
.UnsupportedEncodingException
;
7 import java
.security
.MessageDigest
;
8 import java
.security
.NoSuchAlgorithmException
;
9 import java
.text
.Normalizer
;
10 import java
.text
.Normalizer
.Form
;
11 import java
.text
.ParseException
;
12 import java
.text
.SimpleDateFormat
;
13 import java
.util
.AbstractMap
;
14 import java
.util
.ArrayList
;
15 import java
.util
.Date
;
16 import java
.util
.List
;
17 import java
.util
.Map
.Entry
;
18 import java
.util
.regex
.Pattern
;
20 import org
.unbescape
.html
.HtmlEscape
;
21 import org
.unbescape
.html
.HtmlEscapeLevel
;
22 import org
.unbescape
.html
.HtmlEscapeType
;
25 * This class offers some utilities based around {@link String}s.
29 public class StringUtils
{
31 * This enum type will decide the alignment of a {@link String} when padding
32 * or justification is applied (if there is enough horizontal space for it
35 public enum Alignment
{
36 /** Aligned at left. */
40 /** Aligned at right. */
42 /** Full justified (to both left and right). */
45 // Old Deprecated values:
47 /** DEPRECATED: please use LEFT. */
50 /** DEPRECATED: please use CENTER. */
53 /** DEPRECATED: please use RIGHT. */
58 * Return the non-deprecated version of this enum if needed (or return
61 * @return the non-deprecated value
63 Alignment
undeprecate() {
64 if (this == Beginning
)
74 static private Pattern marks
= getMarks();
77 * Fix the size of the given {@link String} either with space-padding or by
81 * the {@link String} to fix
83 * the size of the resulting {@link String} or -1 for a noop
85 * @return the resulting {@link String} of size <i>size</i>
87 static public String
padString(String text
, int width
) {
88 return padString(text
, width
, true, null);
92 * Fix the size of the given {@link String} either with space-padding or by
93 * optionally shortening it.
96 * the {@link String} to fix
98 * the size of the resulting {@link String} if the text fits or
99 * if cut is TRUE or -1 for a noop
101 * cut the {@link String} shorter if needed
103 * align the {@link String} in this position if we have enough
104 * space (default is Alignment.LEFT)
106 * @return the resulting {@link String} of size <i>size</i> minimum
108 static public String
padString(String text
, int width
, boolean cut
,
112 align
= Alignment
.LEFT
;
115 align
= align
.undeprecate();
121 int diff
= width
- text
.length();
125 text
= text
.substring(0, width
);
126 } else if (diff
> 0) {
127 if (diff
< 2 && align
!= Alignment
.RIGHT
)
128 align
= Alignment
.LEFT
;
132 text
= new String(new char[diff
]).replace('\0', ' ') + text
;
135 int pad1
= (diff
) / 2;
136 int pad2
= (diff
+ 1) / 2;
137 text
= new String(new char[pad1
]).replace('\0', ' ') + text
138 + new String(new char[pad2
]).replace('\0', ' ');
142 text
= text
+ new String(new char[diff
]).replace('\0', ' ');
152 * Justify a text into width-sized (at the maximum) lines.
155 * the {@link String} to justify
157 * the maximum size of the resulting lines
159 * @return a list of justified text lines
161 static public List
<String
> justifyText(String text
, int width
) {
162 return justifyText(text
, width
, null);
166 * Justify a text into width-sized (at the maximum) lines.
169 * the {@link String} to justify
171 * the maximum size of the resulting lines
173 * align the lines in this position (default is
174 * Alignment.LEFT)
176 * @return a list of justified text lines
178 static public List
<String
> justifyText(String text
, int width
,
181 align
= Alignment
.LEFT
;
184 align
= align
.undeprecate();
188 return StringJustifier
.center(text
, width
);
190 return StringJustifier
.right(text
, width
);
192 return StringJustifier
.full(text
, width
);
195 return StringJustifier
.left(text
, width
);
200 * Justify a text into width-sized (at the maximum) lines.
203 * the {@link String} to justify
205 * the maximum size of the resulting lines
207 * @return a list of justified text lines
209 static public List
<String
> justifyText(List
<String
> text
, int width
) {
210 return justifyText(text
, width
, null);
214 * Justify a text into width-sized (at the maximum) lines.
217 * the {@link String} to justify
219 * the maximum size of the resulting lines
221 * align the lines in this position (default is
222 * Alignment.LEFT)
224 * @return a list of justified text lines
226 static public List
<String
> justifyText(List
<String
> text
, int width
,
228 List
<String
> result
= new ArrayList
<String
>();
230 // Content <-> Bullet spacing (null = no spacing)
231 List
<Entry
<String
, String
>> lines
= new ArrayList
<Entry
<String
, String
>>();
232 StringBuilder previous
= null;
233 StringBuilder tmp
= new StringBuilder();
234 String previousItemBulletSpacing
= null;
235 String itemBulletSpacing
= null;
236 for (String inputLine
: text
) {
237 boolean previousLineComplete
= true;
239 String current
= inputLine
.replace("\t", " ");
240 itemBulletSpacing
= getItemSpacing(current
);
241 boolean bullet
= isItemLine(current
);
242 if ((previousItemBulletSpacing
== null || itemBulletSpacing
243 .length() <= previousItemBulletSpacing
.length()) && !bullet
) {
244 itemBulletSpacing
= null;
247 if (itemBulletSpacing
!= null) {
248 current
= current
.trim();
249 if (!current
.isEmpty() && bullet
) {
250 current
= current
.substring(1);
252 current
= current
.trim();
253 previousLineComplete
= bullet
;
256 for (String word
: current
.split(" ")) {
257 if (word
.isEmpty()) {
261 if (tmp
.length() > 0) {
264 tmp
.append(word
.trim());
266 current
= tmp
.toString();
268 previousLineComplete
= current
.isEmpty()
269 || previousItemBulletSpacing
!= null
270 || (previous
!= null && isFullLine(previous
))
271 || isHrLine(current
) || isHrLine(previous
);
274 if (previous
== null) {
275 previous
= new StringBuilder();
277 if (previousLineComplete
) {
278 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(
279 previous
.toString(), previousItemBulletSpacing
));
280 previous
.setLength(0);
281 previousItemBulletSpacing
= itemBulletSpacing
;
283 previous
.append(' ');
287 previous
.append(current
);
291 if (previous
!= null) {
292 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(previous
293 .toString(), previousItemBulletSpacing
));
296 for (Entry
<String
, String
> line
: lines
) {
297 String content
= line
.getKey();
298 String spacing
= line
.getValue();
300 String bullet
= "- ";
301 if (spacing
== null) {
306 if (spacing
.length() > width
+ 3) {
310 for (String subline
: StringUtils
.justifyText(content
, width
311 - (spacing
.length() + bullet
.length()), align
)) {
312 result
.add(spacing
+ bullet
+ subline
);
313 if (!bullet
.isEmpty()) {
323 * Sanitise the given input to make it more Terminal-friendly by removing
324 * combining characters.
327 * the input to sanitise
328 * @param allowUnicode
329 * allow Unicode or only allow ASCII Latin characters
331 * @return the sanitised {@link String}
333 static public String
sanitize(String input
, boolean allowUnicode
) {
334 return sanitize(input
, allowUnicode
, !allowUnicode
);
338 * Sanitise the given input to make it more Terminal-friendly by removing
339 * combining characters.
342 * the input to sanitise
343 * @param allowUnicode
344 * allow Unicode or only allow ASCII Latin characters
345 * @param removeAllAccents
346 * TRUE to replace all accentuated characters by their non
347 * accentuated counter-parts
349 * @return the sanitised {@link String}
351 static public String
sanitize(String input
, boolean allowUnicode
,
352 boolean removeAllAccents
) {
354 if (removeAllAccents
) {
355 input
= Normalizer
.normalize(input
, Form
.NFKD
);
357 input
= marks
.matcher(input
).replaceAll("");
361 input
= Normalizer
.normalize(input
, Form
.NFKC
);
364 StringBuilder builder
= new StringBuilder();
365 for (int index
= 0; index
< input
.length(); index
++) {
366 char car
= input
.charAt(index
);
367 // displayable chars in ASCII are in the range 32<->255,
369 if (car
>= 32 && car
<= 255 && car
!= 127) {
373 input
= builder
.toString();
380 * Convert between the time in milliseconds to a {@link String} in a "fixed"
381 * way (to exchange data over the wire, for instance).
383 * Precise to the second.
386 * the specified number of milliseconds since the standard base
387 * time known as "the epoch", namely January 1, 1970, 00:00:00
390 * @return the time as a {@link String}
392 static public String
fromTime(long time
) {
393 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
394 return sdf
.format(new Date(time
));
398 * Convert between the time as a {@link String} to milliseconds in a "fixed"
399 * way (to exchange data over the wire, for instance).
401 * Precise to the second.
404 * the time as a {@link String}
406 * @return the number of milliseconds since the standard base time known as
407 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
410 * @throws ParseException
411 * in case of parse error
413 static public long toTime(String displayTime
) throws ParseException
{
414 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
415 return sdf
.parse(displayTime
).getTime();
419 * Return a hash of the given {@link String}.
426 static public String
getMd5Hash(String input
) {
428 MessageDigest md
= MessageDigest
.getInstance("MD5");
429 md
.update(input
.getBytes("UTF-8"));
430 byte byteData
[] = md
.digest();
432 StringBuffer hexString
= new StringBuffer();
433 for (int i
= 0; i
< byteData
.length
; i
++) {
434 String hex
= Integer
.toHexString(0xff & byteData
[i
]);
435 if (hex
.length() == 1)
436 hexString
.append('0');
437 hexString
.append(hex
);
440 return hexString
.toString();
441 } catch (NoSuchAlgorithmException e
) {
443 } catch (UnsupportedEncodingException e
) {
449 * Remove the HTML content from the given input, and un-html-ize the rest.
452 * the HTML-encoded content
454 * @return the HTML-free equivalent content
456 public static String
unhtml(String html
) {
457 StringBuilder builder
= new StringBuilder();
460 for (char car
: html
.toCharArray()) {
463 } else if (car
== '>') {
465 } else if (inTag
<= 0) {
470 char nbsp
= ' '; // non-breakable space (a special char)
472 return HtmlEscape
.unescapeHtml(builder
.toString()).replace(nbsp
, space
);
476 * Escape the given {@link String} so it can be used in XML, as content.
479 * the input {@link String}
481 * @return the escaped {@link String}
483 public static String
xmlEscape(String input
) {
488 return HtmlEscape
.escapeHtml(input
,
489 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
490 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
494 * Escape the given {@link String} so it can be used in XML, as text content
495 * inside double-quotes.
498 * the input {@link String}
500 * @return the escaped {@link String}
502 public static String
xmlEscapeQuote(String input
) {
507 return HtmlEscape
.escapeHtml(input
,
508 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
509 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
513 * Zip the data and then encode it into Base64.
515 * @deprecated use {@link StringUtils#base64(byte[], boolean)} with the
516 * correct parameter instead
521 * @return the Base64 zipped version
524 public static String
zip64(String data
) {
526 return Base64
.encodeBytes(data
.getBytes("UTF-8"), Base64
.GZIP
);
527 } catch (IOException e
) {
534 * Unconvert from Base64 then unzip the content.
536 * @deprecated use {@link StringUtils#unbase64s(String, boolean)} with the
537 * correct parameter instead
540 * the data in Base64 format
542 * @return the raw data
544 * @throws IOException
545 * in case of I/O error
548 public static String
unzip64(String data
) throws IOException
{
549 return new String(Base64
.decode(data
, Base64
.GZIP
), "UTF-8");
553 * Convert the given data to Base64 format.
556 * the data to convert
558 * TRUE to also compress the data in GZIP format; remember that
559 * compressed and not-compressed content are different; you need
560 * to know which is which when decoding
562 * @return the Base64 {@link String} representation of the data
564 * @throws IOException
565 * in case of I/O errors
567 public static String
base64(String data
, boolean zip
) throws IOException
{
568 return base64(data
.getBytes("UTF-8"), zip
);
572 * Convert the given data to Base64 format.
575 * the data to convert
577 * TRUE to also compress the data in GZIP format; remember that
578 * compressed and not-compressed content are different; you need
579 * to know which is which when decoding
581 * @return the Base64 {@link String} representation of the data
583 * @throws IOException
584 * in case of I/O errors
586 public static String
base64(byte[] data
, boolean zip
) throws IOException
{
587 return Base64
.encodeBytes(data
, zip ? Base64
.GZIP
: Base64
.NO_OPTIONS
);
591 * Convert the given data to Base64 format.
594 * the data to convert
596 * TRUE to also uncompress the data from a GZIP format; take care
597 * about this flag, as it could easily cause errors in the
598 * returned content or an {@link IOException}
600 * TRUE to break lines on every 76th character
602 * @return the Base64 {@link String} representation of the data
604 * @throws IOException
605 * in case of I/O errors
607 public static OutputStream
base64(OutputStream data
, boolean zip
,
608 boolean breakLines
) throws IOException
{
609 OutputStream out
= new Base64
.OutputStream(data
,
610 breakLines ? Base64
.DO_BREAK_LINES
& Base64
.ENCODE
614 out
= new java
.util
.zip
.GZIPOutputStream(out
);
621 * Convert the given data to Base64 format.
624 * the data to convert
626 * TRUE to also uncompress the data from a GZIP format; take care
627 * about this flag, as it could easily cause errors in the
628 * returned content or an {@link IOException}
630 * TRUE to break lines on every 76th character
632 * @return the Base64 {@link String} representation of the data
634 * @throws IOException
635 * in case of I/O errors
637 public static InputStream
base64(InputStream data
, boolean zip
,
638 boolean breakLines
) throws IOException
{
640 data
= new java
.util
.zip
.GZIPInputStream(data
);
643 return new Base64
.InputStream(data
, breakLines ? Base64
.DO_BREAK_LINES
644 & Base64
.ENCODE
: Base64
.ENCODE
);
648 * Unconvert the given data from Base64 format back to a raw array of bytes.
650 * Will automatically detect zipped data and also uncompress it before
651 * returning, unless ZIP is false.
654 * the data to unconvert
656 * TRUE to also uncompress the data from a GZIP format
657 * automatically; if set to FALSE, zipped data can be returned
659 * @return the raw data represented by the given Base64 {@link String},
660 * optionally compressed with GZIP
662 * @throws IOException
663 * in case of I/O errors
665 public static byte[] unbase64(String data
, boolean zip
) throws IOException
{
667 .decode(data
, zip ? Base64
.NO_OPTIONS
: Base64
.DONT_GUNZIP
);
671 * Unconvert the given data from Base64 format back to a raw array of bytes.
674 * the data to unconvert
676 * TRUE to also uncompress the data from a GZIP format; take care
677 * about this flag, as it could easily cause errors in the
678 * returned content or an {@link IOException}
680 * @return the raw data represented by the given Base64 {@link String}
682 * @throws IOException
683 * in case of I/O errors
685 public static OutputStream
unbase64(OutputStream data
, boolean zip
)
687 OutputStream out
= new Base64
.OutputStream(data
, Base64
.DECODE
);
690 out
= new java
.util
.zip
.GZIPOutputStream(out
);
697 * Unconvert the given data from Base64 format back to a raw array of bytes.
700 * the data to unconvert
702 * TRUE to also uncompress the data from a GZIP format; take care
703 * about this flag, as it could easily cause errors in the
704 * returned content or an {@link IOException}
706 * @return the raw data represented by the given Base64 {@link String}
708 * @throws IOException
709 * in case of I/O errors
711 public static InputStream
unbase64(InputStream data
, boolean zip
)
714 data
= new java
.util
.zip
.GZIPInputStream(data
);
717 return new Base64
.InputStream(data
, Base64
.DECODE
);
721 * Unconvert the given data from Base64 format back to a raw array of bytes.
723 * Will automatically detect zipped data and also uncompress it before
724 * returning, unless ZIP is false.
727 * the data to unconvert
729 * the offset at which to start taking the data (do not take the
730 * data before it into account)
732 * the number of bytes to take into account (do not process after
733 * this number of bytes has been processed)
735 * TRUE to also uncompress the data from a GZIP format
736 * automatically; if set to FALSE, zipped data can be returned
738 * @return the raw data represented by the given Base64 {@link String}
740 * @throws IOException
741 * in case of I/O errors
743 public static byte[] unbase64(byte[] data
, int offset
, int count
,
744 boolean zip
) throws IOException
{
745 return Base64
.niki_decode(data
, offset
, count
, zip ? Base64
.NO_OPTIONS
746 : Base64
.DONT_GUNZIP
);
750 * Unconvert the given data from Base64 format back to a {@link String}.
752 * Will automatically detect zipped data and also uncompress it before
753 * returning, unless ZIP is false.
756 * the data to unconvert
758 * TRUE to also uncompress the data from a GZIP format
759 * automatically; if set to FALSE, zipped data can be returned
761 * @return the {@link String} represented by the given Base64 {@link String}
762 * , optionally compressed with GZIP
764 * @throws IOException
765 * in case of I/O errors
767 public static String
unbase64s(String data
, boolean zip
) throws IOException
{
768 return new String(unbase64(data
, zip
), "UTF-8");
772 * Unconvert the given data from Base64 format back into a {@link String}.
775 * the data to unconvert
777 * the offset at which to start taking the data (do not take the
778 * data before it into account)
780 * the number of bytes to take into account (do not process after
781 * this number of bytes has been processed)
783 * TRUE to also uncompress the data from a GZIP format; take care
784 * about this flag, as it could easily cause errors in the
785 * returned content or an {@link IOException}
787 * @return the {@link String} represented by the given Base64 {@link String}
788 * , optionally compressed with GZIP
790 * @throws IOException
791 * in case of I/O errors
793 public static String
unbase64s(byte[] data
, int offset
, int count
,
794 boolean zip
) throws IOException
{
795 return new String(unbase64(data
, offset
, count
, zip
), "UTF-8");
799 * Return a display {@link String} for the given value, which can be
800 * suffixed with "k", "M" or "G" depending upon the number, if it is big enough.
805 * <li><tt>8 765</tt> becomes "8k"</li>
806 * <li><tt>998 765</tt> becomes "998k"</li>
807 * <li><tt>12 987 364</tt> becomes "12M"</li>
808 * <li><tt>5 534 333 221</tt> becomes "5G"</li>
812 * the value to convert
814 * @return the display value
816 public static String
formatNumber(long value
) {
817 return formatNumber(value
, 0);
821 * Return a display {@link String} for the given value, which can be
822 * suffixed with "k", "M" or "G" depending upon the number, if it is big enough.
824 * Examples (assuming decimalPositions = 1):
826 * <li><tt>8 765</tt> becomes "8.7k"</li>
827 * <li><tt>998 765</tt> becomes "998.7k"</li>
828 * <li><tt>12 987 364</tt> becomes "12.9M"</li>
829 * <li><tt>5 534 333 221</tt> becomes "5.5G"</li>
833 * the value to convert
834 * @param decimalPositions
835 * the number of decimal positions to keep
837 * @return the display value
839 public static String
formatNumber(long value
, int decimalPositions
) {
840 long userValue
= value
;
844 if (value
>= 1000000000l) {
846 userValue
= value
/ 1000000000l;
848 } else if (value
>= 1000000l) {
850 userValue
= value
/ 1000000l;
852 } else if (value
>= 1000l) {
854 userValue
= value
/ 1000l;
859 if (decimalPositions
> 0) {
860 deci
= Long
.toString(value
% mult
);
861 int size
= Long
.toString(mult
).length() - 1;
862 while (deci
.length() < size
) {
866 deci
= deci
.substring(0, Math
.min(decimalPositions
, deci
.length()));
867 while (deci
.length() < decimalPositions
) {
874 return Long
.toString(userValue
) + deci
+ suffix
;
878 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
879 * read a "display" number that can contain a "G", "M" or "k" suffix and return
882 * Of course, the conversion to and from display form is lossy (example:
883 * <tt>6870</tt> to "6.8k" to <tt>6800</tt>).
886 * the value in display form with possible "M" and "k" suffixes,
889 * @return the value as a number, or 0 if not possible to convert
891 public static long toNumber(String value
) {
892 return toNumber(value
, 0l);
896 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
897 * read a "display" number that can contain a "G", "M" or "k" suffix and return
900 * Of course, the conversion to and from display form is lossy (example:
901 * <tt>6870</tt> to "6.8k" to <tt>6800</tt>).
904 * the value in display form with possible "M" and "k" suffixes,
907 * the default value if it is not possible to convert the given
910 * @return the value as a number, or <tt>def</tt> if not possible to convert
912 public static long toNumber(String value
, long def
) {
915 value
= value
.trim().toLowerCase();
918 if (value
.endsWith("g")) {
919 value
= value
.substring(0, value
.length() - 1).trim();
921 } else if (value
.endsWith("m")) {
922 value
= value
.substring(0, value
.length() - 1).trim();
924 } else if (value
.endsWith("k")) {
925 value
= value
.substring(0, value
.length() - 1).trim();
930 if (value
.contains(".")) {
931 String
[] tab
= value
.split("\\.");
932 if (tab
.length
!= 2) {
933 throw new NumberFormatException(value
);
935 double decimal
= Double
.parseDouble("0."
936 + tab
[tab
.length
- 1]);
937 deci
= ((long) (mult
* decimal
));
940 count
= mult
* Long
.parseLong(value
) + deci
;
941 } catch (Exception e
) {
949 * The "remove accents" pattern.
951 * @return the pattern, or NULL if a problem happens
953 private static Pattern
getMarks() {
956 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
957 } catch (Exception e
) {
958 // Can fail on Android...
964 // justify List<String> related:
968 * Check if this line ends as a complete line (ends with a "." or similar).
970 * Note that we consider an empty line as full, and a line ending with
971 * spaces as not complete.
976 * @return TRUE if it does
978 static private boolean isFullLine(StringBuilder line
) {
979 if (line
.length() == 0) {
983 char lastCar
= line
.charAt(line
.length() - 1);
993 case '"': // double quotes
1005 * Check if this line represents an item in a list or description (i.e.,
1006 * check that the first non-space char is "-").
1011 * @return TRUE if it is
1013 static private boolean isItemLine(String line
) {
1014 String spacing
= getItemSpacing(line
);
1015 return spacing
!= null && !spacing
.isEmpty()
1016 && line
.charAt(spacing
.length()) == '-';
1020 * Return all the spaces that start this line (or Empty if none).
1023 * the line to get the starting spaces from
1025 * @return the left spacing
1027 static private String
getItemSpacing(String line
) {
1029 for (i
= 0; i
< line
.length(); i
++) {
1030 if (line
.charAt(i
) != ' ') {
1031 return line
.substring(0, i
);
1039 * This line is a horizontal spacer line.
1044 * @return TRUE if it is
1046 static private boolean isHrLine(CharSequence line
) {
1049 for (int i
= 0; i
< line
.length(); i
++) {
1050 char car
= line
.charAt(i
);
1051 if (car
== ' ' || car
== '\t' || car
== '*' || car
== '-'
1052 || car
== '_' || car
== '~' || car
== '=' || car
== '/'