1 package be
.nikiroo
.utils
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.OutputStream
;
6 import java
.io
.UnsupportedEncodingException
;
7 import java
.security
.MessageDigest
;
8 import java
.security
.NoSuchAlgorithmException
;
9 import java
.text
.Normalizer
;
10 import java
.text
.Normalizer
.Form
;
11 import java
.text
.ParseException
;
12 import java
.text
.SimpleDateFormat
;
13 import java
.util
.AbstractMap
;
14 import java
.util
.ArrayList
;
15 import java
.util
.Date
;
16 import java
.util
.List
;
17 import java
.util
.Map
.Entry
;
18 import java
.util
.regex
.Pattern
;
20 import org
.unbescape
.html
.HtmlEscape
;
21 import org
.unbescape
.html
.HtmlEscapeLevel
;
22 import org
.unbescape
.html
.HtmlEscapeType
;
24 import be
.nikiroo
.utils
.streams
.Base64InputStream
;
25 import be
.nikiroo
.utils
.streams
.Base64OutputStream
;
28 * This class offers some utilities based around {@link String}s.
32 public class StringUtils
{
34 * This enum type will decide the alignment of a {@link String} when padding
35 * or justification is applied (if there is enough horizontal space for it
38 public enum Alignment
{
39 /** Aligned at left. */
43 /** Aligned at right. */
45 /** Full justified (to both left and right). */
48 // Old Deprecated values:
50 /** DEPRECATED: please use LEFT. */
53 /** DEPRECATED: please use CENTER. */
56 /** DEPRECATED: please use RIGHT. */
61 * Return the non-deprecated version of this enum if needed (or return
64 * @return the non-deprecated value
66 Alignment
undeprecate() {
67 if (this == Beginning
)
77 static private Pattern marks
= getMarks();
80 * Fix the size of the given {@link String} either with space-padding or by
84 * the {@link String} to fix
86 * the size of the resulting {@link String} or -1 for a noop
88 * @return the resulting {@link String} of size <i>size</i>
90 static public String
padString(String text
, int width
) {
91 return padString(text
, width
, true, null);
95 * Fix the size of the given {@link String} either with space-padding or by
96 * optionally shortening it.
99 * the {@link String} to fix
101 * the size of the resulting {@link String} if the text fits or
102 * if cut is TRUE or -1 for a noop
104 * cut the {@link String} shorter if needed
106 * align the {@link String} in this position if we have enough
107 * space (default is Alignment.LEFT)
109 * @return the resulting {@link String} of size <i>size</i> minimum
111 static public String
padString(String text
, int width
, boolean cut
,
115 align
= Alignment
.LEFT
;
118 align
= align
.undeprecate();
124 int diff
= width
- text
.length();
128 text
= text
.substring(0, width
);
129 } else if (diff
> 0) {
130 if (diff
< 2 && align
!= Alignment
.RIGHT
)
131 align
= Alignment
.LEFT
;
135 text
= new String(new char[diff
]).replace('\0', ' ') + text
;
138 int pad1
= (diff
) / 2;
139 int pad2
= (diff
+ 1) / 2;
140 text
= new String(new char[pad1
]).replace('\0', ' ') + text
141 + new String(new char[pad2
]).replace('\0', ' ');
145 text
= text
+ new String(new char[diff
]).replace('\0', ' ');
155 * Justify a text into width-sized (at the maximum) lines.
158 * the {@link String} to justify
160 * the maximum size of the resulting lines
162 * @return a list of justified text lines
164 static public List
<String
> justifyText(String text
, int width
) {
165 return justifyText(text
, width
, null);
169 * Justify a text into width-sized (at the maximum) lines.
172 * the {@link String} to justify
174 * the maximum size of the resulting lines
176 * align the lines in this position (default is
177 * Alignment.LEFT)
179 * @return a list of justified text lines
181 static public List
<String
> justifyText(String text
, int width
,
184 align
= Alignment
.LEFT
;
187 align
= align
.undeprecate();
191 return StringJustifier
.center(text
, width
);
193 return StringJustifier
.right(text
, width
);
195 return StringJustifier
.full(text
, width
);
198 return StringJustifier
.left(text
, width
);
203 * Justify a text into width-sized (at the maximum) lines.
206 * the {@link String} to justify
208 * the maximum size of the resulting lines
210 * @return a list of justified text lines
212 static public List
<String
> justifyText(List
<String
> text
, int width
) {
213 return justifyText(text
, width
, null);
217 * Justify a text into width-sized (at the maximum) lines.
220 * the {@link String} to justify
222 * the maximum size of the resulting lines
224 * align the lines in this position (default is
225 * Alignment.LEFT)
227 * @return a list of justified text lines
229 static public List
<String
> justifyText(List
<String
> text
, int width
,
231 List
<String
> result
= new ArrayList
<String
>();
233 // Content <-> Bullet spacing (null = no spacing)
234 List
<Entry
<String
, String
>> lines
= new ArrayList
<Entry
<String
, String
>>();
235 StringBuilder previous
= null;
236 StringBuilder tmp
= new StringBuilder();
237 String previousItemBulletSpacing
= null;
238 String itemBulletSpacing
= null;
239 for (String inputLine
: text
) {
240 boolean previousLineComplete
= true;
242 String current
= inputLine
.replace("\t", " ");
243 itemBulletSpacing
= getItemSpacing(current
);
244 boolean bullet
= isItemLine(current
);
245 if ((previousItemBulletSpacing
== null || itemBulletSpacing
246 .length() <= previousItemBulletSpacing
.length()) && !bullet
) {
247 itemBulletSpacing
= null;
250 if (itemBulletSpacing
!= null) {
251 current
= current
.trim();
252 if (!current
.isEmpty() && bullet
) {
253 current
= current
.substring(1);
255 current
= current
.trim();
256 previousLineComplete
= bullet
;
259 for (String word
: current
.split(" ")) {
260 if (word
.isEmpty()) {
264 if (tmp
.length() > 0) {
267 tmp
.append(word
.trim());
269 current
= tmp
.toString();
271 previousLineComplete
= current
.isEmpty()
272 || previousItemBulletSpacing
!= null
273 || (previous
!= null && isFullLine(previous
))
274 || isHrLine(current
) || isHrLine(previous
);
277 if (previous
== null) {
278 previous
= new StringBuilder();
280 if (previousLineComplete
) {
281 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(
282 previous
.toString(), previousItemBulletSpacing
));
283 previous
.setLength(0);
284 previousItemBulletSpacing
= itemBulletSpacing
;
286 previous
.append(' ');
290 previous
.append(current
);
294 if (previous
!= null) {
295 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(previous
296 .toString(), previousItemBulletSpacing
));
299 for (Entry
<String
, String
> line
: lines
) {
300 String content
= line
.getKey();
301 String spacing
= line
.getValue();
303 String bullet
= "- ";
304 if (spacing
== null) {
309 if (spacing
.length() > width
+ 3) {
313 for (String subline
: StringUtils
.justifyText(content
, width
314 - (spacing
.length() + bullet
.length()), align
)) {
315 result
.add(spacing
+ bullet
+ subline
);
316 if (!bullet
.isEmpty()) {
326 * Sanitise the given input to make it more Terminal-friendly by removing
327 * combining characters.
330 * the input to sanitise
331 * @param allowUnicode
332 * allow Unicode or only allow ASCII Latin characters
334 * @return the sanitised {@link String}
336 static public String
sanitize(String input
, boolean allowUnicode
) {
337 return sanitize(input
, allowUnicode
, !allowUnicode
);
341 * Sanitise the given input to make it more Terminal-friendly by removing
342 * combining characters.
345 * the input to sanitise
346 * @param allowUnicode
347 * allow Unicode or only allow ASCII Latin characters
348 * @param removeAllAccents
349 * TRUE to replace all accentuated characters by their non
350 * accentuated counter-parts
352 * @return the sanitised {@link String}
354 static public String
sanitize(String input
, boolean allowUnicode
,
355 boolean removeAllAccents
) {
357 if (removeAllAccents
) {
358 input
= Normalizer
.normalize(input
, Form
.NFKD
);
360 input
= marks
.matcher(input
).replaceAll("");
364 input
= Normalizer
.normalize(input
, Form
.NFKC
);
367 StringBuilder builder
= new StringBuilder();
368 for (int index
= 0; index
< input
.length(); index
++) {
369 char car
= input
.charAt(index
);
370 // displayable chars in ASCII are in the range 32<->255,
372 if (car
>= 32 && car
<= 255 && car
!= 127) {
376 input
= builder
.toString();
383 * Convert between the time in milliseconds to a {@link String} in a "fixed"
384 * way (to exchange data over the wire, for instance).
386 * Precise to the second.
389 * the specified number of milliseconds since the standard base
390 * time known as "the epoch", namely January 1, 1970, 00:00:00
393 * @return the time as a {@link String}
395 static public String
fromTime(long time
) {
396 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
397 return sdf
.format(new Date(time
));
401 * Convert between the time as a {@link String} to milliseconds in a "fixed"
402 * way (to exchange data over the wire, for instance).
404 * Precise to the second.
407 * the time as a {@link String}
409 * @return the number of milliseconds since the standard base time known as
410 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
413 * @throws ParseException
414 * in case of parse error
416 static public long toTime(String displayTime
) throws ParseException
{
417 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
418 return sdf
.parse(displayTime
).getTime();
422 * Return a hash of the given {@link String}.
429 static public String
getMd5Hash(String input
) {
431 MessageDigest md
= MessageDigest
.getInstance("MD5");
432 md
.update(input
.getBytes("UTF-8"));
433 byte byteData
[] = md
.digest();
435 StringBuffer hexString
= new StringBuffer();
436 for (int i
= 0; i
< byteData
.length
; i
++) {
437 String hex
= Integer
.toHexString(0xff & byteData
[i
]);
438 if (hex
.length() == 1)
439 hexString
.append('0');
440 hexString
.append(hex
);
443 return hexString
.toString();
444 } catch (NoSuchAlgorithmException e
) {
446 } catch (UnsupportedEncodingException e
) {
452 * Remove the HTML content from the given input, and un-html-ize the rest.
455 * the HTML-encoded content
457 * @return the HTML-free equivalent content
459 public static String
unhtml(String html
) {
460 StringBuilder builder
= new StringBuilder();
463 for (char car
: html
.toCharArray()) {
466 } else if (car
== '>') {
468 } else if (inTag
<= 0) {
473 char nbsp
= ' '; // non-breakable space (a special char)
475 return HtmlEscape
.unescapeHtml(builder
.toString()).replace(nbsp
, space
);
479 * Escape the given {@link String} so it can be used in XML, as content.
482 * the input {@link String}
484 * @return the escaped {@link String}
486 public static String
xmlEscape(String input
) {
491 return HtmlEscape
.escapeHtml(input
,
492 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
493 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
497 * Escape the given {@link String} so it can be used in XML, as text content
498 * inside double-quotes.
501 * the input {@link String}
503 * @return the escaped {@link String}
505 public static String
xmlEscapeQuote(String input
) {
510 return HtmlEscape
.escapeHtml(input
,
511 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
512 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
516 * Zip the data and then encode it into Base64.
518 * @deprecated use {@link StringUtils#base64(byte[], boolean)} with the
519 * correct parameter instead
524 * @return the Base64 zipped version
527 public static String
zip64(String data
) {
529 return Base64
.encodeBytes(data
.getBytes("UTF-8"), Base64
.GZIP
);
530 } catch (IOException e
) {
537 * Unconvert from Base64 then unzip the content.
539 * @deprecated use {@link StringUtils#unbase64s(String, boolean)} with the
540 * correct parameter instead
543 * the data in Base64 format
545 * @return the raw data
547 * @throws IOException
548 * in case of I/O error
551 public static String
unzip64(String data
) throws IOException
{
552 return new String(Base64
.decode(data
, Base64
.GZIP
), "UTF-8");
556 * Convert the given data to Base64 format.
559 * the data to convert
561 * TRUE to also compress the data in GZIP format; remember that
562 * compressed and not-compressed content are different; you need
563 * to know which is which when decoding
565 * @return the Base64 {@link String} representation of the data
567 * @throws IOException
568 * in case of I/O errors
570 public static String
base64(String data
, boolean zip
) throws IOException
{
571 return base64(data
.getBytes("UTF-8"), zip
);
575 * Convert the given data to Base64 format.
578 * the data to convert
580 * TRUE to also compress the data in GZIP format; remember that
581 * compressed and not-compressed content are different; you need
582 * to know which is which when decoding
584 * @return the Base64 {@link String} representation of the data
586 * @throws IOException
587 * in case of I/O errors
589 public static String
base64(byte[] data
, boolean zip
) throws IOException
{
590 return Base64
.encodeBytes(data
, zip ? Base64
.GZIP
: Base64
.NO_OPTIONS
);
594 * Convert the given data to Base64 format.
597 * the data to convert
599 * TRUE to also uncompress the data from a GZIP format; take care
600 * about this flag, as it could easily cause errors in the
601 * returned content or an {@link IOException}
603 * TRUE to break lines on every 76th character
605 * @return the Base64 {@link String} representation of the data
607 * @throws IOException
608 * in case of I/O errors
611 public static OutputStream
base64(OutputStream data
, boolean zip
,
612 boolean breakLines
) throws IOException
{
613 OutputStream out
= new Base64OutputStream(data
, true);
616 out
= new java
.util
.zip
.GZIPOutputStream(out
);
623 * Convert the given data to Base64 format.
626 * the data to convert
628 * TRUE to also uncompress the data from a GZIP format; take care
629 * about this flag, as it could easily cause errors in the
630 * returned content or an {@link IOException}
632 * TRUE to break lines on every 76th character
634 * @return the Base64 {@link String} representation of the data
636 * @throws IOException
637 * in case of I/O errors
640 public static InputStream
base64(InputStream data
, boolean zip
,
641 boolean breakLines
) throws IOException
{
643 data
= new java
.util
.zip
.GZIPInputStream(data
);
646 return new Base64InputStream(data
, true);
650 * Unconvert the given data from Base64 format back to a raw array of bytes.
652 * Will automatically detect zipped data and also uncompress it before
653 * returning, unless ZIP is false.
656 * the data to unconvert
658 * TRUE to also uncompress the data from a GZIP format
659 * automatically; if set to FALSE, zipped data can be returned
661 * @return the raw data represented by the given Base64 {@link String},
662 * optionally compressed with GZIP
664 * @throws IOException
665 * in case of I/O errors
667 public static byte[] unbase64(String data
, boolean zip
) throws IOException
{
669 .decode(data
, zip ? Base64
.NO_OPTIONS
: Base64
.DONT_GUNZIP
);
673 * Unconvert the given data from Base64 format back to a raw array of bytes.
676 * the data to unconvert
678 * TRUE to also uncompress the data from a GZIP format; take care
679 * about this flag, as it could easily cause errors in the
680 * returned content or an {@link IOException}
682 * @return the raw data represented by the given Base64 {@link String}
684 * @throws IOException
685 * in case of I/O errors
688 public static OutputStream
unbase64(OutputStream data
, boolean zip
)
690 OutputStream out
= new Base64OutputStream(data
, false);
694 out
= new java
.util
.zip
.GZIPOutputStream(out
);
701 * Unconvert the given data from Base64 format back to a raw array of bytes.
704 * the data to unconvert
706 * TRUE to also uncompress the data from a GZIP format; take care
707 * about this flag, as it could easily cause errors in the
708 * returned content or an {@link IOException}
710 * @return the raw data represented by the given Base64 {@link String}
712 * @throws IOException
713 * in case of I/O errors
716 public static InputStream
unbase64(InputStream data
, boolean zip
)
719 data
= new java
.util
.zip
.GZIPInputStream(data
);
722 return new Base64InputStream(data
, false);
726 * Unconvert the given data from Base64 format back to a raw array of bytes.
728 * Will automatically detect zipped data and also uncompress it before
729 * returning, unless ZIP is false.
732 * the data to unconvert
734 * the offset at which to start taking the data (do not take the
735 * data before it into account)
737 * the number of bytes to take into account (do not process after
738 * this number of bytes has been processed)
740 * TRUE to also uncompress the data from a GZIP format
741 * automatically; if set to FALSE, zipped data can be returned
743 * @return the raw data represented by the given Base64 {@link String}
745 * @throws IOException
746 * in case of I/O errors
748 public static byte[] unbase64(byte[] data
, int offset
, int count
,
749 boolean zip
) throws IOException
{
750 return Base64
.niki_decode(data
, offset
, count
, zip ? Base64
.NO_OPTIONS
751 : Base64
.DONT_GUNZIP
);
755 * Unonvert the given data from Base64 format back to a {@link String}.
757 * Will automatically detect zipped data and also uncompress it before
758 * returning, unless ZIP is false.
761 * the data to unconvert
763 * TRUE to also uncompress the data from a GZIP format
764 * automatically; if set to FALSE, zipped data can be returned
766 * @return the {@link String} represented by the given Base64 {@link String}
767 * , optionally compressed with GZIP
769 * @throws IOException
770 * in case of I/O errors
772 public static String
unbase64s(String data
, boolean zip
) throws IOException
{
773 return new String(unbase64(data
, zip
), "UTF-8");
777 * Unconvert the given data from Base64 format back into a {@link String}.
780 * the data to unconvert
782 * the offset at which to start taking the data (do not take the
783 * data before it into account)
785 * the number of bytes to take into account (do not process after
786 * this number of bytes has been processed)
788 * TRUE to also uncompress the data from a GZIP format; take care
789 * about this flag, as it could easily cause errors in the
790 * returned content or an {@link IOException}
792 * @return the {@link String} represented by the given Base64 {@link String}
793 * , optionally compressed with GZIP
795 * @throws IOException
796 * in case of I/O errors
798 public static String
unbase64s(byte[] data
, int offset
, int count
,
799 boolean zip
) throws IOException
{
800 return new String(unbase64(data
, offset
, count
, zip
), "UTF-8");
804 * Return a display {@link String} for the given value, which can be
805 * suffixed with "k" or "M" depending upon the number, if it is big enough.
810 * <li><tt>8 765</tt> becomes "8k"</li>
811 * <li><tt>998 765</tt> becomes "998k"</li>
812 * <li><tt>12 987 364</tt> becomes "12M"</li>
813 * <li><tt>5 534 333 221</tt> becomes "5G"</li>
817 * the value to convert
819 * @return the display value
821 public static String
formatNumber(long value
) {
822 return formatNumber(value
, 0);
826 * Return a display {@link String} for the given value, which can be
827 * suffixed with "k", "M" or "G" depending upon the number, if it is big enough.
829 * Examples (assuming decimalPositions = 1):
831 * <li><tt>8 765</tt> becomes "8.7k"</li>
832 * <li><tt>998 765</tt> becomes "998.7k"</li>
833 * <li><tt>12 987 364</tt> becomes "12.9M"</li>
834 * <li><tt>5 534 333 221</tt> becomes "5.5G"</li>
838 * the value to convert
839 * @param decimalPositions
840 * the number of decimal positions to keep
842 * @return the display value
844 public static String
formatNumber(long value
, int decimalPositions
) {
845 long userValue
= value
;
849 if (value
>= 1000000000l) {
851 userValue
= value
/ 1000000000l;
853 } else if (value
>= 1000000l) {
855 userValue
= value
/ 1000000l;
857 } else if (value
>= 1000l) {
859 userValue
= value
/ 1000l;
864 if (decimalPositions
> 0) {
865 deci
= Long
.toString(value
% mult
);
866 int size
= Long
.toString(mult
).length() - 1;
867 while (deci
.length() < size
) {
871 deci
= deci
.substring(0, Math
.min(decimalPositions
, deci
.length()));
872 while (deci
.length() < decimalPositions
) {
879 return Long
.toString(userValue
) + deci
+ suffix
;
883 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
884 * read a "display" number that can contain a "M" or "k" suffix and return
887 * Of course, the conversion to and from display form is lossy (example:
888 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
891 * the value in display form with possible "M" and "k" suffixes,
894 * @return the value as a number, or 0 if not possible to convert
896 public static long toNumber(String value
) {
897 return toNumber(value
, 0l);
901 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
902 * read a "display" number that can contain a "G", "M" or "k" suffix and return
905 * Of course, the conversion to and from display form is lossy (example:
906 * <tt>6870</tt> to "6.8k" to <tt>6800</tt>).
909 * the value in display form with possible "G", "M" and "k" suffixes,
912 * the default value if it is not possible to convert the given
915 * @return the value as a number, or <tt>def</tt> if not possible to convert
917 public static long toNumber(String value
, long def
) {
920 value
= value
.trim().toLowerCase();
923 if (value
.endsWith("g")) {
924 value
= value
.substring(0, value
.length() - 1).trim();
926 } else if (value
.endsWith("m")) {
927 value
= value
.substring(0, value
.length() - 1).trim();
929 } else if (value
.endsWith("k")) {
930 value
= value
.substring(0, value
.length() - 1).trim();
935 if (value
.contains(".")) {
936 String
[] tab
= value
.split("\\.");
937 if (tab
.length
!= 2) {
938 throw new NumberFormatException(value
);
940 double decimal
= Double
.parseDouble("0."
941 + tab
[tab
.length
- 1]);
942 deci
= ((long) (mult
* decimal
));
945 count
= mult
* Long
.parseLong(value
) + deci
;
946 } catch (Exception e
) {
954 * The "remove accents" pattern.
956 * @return the pattern, or NULL if a problem happens
958 private static Pattern
getMarks() {
961 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
962 } catch (Exception e
) {
963 // Can fail on Android...
969 // justify List<String> related:
973 * Check if this line ends as a complete line (ends with a "." or similar).
975 * Note that we consider an empty line as full, and a line ending with
976 * spaces as not complete.
981 * @return TRUE if it does
983 static private boolean isFullLine(StringBuilder line
) {
984 if (line
.length() == 0) {
988 char lastCar
= line
.charAt(line
.length() - 1);
998 case '"': // double quotes
1010 * Check if this line represent an item in a list or description (i.e.,
1011 * check that the first non-space char is "-").
1016 * @return TRUE if it is
1018 static private boolean isItemLine(String line
) {
1019 String spacing
= getItemSpacing(line
);
1020 return spacing
!= null && !spacing
.isEmpty()
1021 && line
.charAt(spacing
.length()) == '-';
1025 * Return all the spaces that start this line (or Empty if none).
1028 * the line to get the starting spaces from
1030 * @return the left spacing
1032 static private String
getItemSpacing(String line
) {
1034 for (i
= 0; i
< line
.length(); i
++) {
1035 if (line
.charAt(i
) != ' ') {
1036 return line
.substring(0, i
);
1044 * This line is a horizontal spacer line.
1049 * @return TRUE if it is
1051 static private boolean isHrLine(CharSequence line
) {
1054 for (int i
= 0; i
< line
.length(); i
++) {
1055 char car
= line
.charAt(i
);
1056 if (car
== ' ' || car
== '\t' || car
== '*' || car
== '-'
1057 || car
== '_' || car
== '~' || car
== '=' || car
== '/'