1 package be
.nikiroo
.utils
;
3 import java
.io
.ByteArrayInputStream
;
4 import java
.io
.ByteArrayOutputStream
;
5 import java
.io
.IOException
;
6 import java
.io
.InputStream
;
7 import java
.io
.OutputStream
;
8 import java
.io
.UnsupportedEncodingException
;
9 import java
.security
.MessageDigest
;
10 import java
.security
.NoSuchAlgorithmException
;
11 import java
.text
.Normalizer
;
12 import java
.text
.Normalizer
.Form
;
13 import java
.text
.ParseException
;
14 import java
.text
.SimpleDateFormat
;
15 import java
.util
.AbstractMap
;
16 import java
.util
.ArrayList
;
17 import java
.util
.Arrays
;
18 import java
.util
.Date
;
19 import java
.util
.List
;
20 import java
.util
.Map
.Entry
;
21 import java
.util
.regex
.Pattern
;
22 import java
.util
.zip
.GZIPInputStream
;
23 import java
.util
.zip
.GZIPOutputStream
;
25 import org
.unbescape
.html
.HtmlEscape
;
26 import org
.unbescape
.html
.HtmlEscapeLevel
;
27 import org
.unbescape
.html
.HtmlEscapeType
;
29 import be
.nikiroo
.utils
.streams
.Base64InputStream
;
30 import be
.nikiroo
.utils
.streams
.Base64OutputStream
;
33 * This class offer some utilities based around {@link String}s.
37 public class StringUtils
{
39 * This enum type will decide the alignment of a {@link String} when padding
40 * or justification is applied (if there is enough horizontal space for it
43 public enum Alignment
{
44 /** Aligned at left. */
48 /** Aligned at right. */
50 /** Full justified (to both left and right). */
53 // Old Deprecated values:
55 /** DEPRECATED: please use LEFT. */
58 /** DEPRECATED: please use CENTER. */
61 /** DEPRECATED: please use RIGHT. */
66 * Return the non-deprecated version of this enum if needed (or return
69 * @return the non-deprecated value
71 Alignment
undeprecate() {
72 if (this == Beginning
)
82 static private Pattern marks
= getMarks();
85 * Fix the size of the given {@link String} either with space-padding or by
89 * the {@link String} to fix
91 * the size of the resulting {@link String} or -1 for a noop
93 * @return the resulting {@link String} of size <i>size</i>
95 static public String
padString(String text
, int width
) {
96 return padString(text
, width
, true, null);
100 * Fix the size of the given {@link String} either with space-padding or by
101 * optionally shortening it.
104 * the {@link String} to fix
106 * the size of the resulting {@link String} if the text fits or
107 * if cut is TRUE or -1 for a noop
109 * cut the {@link String} shorter if needed
111 * align the {@link String} in this position if we have enough
112 * space (default is Alignment.Beginning)
114 * @return the resulting {@link String} of size <i>size</i> minimum
116 static public String
padString(String text
, int width
, boolean cut
,
120 align
= Alignment
.LEFT
;
123 align
= align
.undeprecate();
129 int diff
= width
- text
.length();
133 text
= text
.substring(0, width
);
134 } else if (diff
> 0) {
135 if (diff
< 2 && align
!= Alignment
.RIGHT
)
136 align
= Alignment
.LEFT
;
140 text
= new String(new char[diff
]).replace('\0', ' ') + text
;
143 int pad1
= (diff
) / 2;
144 int pad2
= (diff
+ 1) / 2;
145 text
= new String(new char[pad1
]).replace('\0', ' ') + text
146 + new String(new char[pad2
]).replace('\0', ' ');
150 text
= text
+ new String(new char[diff
]).replace('\0', ' ');
160 * Justify a text into width-sized (at the maximum) lines.
163 * the {@link String} to justify
165 * the maximum size of the resulting lines
167 * @return a list of justified text lines
169 static public List
<String
> justifyText(String text
, int width
) {
170 return justifyText(text
, width
, null);
174 * Justify a text into width-sized (at the maximum) lines.
177 * the {@link String} to justify
179 * the maximum size of the resulting lines
181 * align the lines in this position (default is
182 * Alignment.Beginning)
184 * @return a list of justified text lines
186 static public List
<String
> justifyText(String text
, int width
,
189 align
= Alignment
.LEFT
;
192 align
= align
.undeprecate();
196 return StringJustifier
.center(text
, width
);
198 return StringJustifier
.right(text
, width
);
200 return StringJustifier
.full(text
, width
);
203 return StringJustifier
.left(text
, width
);
208 * Justify a text into width-sized (at the maximum) lines.
211 * the {@link String} to justify
213 * the maximum size of the resulting lines
215 * @return a list of justified text lines
217 static public List
<String
> justifyText(List
<String
> text
, int width
) {
218 return justifyText(text
, width
, null);
222 * Justify a text into width-sized (at the maximum) lines.
225 * the {@link String} to justify
227 * the maximum size of the resulting lines
229 * align the lines in this position (default is
230 * Alignment.Beginning)
232 * @return a list of justified text lines
234 static public List
<String
> justifyText(List
<String
> text
, int width
,
236 List
<String
> result
= new ArrayList
<String
>();
238 // Content <-> Bullet spacing (null = no spacing)
239 List
<Entry
<String
, String
>> lines
= new ArrayList
<Entry
<String
, String
>>();
240 StringBuilder previous
= null;
241 StringBuilder tmp
= new StringBuilder();
242 String previousItemBulletSpacing
= null;
243 String itemBulletSpacing
= null;
244 for (String inputLine
: text
) {
245 boolean previousLineComplete
= true;
247 String current
= inputLine
.replace("\t", " ");
248 itemBulletSpacing
= getItemSpacing(current
);
249 boolean bullet
= isItemLine(current
);
250 if ((previousItemBulletSpacing
== null || itemBulletSpacing
251 .length() <= previousItemBulletSpacing
.length()) && !bullet
) {
252 itemBulletSpacing
= null;
255 if (itemBulletSpacing
!= null) {
256 current
= current
.trim();
257 if (!current
.isEmpty() && bullet
) {
258 current
= current
.substring(1);
260 current
= current
.trim();
261 previousLineComplete
= bullet
;
264 for (String word
: current
.split(" ")) {
265 if (word
.isEmpty()) {
269 if (tmp
.length() > 0) {
272 tmp
.append(word
.trim());
274 current
= tmp
.toString();
276 previousLineComplete
= current
.isEmpty()
277 || previousItemBulletSpacing
!= null
278 || (previous
!= null && isFullLine(previous
))
279 || isHrLine(current
) || isHrLine(previous
);
282 if (previous
== null) {
283 previous
= new StringBuilder();
285 if (previousLineComplete
) {
286 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(
287 previous
.toString(), previousItemBulletSpacing
));
288 previous
.setLength(0);
289 previousItemBulletSpacing
= itemBulletSpacing
;
291 previous
.append(' ');
295 previous
.append(current
);
299 if (previous
!= null) {
300 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(previous
301 .toString(), previousItemBulletSpacing
));
304 for (Entry
<String
, String
> line
: lines
) {
305 String content
= line
.getKey();
306 String spacing
= line
.getValue();
308 String bullet
= "- ";
309 if (spacing
== null) {
314 if (spacing
.length() > width
+ 3) {
318 for (String subline
: StringUtils
.justifyText(content
, width
319 - (spacing
.length() + bullet
.length()), align
)) {
320 result
.add(spacing
+ bullet
+ subline
);
321 if (!bullet
.isEmpty()) {
331 * Sanitise the given input to make it more Terminal-friendly by removing
332 * combining characters.
335 * the input to sanitise
336 * @param allowUnicode
337 * allow Unicode or only allow ASCII Latin characters
339 * @return the sanitised {@link String}
341 static public String
sanitize(String input
, boolean allowUnicode
) {
342 return sanitize(input
, allowUnicode
, !allowUnicode
);
346 * Sanitise the given input to make it more Terminal-friendly by removing
347 * combining characters.
350 * the input to sanitise
351 * @param allowUnicode
352 * allow Unicode or only allow ASCII Latin characters
353 * @param removeAllAccents
354 * TRUE to replace all accentuated characters by their non
355 * accentuated counter-parts
357 * @return the sanitised {@link String}
359 static public String
sanitize(String input
, boolean allowUnicode
,
360 boolean removeAllAccents
) {
362 if (removeAllAccents
) {
363 input
= Normalizer
.normalize(input
, Form
.NFKD
);
365 input
= marks
.matcher(input
).replaceAll("");
369 input
= Normalizer
.normalize(input
, Form
.NFKC
);
372 StringBuilder builder
= new StringBuilder();
373 for (int index
= 0; index
< input
.length(); index
++) {
374 char car
= input
.charAt(index
);
375 // displayable chars in ASCII are in the range 32<->255,
377 if (car
>= 32 && car
<= 255 && car
!= 127) {
381 input
= builder
.toString();
388 * Convert between the time in milliseconds to a {@link String} in a "fixed"
389 * way (to exchange data over the wire, for instance).
391 * Precise to the second.
394 * the specified number of milliseconds since the standard base
395 * time known as "the epoch", namely January 1, 1970, 00:00:00
398 * @return the time as a {@link String}
400 static public String
fromTime(long time
) {
401 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
402 return sdf
.format(new Date(time
));
406 * Convert between the time as a {@link String} to milliseconds in a "fixed"
407 * way (to exchange data over the wire, for instance).
409 * Precise to the second.
412 * the time as a {@link String}
414 * @return the number of milliseconds since the standard base time known as
415 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
418 * @throws ParseException
419 * in case of parse error
421 static public long toTime(String displayTime
) throws ParseException
{
422 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
423 return sdf
.parse(displayTime
).getTime();
427 * Return a hash of the given {@link String}.
434 static public String
getMd5Hash(String input
) {
436 MessageDigest md
= MessageDigest
.getInstance("MD5");
437 md
.update(getBytes(input
));
438 byte byteData
[] = md
.digest();
440 StringBuffer hexString
= new StringBuffer();
441 for (int i
= 0; i
< byteData
.length
; i
++) {
442 String hex
= Integer
.toHexString(0xff & byteData
[i
]);
443 if (hex
.length() == 1)
444 hexString
.append('0');
445 hexString
.append(hex
);
448 return hexString
.toString();
449 } catch (NoSuchAlgorithmException e
) {
455 * Remove the HTML content from the given input, and un-html-ize the rest.
458 * the HTML-encoded content
460 * @return the HTML-free equivalent content
462 public static String
unhtml(String html
) {
463 StringBuilder builder
= new StringBuilder();
466 for (char car
: html
.toCharArray()) {
469 } else if (car
== '>') {
471 } else if (inTag
<= 0) {
476 char nbsp
= ' '; // non-breakable space (a special char)
478 return HtmlEscape
.unescapeHtml(builder
.toString()).replace(nbsp
, space
);
482 * Escape the given {@link String} so it can be used in XML, as content.
485 * the input {@link String}
487 * @return the escaped {@link String}
489 public static String
xmlEscape(String input
) {
494 return HtmlEscape
.escapeHtml(input
,
495 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
496 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
500 * Escape the given {@link String} so it can be used in XML, as text content
501 * inside double-quotes.
504 * the input {@link String}
506 * @return the escaped {@link String}
508 public static String
xmlEscapeQuote(String input
) {
513 return HtmlEscape
.escapeHtml(input
,
514 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
515 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
519 * Zip the data and then encode it into Base64.
524 * @return the Base64 zipped version
526 * @throws IOException
527 * in case of I/O error
529 public static String
zip64(String data
) throws IOException
{
531 return zip64(getBytes(data
));
532 } catch (UnsupportedEncodingException e
) {
533 // All conforming JVM are required to support UTF-8
540 * Zip the data and then encode it into Base64.
545 * @return the Base64 zipped version
547 * @throws IOException
548 * in case of I/O error
550 public static String
zip64(byte[] data
) throws IOException
{
552 ByteArrayOutputStream bout
= new ByteArrayOutputStream();
554 OutputStream out
= new GZIPOutputStream(bout
);
561 data
= bout
.toByteArray();
566 InputStream in
= new ByteArrayInputStream(data
);
568 in
= new Base64InputStream(in
, true);
569 return new String(IOUtils
.toByteArray(in
), "UTF-8");
576 * Unconvert from Base64 then unzip the content, which is assumed to be a
580 * the data in Base64 format
582 * @return the raw data
584 * @throws IOException
585 * in case of I/O error
587 public static String
unzip64s(String data
) throws IOException
{
588 return new String(unzip64(data
), "UTF-8");
592 * Unconvert from Base64 then unzip the content.
595 * the data in Base64 format
597 * @return the raw data
599 * @throws IOException
600 * in case of I/O error
602 public static byte[] unzip64(String data
) throws IOException
{
603 InputStream in
= new Base64InputStream(new ByteArrayInputStream(
604 getBytes(data
)), false);
606 in
= new GZIPInputStream(in
);
607 return IOUtils
.toByteArray(in
);
614 * Convert the given data to Base64 format.
617 * the data to convert
619 * @return the Base64 {@link String} representation of the data
621 * @throws IOException
622 * in case of I/O errors
624 public static String
base64(String data
) throws IOException
{
625 return base64(getBytes(data
));
629 * Convert the given data to Base64 format.
632 * the data to convert
634 * @return the Base64 {@link String} representation of the data
636 * @throws IOException
637 * in case of I/O errors
639 public static String
base64(byte[] data
) throws IOException
{
640 Base64InputStream in
= new Base64InputStream(new ByteArrayInputStream(
643 return new String(IOUtils
.toByteArray(in
), "UTF-8");
650 * Unconvert the given data from Base64 format back to a raw array of bytes.
653 * the data to unconvert
655 * @return the raw data represented by the given Base64 {@link String},
657 * @throws IOException
658 * in case of I/O errors
660 public static byte[] unbase64(String data
) throws IOException
{
661 Base64InputStream in
= new Base64InputStream(new ByteArrayInputStream(
662 getBytes(data
)), false);
664 return IOUtils
.toByteArray(in
);
671 * Unonvert the given data from Base64 format back to a {@link String}.
674 * the data to unconvert
676 * @return the {@link String} represented by the given Base64 {@link String}
678 * @throws IOException
679 * in case of I/O errors
681 public static String
unbase64s(String data
) throws IOException
{
682 return new String(unbase64(data
), "UTF-8");
686 * Return a display {@link String} for the given value, which can be
687 * suffixed with "k" or "M" depending upon the number, if it is big enough.
692 * <li><tt>8 765</tt> becomes "8k"</li>
693 * <li><tt>998 765</tt> becomes "998k"</li>
694 * <li><tt>12 987 364</tt> becomes "12M"</li>
695 * <li><tt>5 534 333 221</tt> becomes "5G"</li>
699 * the value to convert
701 * @return the display value
703 public static String
formatNumber(long value
) {
704 return formatNumber(value
, 0);
708 * Return a display {@link String} for the given value, which can be
709 * suffixed with "k" or "M" depending upon the number, if it is big enough.
711 * Examples (assuming decimalPositions = 1):
713 * <li><tt>8 765</tt> becomes "8.7k"</li>
714 * <li><tt>998 765</tt> becomes "998.7k"</li>
715 * <li><tt>12 987 364</tt> becomes "12.9M"</li>
716 * <li><tt>5 534 333 221</tt> becomes "5.5G"</li>
720 * the value to convert
721 * @param decimalPositions
722 * the number of decimal positions to keep
724 * @return the display value
726 public static String
formatNumber(long value
, int decimalPositions
) {
727 long userValue
= value
;
731 if (value
>= 1000000000l) {
733 userValue
= value
/ 1000000000l;
735 } else if (value
>= 1000000l) {
737 userValue
= value
/ 1000000l;
739 } else if (value
>= 1000l) {
741 userValue
= value
/ 1000l;
746 if (decimalPositions
> 0) {
747 deci
= Long
.toString(value
% mult
);
748 int size
= Long
.toString(mult
).length() - 1;
749 while (deci
.length() < size
) {
753 deci
= deci
.substring(0, Math
.min(decimalPositions
, deci
.length()));
754 while (deci
.length() < decimalPositions
) {
761 return Long
.toString(userValue
) + deci
+ suffix
;
765 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
766 * read a "display" number that can contain a "M" or "k" suffix and return
769 * Of course, the conversion to and from display form is lossy (example:
770 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
773 * the value in display form with possible "M" and "k" suffixes,
776 * @return the value as a number, or 0 if not possible to convert
778 public static long toNumber(String value
) {
779 return toNumber(value
, 0l);
783 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
784 * read a "display" number that can contain a "M" or "k" suffix and return
787 * Of course, the conversion to and from display form is lossy (example:
788 * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
791 * the value in display form with possible "M" and "k" suffixes,
794 * the default value if it is not possible to convert the given
797 * @return the value as a number, or 0 if not possible to convert
799 public static long toNumber(String value
, long def
) {
802 value
= value
.trim().toLowerCase();
805 if (value
.endsWith("g")) {
806 value
= value
.substring(0, value
.length() - 1).trim();
808 } else if (value
.endsWith("m")) {
809 value
= value
.substring(0, value
.length() - 1).trim();
811 } else if (value
.endsWith("k")) {
812 value
= value
.substring(0, value
.length() - 1).trim();
817 if (value
.contains(".")) {
818 String
[] tab
= value
.split("\\.");
819 if (tab
.length
!= 2) {
820 throw new NumberFormatException(value
);
822 double decimal
= Double
.parseDouble("0."
823 + tab
[tab
.length
- 1]);
824 deci
= ((long) (mult
* decimal
));
827 count
= mult
* Long
.parseLong(value
) + deci
;
828 } catch (Exception e
) {
836 * Return the bytes array representation of the given {@link String} in
840 * the {@link String} to transform into bytes
841 * @return the content in bytes
843 static public byte[] getBytes(String str
) {
845 return str
.getBytes("UTF-8");
846 } catch (UnsupportedEncodingException e
) {
847 // All conforming JVM must support UTF-8
854 * The "remove accents" pattern.
856 * @return the pattern, or NULL if a problem happens
858 private static Pattern
getMarks() {
861 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
862 } catch (Exception e
) {
863 // Can fail on Android...
869 // justify List<String> related:
873 * Check if this line ends as a complete line (ends with a "." or similar).
875 * Note that we consider an empty line as full, and a line ending with
876 * spaces as not complete.
881 * @return TRUE if it does
883 static private boolean isFullLine(StringBuilder line
) {
884 if (line
.length() == 0) {
888 char lastCar
= line
.charAt(line
.length() - 1);
898 case '"': // double quotes
910 * Check if this line represent an item in a list or description (i.e.,
911 * check that the first non-space char is "-").
916 * @return TRUE if it is
918 static private boolean isItemLine(String line
) {
919 String spacing
= getItemSpacing(line
);
920 return spacing
!= null && !spacing
.isEmpty()
921 && line
.charAt(spacing
.length()) == '-';
925 * Return all the spaces that start this line (or Empty if none).
928 * the line to get the starting spaces from
930 * @return the left spacing
932 static private String
getItemSpacing(String line
) {
934 for (i
= 0; i
< line
.length(); i
++) {
935 if (line
.charAt(i
) != ' ') {
936 return line
.substring(0, i
);
944 * This line is an horizontal spacer line.
949 * @return TRUE if it is
951 static private boolean isHrLine(CharSequence line
) {
954 for (int i
= 0; i
< line
.length(); i
++) {
955 char car
= line
.charAt(i
);
956 if (car
== ' ' || car
== '\t' || car
== '*' || car
== '-'
957 || car
== '_' || car
== '~' || car
== '=' || car
== '/'
969 // Deprecated functions, please do not use //
972 * @deprecated please use {@link StringUtils#zip64(byte[])} or
973 * {@link StringUtils#base64(byte[])} instead.
978 * TRUE to zip it before Base64 encoding it, FALSE for Base64
981 * @return the encoded data
983 * @throws IOException
984 * in case of I/O error
987 public static String
base64(String data
, boolean zip
) throws IOException
{
988 return base64(getBytes(data
), zip
);
992 * @deprecated please use {@link StringUtils#zip64(String)} or
993 * {@link StringUtils#base64(String)} instead.
998 * TRUE to zip it before Base64 encoding it, FALSE for Base64
1001 * @return the encoded data
1003 * @throws IOException
1004 * in case of I/O error
1007 public static String
base64(byte[] data
, boolean zip
) throws IOException
{
1012 Base64InputStream b64
= new Base64InputStream(new ByteArrayInputStream(
1015 return IOUtils
.readSmallStream(b64
);
1022 * @deprecated please use {@link Base64OutputStream} and
1023 * {@link GZIPOutputStream} instead.
1026 * NOT USED ANYMORE, it is always considered FALSE now
1029 public static OutputStream
base64(OutputStream data
, boolean zip
,
1030 boolean breakLines
) throws IOException
{
1031 OutputStream out
= new Base64OutputStream(data
);
1033 out
= new java
.util
.zip
.GZIPOutputStream(out
);
1040 * Unconvert the given data from Base64 format back to a raw array of bytes.
1042 * Will automatically detect zipped data and also uncompress it before
1043 * returning, unless ZIP is false.
1045 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1048 * the data to unconvert
1050 * TRUE to also uncompress the data from a GZIP format
1051 * automatically; if set to FALSE, zipped data can be returned
1053 * @return the raw data represented by the given Base64 {@link String},
1054 * optionally compressed with GZIP
1056 * @throws IOException
1057 * in case of I/O errors
1060 public static byte[] unbase64(String data
, boolean zip
) throws IOException
{
1061 byte[] buffer
= unbase64(data
);
1067 GZIPInputStream zipped
= new GZIPInputStream(
1068 new ByteArrayInputStream(buffer
));
1070 ByteArrayOutputStream out
= new ByteArrayOutputStream();
1072 IOUtils
.write(zipped
, out
);
1073 return out
.toByteArray();
1080 } catch (Exception e
) {
1086 * Unconvert the given data from Base64 format back to a raw array of bytes.
1088 * Will automatically detect zipped data and also uncompress it before
1089 * returning, unless ZIP is false.
1091 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1094 * the data to unconvert
1096 * TRUE to also uncompress the data from a GZIP format
1097 * automatically; if set to FALSE, zipped data can be returned
1099 * @return the raw data represented by the given Base64 {@link String},
1100 * optionally compressed with GZIP
1102 * @throws IOException
1103 * in case of I/O errors
1106 public static InputStream
unbase64(InputStream data
, boolean zip
)
1107 throws IOException
{
1108 return new ByteArrayInputStream(unbase64(IOUtils
.readSmallStream(data
),
1113 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1116 public static byte[] unbase64(byte[] data
, int offset
, int count
,
1117 boolean zip
) throws IOException
{
1118 byte[] dataPart
= Arrays
.copyOfRange(data
, offset
, offset
+ count
);
1119 return unbase64(new String(dataPart
, "UTF-8"), zip
);
1123 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1126 public static String
unbase64s(String data
, boolean zip
) throws IOException
{
1127 return new String(unbase64(data
, zip
), "UTF-8");
1131 * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1134 public static String
unbase64s(byte[] data
, int offset
, int count
,
1135 boolean zip
) throws IOException
{
1136 return new String(unbase64(data
, offset
, count
, zip
), "UTF-8");