1 package be
.nikiroo
.utils
;
3 import java
.io
.ByteArrayInputStream
;
4 import java
.io
.ByteArrayOutputStream
;
5 import java
.io
.IOException
;
6 import java
.io
.InputStream
;
7 import java
.io
.OutputStream
;
8 import java
.io
.UnsupportedEncodingException
;
9 import java
.security
.MessageDigest
;
10 import java
.security
.NoSuchAlgorithmException
;
11 import java
.text
.Normalizer
;
12 import java
.text
.Normalizer
.Form
;
13 import java
.text
.ParseException
;
14 import java
.text
.SimpleDateFormat
;
15 import java
.util
.AbstractMap
;
16 import java
.util
.ArrayList
;
17 import java
.util
.Date
;
18 import java
.util
.List
;
19 import java
.util
.Map
.Entry
;
20 import java
.util
.regex
.Pattern
;
21 import java
.util
.zip
.GZIPInputStream
;
22 import java
.util
.zip
.GZIPOutputStream
;
24 import org
.unbescape
.html
.HtmlEscape
;
25 import org
.unbescape
.html
.HtmlEscapeLevel
;
26 import org
.unbescape
.html
.HtmlEscapeType
;
28 import be
.nikiroo
.utils
.streams
.Base64InputStream
;
31 * This class offers some utilities based around {@link String}s.
35 public class StringUtils
{
37 * This enum type will decide the alignment of a {@link String} when padding
38 * or justification is applied (if there is enough horizontal space for it
41 public enum Alignment
{
42 /** Aligned at left. */
46 /** Aligned at right. */
48 /** Full justified (to both left and right). */
51 // Old Deprecated values:
53 /** DEPRECATED: please use LEFT. */
56 /** DEPRECATED: please use CENTER. */
59 /** DEPRECATED: please use RIGHT. */
64 * Return the non-deprecated version of this enum if needed (or return
67 * @return the non-deprecated value
69 Alignment
undeprecate() {
70 if (this == Beginning
)
80 static private Pattern marks
= getMarks();
83 * Fix the size of the given {@link String} either with space-padding or by
87 * the {@link String} to fix
89 * the size of the resulting {@link String} or -1 for a noop
91 * @return the resulting {@link String} of size <i>size</i>
93 static public String
padString(String text
, int width
) {
94 return padString(text
, width
, true, null);
98 * Fix the size of the given {@link String} either with space-padding or by
99 * optionally shortening it.
102 * the {@link String} to fix
104 * the size of the resulting {@link String} if the text fits or
105 * if cut is TRUE or -1 for a noop
107 * cut the {@link String} shorter if needed
109 * align the {@link String} in this position if we have enough
110 * space (default is Alignment.LEFT)
112 * @return the resulting {@link String} of size <i>size</i> minimum
114 static public String
padString(String text
, int width
, boolean cut
,
118 align
= Alignment
.LEFT
;
121 align
= align
.undeprecate();
127 int diff
= width
- text
.length();
131 text
= text
.substring(0, width
);
132 } else if (diff
> 0) {
133 if (diff
< 2 && align
!= Alignment
.RIGHT
)
134 align
= Alignment
.LEFT
;
138 text
= new String(new char[diff
]).replace('\0', ' ') + text
;
141 int pad1
= (diff
) / 2;
142 int pad2
= (diff
+ 1) / 2;
143 text
= new String(new char[pad1
]).replace('\0', ' ') + text
144 + new String(new char[pad2
]).replace('\0', ' ');
148 text
= text
+ new String(new char[diff
]).replace('\0', ' ');
158 * Justify a text into width-sized (at the maximum) lines.
161 * the {@link String} to justify
163 * the maximum size of the resulting lines
165 * @return a list of justified text lines
167 static public List
<String
> justifyText(String text
, int width
) {
168 return justifyText(text
, width
, null);
172 * Justify a text into width-sized (at the maximum) lines.
175 * the {@link String} to justify
177 * the maximum size of the resulting lines
179 * align the lines in this position (default is
180 * Alignment.LEFT)
182 * @return a list of justified text lines
184 static public List
<String
> justifyText(String text
, int width
,
187 align
= Alignment
.LEFT
;
190 align
= align
.undeprecate();
194 return StringJustifier
.center(text
, width
);
196 return StringJustifier
.right(text
, width
);
198 return StringJustifier
.full(text
, width
);
201 return StringJustifier
.left(text
, width
);
206 * Justify a text into width-sized (at the maximum) lines.
209 * the {@link String} to justify
211 * the maximum size of the resulting lines
213 * @return a list of justified text lines
215 static public List
<String
> justifyText(List
<String
> text
, int width
) {
216 return justifyText(text
, width
, null);
220 * Justify a text into width-sized (at the maximum) lines.
223 * the {@link String} to justify
225 * the maximum size of the resulting lines
227 * align the lines in this position (default is
228 * Alignment.LEFT)
230 * @return a list of justified text lines
232 static public List
<String
> justifyText(List
<String
> text
, int width
,
234 List
<String
> result
= new ArrayList
<String
>();
236 // Content <-> Bullet spacing (null = no spacing)
237 List
<Entry
<String
, String
>> lines
= new ArrayList
<Entry
<String
, String
>>();
238 StringBuilder previous
= null;
239 StringBuilder tmp
= new StringBuilder();
240 String previousItemBulletSpacing
= null;
241 String itemBulletSpacing
= null;
242 for (String inputLine
: text
) {
243 boolean previousLineComplete
= true;
245 String current
= inputLine
.replace("\t", " ");
246 itemBulletSpacing
= getItemSpacing(current
);
247 boolean bullet
= isItemLine(current
);
248 if ((previousItemBulletSpacing
== null || itemBulletSpacing
249 .length() <= previousItemBulletSpacing
.length()) && !bullet
) {
250 itemBulletSpacing
= null;
253 if (itemBulletSpacing
!= null) {
254 current
= current
.trim();
255 if (!current
.isEmpty() && bullet
) {
256 current
= current
.substring(1);
258 current
= current
.trim();
259 previousLineComplete
= bullet
;
262 for (String word
: current
.split(" ")) {
263 if (word
.isEmpty()) {
267 if (tmp
.length() > 0) {
270 tmp
.append(word
.trim());
272 current
= tmp
.toString();
274 previousLineComplete
= current
.isEmpty()
275 || previousItemBulletSpacing
!= null
276 || (previous
!= null && isFullLine(previous
))
277 || isHrLine(current
) || isHrLine(previous
);
280 if (previous
== null) {
281 previous
= new StringBuilder();
283 if (previousLineComplete
) {
284 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(
285 previous
.toString(), previousItemBulletSpacing
));
286 previous
.setLength(0);
287 previousItemBulletSpacing
= itemBulletSpacing
;
289 previous
.append(' ');
293 previous
.append(current
);
297 if (previous
!= null) {
298 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(previous
299 .toString(), previousItemBulletSpacing
));
302 for (Entry
<String
, String
> line
: lines
) {
303 String content
= line
.getKey();
304 String spacing
= line
.getValue();
306 String bullet
= "- ";
307 if (spacing
== null) {
312 if (spacing
.length() > width
+ 3) {
316 for (String subline
: StringUtils
.justifyText(content
, width
317 - (spacing
.length() + bullet
.length()), align
)) {
318 result
.add(spacing
+ bullet
+ subline
);
319 if (!bullet
.isEmpty()) {
329 * Sanitise the given input to make it more Terminal-friendly by removing
330 * combining characters.
333 * the input to sanitise
334 * @param allowUnicode
335 * allow Unicode or only allow ASCII Latin characters
337 * @return the sanitised {@link String}
339 static public String
sanitize(String input
, boolean allowUnicode
) {
340 return sanitize(input
, allowUnicode
, !allowUnicode
);
344 * Sanitise the given input to make it more Terminal-friendly by removing
345 * combining characters.
348 * the input to sanitise
349 * @param allowUnicode
350 * allow Unicode or only allow ASCII Latin characters
351 * @param removeAllAccents
352 * TRUE to replace all accentuated characters by their non
353 * accentuated counter-parts
355 * @return the sanitised {@link String}
357 static public String
sanitize(String input
, boolean allowUnicode
,
358 boolean removeAllAccents
) {
360 if (removeAllAccents
) {
361 input
= Normalizer
.normalize(input
, Form
.NFKD
);
363 input
= marks
.matcher(input
).replaceAll("");
367 input
= Normalizer
.normalize(input
, Form
.NFKC
);
370 StringBuilder builder
= new StringBuilder();
371 for (int index
= 0; index
< input
.length(); index
++) {
372 char car
= input
.charAt(index
);
373 // displayable chars in ASCII are in the range 32<->255,
375 if (car
>= 32 && car
<= 255 && car
!= 127) {
379 input
= builder
.toString();
386 * Convert between the time in milliseconds to a {@link String} in a "fixed"
387 * way (to exchange data over the wire, for instance).
389 * Precise to the second.
392 * the specified number of milliseconds since the standard base
393 * time known as "the epoch", namely January 1, 1970, 00:00:00
396 * @return the time as a {@link String}
398 static public String
fromTime(long time
) {
399 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
400 return sdf
.format(new Date(time
));
404 * Convert between the time as a {@link String} to milliseconds in a "fixed"
405 * way (to exchange data over the wire, for instance).
407 * Precise to the second.
410 * the time as a {@link String}
412 * @return the number of milliseconds since the standard base time known as
413 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
416 * @throws ParseException
417 * in case of parse error
419 static public long toTime(String displayTime
) throws ParseException
{
420 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
421 return sdf
.parse(displayTime
).getTime();
425 * Return a hash of the given {@link String}.
432 static public String
getMd5Hash(String input
) {
434 MessageDigest md
= MessageDigest
.getInstance("MD5");
435 md
.update(getBytes(input
));
436 byte byteData
[] = md
.digest();
438 StringBuffer hexString
= new StringBuffer();
439 for (int i
= 0; i
< byteData
.length
; i
++) {
440 String hex
= Integer
.toHexString(0xff & byteData
[i
]);
441 if (hex
.length() == 1)
442 hexString
.append('0');
443 hexString
.append(hex
);
446 return hexString
.toString();
447 } catch (NoSuchAlgorithmException e
) {
453 * Remove the HTML content from the given input, and un-html-ize the rest.
456 * the HTML-encoded content
458 * @return the HTML-free equivalent content
460 public static String
unhtml(String html
) {
461 StringBuilder builder
= new StringBuilder();
464 for (char car
: html
.toCharArray()) {
467 } else if (car
== '>') {
469 } else if (inTag
<= 0) {
474 char nbsp
= ' '; // non-breakable space (a special char)
476 return HtmlEscape
.unescapeHtml(builder
.toString()).replace(nbsp
, space
);
480 * Escape the given {@link String} so it can be used in XML, as content.
483 * the input {@link String}
485 * @return the escaped {@link String}
487 public static String
xmlEscape(String input
) {
492 return HtmlEscape
.escapeHtml(input
,
493 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
494 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
498 * Escape the given {@link String} so it can be used in XML, as text content
499 * inside double-quotes.
502 * the input {@link String}
504 * @return the escaped {@link String}
506 public static String
xmlEscapeQuote(String input
) {
511 return HtmlEscape
.escapeHtml(input
,
512 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
513 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
517 * Zip the data and then encode it into Base64.
522 * @return the Base64 zipped version
524 * @throws IOException
525 * in case of I/O error
527 public static String
zip64s(String data
) throws IOException
{
529 return zip64(getBytes(data
));
530 } catch (UnsupportedEncodingException e
) {
531 // All conforming JVM are required to support UTF-8
538 * Zip the data and then encode it into Base64.
543 * @return the Base64 zipped version
545 * @throws IOException
546 * in case of I/O error
548 public static String
zip64(byte[] data
) throws IOException
{
550 ByteArrayOutputStream bout
= new ByteArrayOutputStream();
552 OutputStream out
= new GZIPOutputStream(bout
);
559 data
= bout
.toByteArray();
564 InputStream in
= new ByteArrayInputStream(data
);
566 in
= new Base64InputStream(in
, true);
567 return new String(IOUtils
.toByteArray(in
), "UTF-8");
574 * Unconvert from Base64 then unzip the content, which is assumed to be a
578 * the data in Base64 format
580 * @return the raw data
582 * @throws IOException
583 * in case of I/O error
585 public static String
unzip64s(String data
) throws IOException
{
586 return new String(unzip64(data
), "UTF-8");
590 * Unconvert from Base64 then unzip the content.
593 * the data in Base64 format
595 * @return the raw data
597 * @throws IOException
598 * in case of I/O error
600 public static byte[] unzip64(String data
) throws IOException
{
601 InputStream in
= new Base64InputStream(new ByteArrayInputStream(
602 getBytes(data
)), false);
604 in
= new GZIPInputStream(in
);
605 return IOUtils
.toByteArray(in
);
612 * Convert the given data to Base64 format.
615 * the data to convert
617 * @return the Base64 {@link String} representation of the data
619 * @throws IOException
620 * in case of I/O errors
622 public static String
base64(String data
) throws IOException
{
623 return base64(getBytes(data
));
627 * Convert the given data to Base64 format.
630 * the data to convert
632 * @return the Base64 {@link String} representation of the data
634 * @throws IOException
635 * in case of I/O errors
637 public static String
base64(byte[] data
) throws IOException
{
638 Base64InputStream in
= new Base64InputStream(new ByteArrayInputStream(
641 return new String(IOUtils
.toByteArray(in
), "UTF-8");
648 * Unconvert the given data from Base64 format back to a raw array of bytes.
651 * the data to unconvert
653 * @return the raw data represented by the given Base64 {@link String},
655 * @throws IOException
656 * in case of I/O errors
658 public static byte[] unbase64(String data
) throws IOException
{
659 Base64InputStream in
= new Base64InputStream(new ByteArrayInputStream(
660 getBytes(data
)), false);
662 return IOUtils
.toByteArray(in
);
669 * Unconvert the given data from Base64 format back to a {@link String}.
672 * the data to unconvert
674 * @return the {@link String} represented by the given Base64 {@link String}
676 * @throws IOException
677 * in case of I/O errors
679 public static String
unbase64s(String data
) throws IOException
{
680 return new String(unbase64(data
), "UTF-8");
684 * Return a display {@link String} for the given value, which can be
685 * suffixed with "k", "M" or "G" depending upon the number, if it is big enough.
690 * <li><tt>8 765</tt> becomes "8k"</li>
691 * <li><tt>998 765</tt> becomes "998k"</li>
692 * <li><tt>12 987 364</tt> becomes "12M"</li>
693 * <li><tt>5 534 333 221</tt> becomes "5G"</li>
697 * the value to convert
699 * @return the display value
701 public static String
formatNumber(long value
) {
702 return formatNumber(value
, 0);
706 * Return a display {@link String} for the given value, which can be
707 * suffixed with "k", "M" or "G" depending upon the number, if it is big enough.
709 * Examples (assuming decimalPositions = 1):
711 * <li><tt>8 765</tt> becomes "8.7k"</li>
712 * <li><tt>998 765</tt> becomes "998.7k"</li>
713 * <li><tt>12 987 364</tt> becomes "12.9M"</li>
714 * <li><tt>5 534 333 221</tt> becomes "5.5G"</li>
718 * the value to convert
719 * @param decimalPositions
720 * the number of decimal positions to keep
722 * @return the display value
724 public static String
formatNumber(long value
, int decimalPositions
) {
725 long userValue
= value
;
729 if (value
>= 1000000000l) {
731 userValue
= value
/ 1000000000l;
733 } else if (value
>= 1000000l) {
735 userValue
= value
/ 1000000l;
737 } else if (value
>= 1000l) {
739 userValue
= value
/ 1000l;
744 if (decimalPositions
> 0) {
745 deci
= Long
.toString(value
% mult
);
746 int size
= Long
.toString(mult
).length() - 1;
747 while (deci
.length() < size
) {
751 deci
= deci
.substring(0, Math
.min(decimalPositions
, deci
.length()));
752 while (deci
.length() < decimalPositions
) {
759 return Long
.toString(userValue
) + deci
+ suffix
;
763 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
764 * read a "display" number that can contain a "M" or "k" suffix and return
767 * Of course, the conversion to and from display form is lossy (example:
768 * <tt>6870</tt> to "6.8k" to <tt>6800</tt>).
771 * the value in display form with possible "M" and "k" suffixes,
774 * @return the value as a number, or 0 if not possible to convert
776 public static long toNumber(String value
) {
777 return toNumber(value
, 0l);
781 * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
782 * read a "display" number that can contain a "M" or "k" suffix and return
785 * Of course, the conversion to and from display form is lossy (example:
786 * <tt>6870</tt> to "6.8k" to <tt>6800</tt>).
789 * the value in display form with possible "M" and "k" suffixes,
792 * the default value if it is not possible to convert the given
795 * @return the value as a number, or <tt>def</tt> if not possible to convert
797 public static long toNumber(String value
, long def
) {
800 value
= value
.trim().toLowerCase();
803 if (value
.endsWith("g")) {
804 value
= value
.substring(0, value
.length() - 1).trim();
806 } else if (value
.endsWith("m")) {
807 value
= value
.substring(0, value
.length() - 1).trim();
809 } else if (value
.endsWith("k")) {
810 value
= value
.substring(0, value
.length() - 1).trim();
815 if (value
.contains(".")) {
816 String
[] tab
= value
.split("\\.");
817 if (tab
.length
!= 2) {
818 throw new NumberFormatException(value
);
820 double decimal
= Double
.parseDouble("0."
821 + tab
[tab
.length
- 1]);
822 deci
= ((long) (mult
* decimal
));
825 count
= mult
* Long
.parseLong(value
) + deci
;
826 } catch (Exception e
) {
834 * Return the bytes array representation of the given {@link String} in
838 * the {@link String} to transform into bytes
839 * @return the content in bytes
841 static public byte[] getBytes(String str
) {
843 return str
.getBytes("UTF-8");
844 } catch (UnsupportedEncodingException e
) {
845 // All conforming JVM must support UTF-8
852 * The "remove accents" pattern.
854 * @return the pattern, or NULL if a problem happens
856 private static Pattern
getMarks() {
859 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
860 } catch (Exception e
) {
861 // Can fail on Android...
867 // justify List<String> related:
871 * Check if this line ends as a complete line (ends with a "." or similar).
873 * Note that we consider an empty line as full, and a line ending with
874 * spaces as not complete.
879 * @return TRUE if it does
881 static private boolean isFullLine(StringBuilder line
) {
882 if (line
.length() == 0) {
886 char lastCar
= line
.charAt(line
.length() - 1);
896 case '"': // double quotes
908 * Check if this line represent an item in a list or description (i.e.,
909 * check that the first non-space char is "-").
914 * @return TRUE if it is
916 static private boolean isItemLine(String line
) {
917 String spacing
= getItemSpacing(line
);
918 return spacing
!= null && !spacing
.isEmpty()
919 && line
.charAt(spacing
.length()) == '-';
923 * Return all the spaces that start this line (or Empty if none).
926 * the line to get the starting spaces from
928 * @return the left spacing
930 static private String
getItemSpacing(String line
) {
932 for (i
= 0; i
< line
.length(); i
++) {
933 if (line
.charAt(i
) != ' ') {
934 return line
.substring(0, i
);
942 * This line is a horizontal spacer line.
947 * @return TRUE if it is
949 static private boolean isHrLine(CharSequence line
) {
952 for (int i
= 0; i
< line
.length(); i
++) {
953 char car
= line
.charAt(i
);
954 if (car
== ' ' || car
== '\t' || car
== '*' || car
== '-'
955 || car
== '_' || car
== '~' || car
== '=' || car
== '/'