1 package be
.nikiroo
.utils
;
3 import java
.io
.ByteArrayInputStream
;
4 import java
.io
.IOException
;
5 import java
.io
.InputStream
;
6 import java
.io
.OutputStream
;
7 import java
.io
.UnsupportedEncodingException
;
8 import java
.security
.MessageDigest
;
9 import java
.security
.NoSuchAlgorithmException
;
10 import java
.text
.Normalizer
;
11 import java
.text
.Normalizer
.Form
;
12 import java
.text
.ParseException
;
13 import java
.text
.SimpleDateFormat
;
14 import java
.util
.AbstractMap
;
15 import java
.util
.ArrayList
;
16 import java
.util
.Date
;
17 import java
.util
.List
;
18 import java
.util
.Map
.Entry
;
19 import java
.util
.Scanner
;
20 import java
.util
.regex
.Pattern
;
22 import org
.unbescape
.html
.HtmlEscape
;
23 import org
.unbescape
.html
.HtmlEscapeLevel
;
24 import org
.unbescape
.html
.HtmlEscapeType
;
27 * This class offers some utilities based around {@link String}s.
31 public class StringUtils
{
33 * This enum type will decide the alignment of a {@link String} when padding
34 * or justification is applied (if there is enough horizontal space for it
37 public enum Alignment
{
38 /** Aligned at left. */
42 /** Aligned at right. */
44 /** Full justified (to both left and right). */
47 // Old Deprecated values:
49 /** DEPRECATED: please use LEFT. */
52 /** DEPRECATED: please use CENTER. */
55 /** DEPRECATED: please use RIGHT. */
60 * Return the non-deprecated version of this enum if needed (or return
63 * @return the non-deprecated value
65 Alignment
undeprecate() {
66 if (this == Beginning
)
76 static private Pattern marks
= getMarks();
79 * Fix the size of the given {@link String} either with space-padding or by
83 * the {@link String} to fix
85 * the size of the resulting {@link String} or -1 for a noop
87 * @return the resulting {@link String} of size <i>size</i>
89 static public String
padString(String text
, int width
) {
90 return padString(text
, width
, true, null);
94 * Fix the size of the given {@link String} either with space-padding or by
95 * optionally shortening it.
98 * the {@link String} to fix
100 * the size of the resulting {@link String} if the text fits or
101 * if cut is TRUE or -1 for a noop
103 * cut the {@link String} shorter if needed
105 * align the {@link String} in this position if we have enough
106 * space (default is Alignment.LEFT)
108 * @return the resulting {@link String} of size <i>size</i> minimum
110 static public String
padString(String text
, int width
, boolean cut
,
114 align
= Alignment
.LEFT
;
117 align
= align
.undeprecate();
123 int diff
= width
- text
.length();
127 text
= text
.substring(0, width
);
128 } else if (diff
> 0) {
129 if (diff
< 2 && align
!= Alignment
.RIGHT
)
130 align
= Alignment
.LEFT
;
134 text
= new String(new char[diff
]).replace('\0', ' ') + text
;
137 int pad1
= (diff
) / 2;
138 int pad2
= (diff
+ 1) / 2;
139 text
= new String(new char[pad1
]).replace('\0', ' ') + text
140 + new String(new char[pad2
]).replace('\0', ' ');
144 text
= text
+ new String(new char[diff
]).replace('\0', ' ');
154 * Justify a text into width-sized (at the maximum) lines.
157 * the {@link String} to justify
159 * the maximum size of the resulting lines
161 * @return a list of justified text lines
163 static public List
<String
> justifyText(String text
, int width
) {
164 return justifyText(text
, width
, null);
168 * Justify a text into width-sized (at the maximum) lines.
171 * the {@link String} to justify
173 * the maximum size of the resulting lines
175 * align the lines in this position (default is
176 * Alignment.LEFT)
178 * @return a list of justified text lines
180 static public List
<String
> justifyText(String text
, int width
,
183 align
= Alignment
.LEFT
;
186 align
= align
.undeprecate();
190 return StringJustifier
.center(text
, width
);
192 return StringJustifier
.right(text
, width
);
194 return StringJustifier
.full(text
, width
);
197 return StringJustifier
.left(text
, width
);
202 * Justify a text into width-sized (at the maximum) lines.
205 * the {@link String} to justify
207 * the maximum size of the resulting lines
209 * @return a list of justified text lines
211 static public List
<String
> justifyText(List
<String
> text
, int width
) {
212 return justifyText(text
, width
, null);
216 * Justify a text into width-sized (at the maximum) lines.
219 * the {@link String} to justify
221 * the maximum size of the resulting lines
223 * align the lines in this position (default is
224 * Alignment.LEFT)
226 * @return a list of justified text lines
228 static public List
<String
> justifyText(List
<String
> text
, int width
,
230 List
<String
> result
= new ArrayList
<String
>();
232 // Content <-> Bullet spacing (null = no spacing)
233 List
<Entry
<String
, String
>> lines
= new ArrayList
<Entry
<String
, String
>>();
234 StringBuilder previous
= null;
235 StringBuilder tmp
= new StringBuilder();
236 String previousItemBulletSpacing
= null;
237 String itemBulletSpacing
= null;
238 for (String inputLine
: text
) {
239 boolean previousLineComplete
= true;
241 String current
= inputLine
.replace("\t", " ");
242 itemBulletSpacing
= getItemSpacing(current
);
243 boolean bullet
= isItemLine(current
);
244 if ((previousItemBulletSpacing
== null || itemBulletSpacing
245 .length() <= previousItemBulletSpacing
.length()) && !bullet
) {
246 itemBulletSpacing
= null;
249 if (itemBulletSpacing
!= null) {
250 current
= current
.trim();
251 if (!current
.isEmpty() && bullet
) {
252 current
= current
.substring(1);
254 current
= current
.trim();
255 previousLineComplete
= bullet
;
258 for (String word
: current
.split(" ")) {
259 if (word
.isEmpty()) {
263 if (tmp
.length() > 0) {
266 tmp
.append(word
.trim());
268 current
= tmp
.toString();
270 previousLineComplete
= current
.isEmpty()
271 || previousItemBulletSpacing
!= null
272 || (previous
!= null && isFullLine(previous
))
273 || isHrLine(current
) || isHrLine(previous
);
276 if (previous
== null) {
277 previous
= new StringBuilder();
279 if (previousLineComplete
) {
280 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(
281 previous
.toString(), previousItemBulletSpacing
));
282 previous
.setLength(0);
283 previousItemBulletSpacing
= itemBulletSpacing
;
285 previous
.append(' ');
289 previous
.append(current
);
293 if (previous
!= null) {
294 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(previous
295 .toString(), previousItemBulletSpacing
));
298 for (Entry
<String
, String
> line
: lines
) {
299 String content
= line
.getKey();
300 String spacing
= line
.getValue();
302 String bullet
= "- ";
303 if (spacing
== null) {
308 if (spacing
.length() > width
+ 3) {
312 for (String subline
: StringUtils
.justifyText(content
, width
313 - (spacing
.length() + bullet
.length()), align
)) {
314 result
.add(spacing
+ bullet
+ subline
);
315 if (!bullet
.isEmpty()) {
325 * Sanitise the given input to make it more Terminal-friendly by removing
326 * combining characters.
329 * the input to sanitise
330 * @param allowUnicode
331 * allow Unicode or only allow ASCII Latin characters
333 * @return the sanitised {@link String}
335 static public String
sanitize(String input
, boolean allowUnicode
) {
336 return sanitize(input
, allowUnicode
, !allowUnicode
);
340 * Sanitise the given input to make it more Terminal-friendly by removing
341 * combining characters.
344 * the input to sanitise
345 * @param allowUnicode
346 * allow Unicode or only allow ASCII Latin characters
347 * @param removeAllAccents
348 * TRUE to replace all accentuated characters by their non
349 * accentuated counter-parts
351 * @return the sanitised {@link String}
353 static public String
sanitize(String input
, boolean allowUnicode
,
354 boolean removeAllAccents
) {
356 if (removeAllAccents
) {
357 input
= Normalizer
.normalize(input
, Form
.NFKD
);
359 input
= marks
.matcher(input
).replaceAll("");
363 input
= Normalizer
.normalize(input
, Form
.NFKC
);
366 StringBuilder builder
= new StringBuilder();
367 for (int index
= 0; index
< input
.length(); index
++) {
368 char car
= input
.charAt(index
);
369 // displayable chars in ASCII are in the range 32<->255,
371 if (car
>= 32 && car
<= 255 && car
!= 127) {
375 input
= builder
.toString();
382 * Convert between the time in milliseconds to a {@link String} in a "fixed"
383 * way (to exchange data over the wire, for instance).
385 * Precise to the second.
388 * the specified number of milliseconds since the standard base
389 * time known as "the epoch", namely January 1, 1970, 00:00:00
392 * @return the time as a {@link String}
394 static public String
fromTime(long time
) {
395 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
396 return sdf
.format(new Date(time
));
400 * Convert between the time as a {@link String} to milliseconds in a "fixed"
401 * way (to exchange data over the wire, for instance).
403 * Precise to the second.
406 * the time as a {@link String}
408 * @return the number of milliseconds since the standard base time known as
409 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
412 * @throws ParseException
413 * in case of parse error
415 static public long toTime(String displayTime
) throws ParseException
{
416 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
417 return sdf
.parse(displayTime
).getTime();
421 * Return a hash of the given {@link String}.
428 static public String
getMd5Hash(String input
) {
430 MessageDigest md
= MessageDigest
.getInstance("MD5");
431 md
.update(input
.getBytes("UTF-8"));
432 byte byteData
[] = md
.digest();
434 StringBuffer hexString
= new StringBuffer();
435 for (int i
= 0; i
< byteData
.length
; i
++) {
436 String hex
= Integer
.toHexString(0xff & byteData
[i
]);
437 if (hex
.length() == 1)
438 hexString
.append('0');
439 hexString
.append(hex
);
442 return hexString
.toString();
443 } catch (NoSuchAlgorithmException e
) {
445 } catch (UnsupportedEncodingException e
) {
451 * Remove the HTML content from the given input, and un-html-ize the rest.
454 * the HTML-encoded content
456 * @return the HTML-free equivalent content
458 public static String
unhtml(String html
) {
459 StringBuilder builder
= new StringBuilder();
462 for (char car
: html
.toCharArray()) {
465 } else if (car
== '>') {
467 } else if (inTag
<= 0) {
472 char nbsp
= ' '; // non-breakable space (a special char)
474 return HtmlEscape
.unescapeHtml(builder
.toString()).replace(nbsp
, space
);
478 * Escape the given {@link String} so it can be used in XML, as content.
481 * the input {@link String}
483 * @return the escaped {@link String}
485 public static String
xmlEscape(String input
) {
490 return HtmlEscape
.escapeHtml(input
,
491 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
492 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
496 * Escape the given {@link String} so it can be used in XML, as text content
497 * inside double-quotes.
500 * the input {@link String}
502 * @return the escaped {@link String}
504 public static String
xmlEscapeQuote(String input
) {
509 return HtmlEscape
.escapeHtml(input
,
510 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
511 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
515 * Zip the data and then encode it into Base64.
517 * @deprecated use {@link StringUtils#base64(byte[], boolean)} with the
518 * correct parameter instead
523 * @return the Base64 zipped version
526 public static String
zip64(String data
) {
528 return Base64
.encodeBytes(data
.getBytes("UTF-8"), Base64
.GZIP
);
529 } catch (IOException e
) {
536 * Unconvert from Base64 then unzip the content.
538 * @deprecated use {@link StringUtils#unbase64s(String, boolean)} with the
539 * correct parameter instead
542 * the data in Base64 format
544 * @return the raw data
546 * @throws IOException
547 * in case of I/O error
550 public static String
unzip64(String data
) throws IOException
{
551 ByteArrayInputStream in
= new ByteArrayInputStream(Base64
.decode(data
,
554 Scanner scan
= new Scanner(in
);
555 scan
.useDelimiter("\\A");
564 * Convert the given data to Base64 format.
567 * the data to convert
569 * TRUE to also compress the data in GZIP format; remember that
570 * compressed and not-compressed content are different; you need
571 * to know which is which when decoding
573 * @return the Base64 {@link String} representation of the data
575 * @throws IOException
576 * in case of I/O errors
578 public static String
base64(String data
, boolean zip
) throws IOException
{
579 return base64(data
.getBytes("UTF-8"), zip
);
583 * Convert the given data to Base64 format.
586 * the data to convert
588 * TRUE to also compress the data in GZIP format; remember that
589 * compressed and not-compressed content are different; you need
590 * to know which is which when decoding
592 * @return the Base64 {@link String} representation of the data
594 * @throws IOException
595 * in case of I/O errors
597 public static String
base64(byte[] data
, boolean zip
) throws IOException
{
598 return Base64
.encodeBytes(data
, zip ? Base64
.GZIP
: Base64
.NO_OPTIONS
);
602 * Convert the given data to Base64 format.
605 * the data to convert
607 * TRUE to also uncompress the data from a GZIP format; take care
608 * about this flag, as it could easily cause errors in the
609 * returned content or an {@link IOException}
611 * TRUE to break lines on every 76th character
613 * @return the Base64 {@link String} representation of the data
615 * @throws IOException
616 * in case of I/O errors
618 public static OutputStream
base64(OutputStream data
, boolean zip
,
619 boolean breakLines
) throws IOException
{
620 OutputStream out
= new Base64
.OutputStream(data
,
621 breakLines ? Base64
.DO_BREAK_LINES
& Base64
.ENCODE
625 out
= new java
.util
.zip
.GZIPOutputStream(out
);
632 * Convert the given data to Base64 format.
635 * the data to convert
637 * TRUE to also uncompress the data from a GZIP format; take care
638 * about this flag, as it could easily cause errors in the
639 * returned content or an {@link IOException}
641 * TRUE to break lines on every 76th character
643 * @return the Base64 {@link String} representation of the data
645 * @throws IOException
646 * in case of I/O errors
648 public static InputStream
base64(InputStream data
, boolean zip
,
649 boolean breakLines
) throws IOException
{
651 data
= new java
.util
.zip
.GZIPInputStream(data
);
654 return new Base64
.InputStream(data
, breakLines ? Base64
.DO_BREAK_LINES
655 & Base64
.ENCODE
: Base64
.ENCODE
);
659 * Unconvert the given data from Base64 format back to a raw array of bytes.
662 * the data to unconvert
664 * TRUE to also uncompress the data from a GZIP format; take care
665 * about this flag, as it could easily cause errors in the
666 * returned content or an {@link IOException}
668 * @return the raw data represented by the given Base64 {@link String},
669 * optionally compressed with GZIP
671 * @throws IOException
672 * in case of I/O errors
674 public static byte[] unbase64(String data
, boolean zip
) throws IOException
{
675 return Base64
.decode(data
, zip ? Base64
.GZIP
: Base64
.NO_OPTIONS
);
679 * Unconvert the given data from Base64 format back to a raw array of bytes.
682 * the data to unconvert
684 * TRUE to also uncompress the data from a GZIP format; take care
685 * about this flag, as it could easily cause errors in the
686 * returned content or an {@link IOException}
688 * TRUE to break lines on every 76th character
690 * @return the raw data represented by the given Base64 {@link String}
692 * @throws IOException
693 * in case of I/O errors
695 public static OutputStream
unbase64(OutputStream data
, boolean zip
,
696 boolean breakLines
) throws IOException
{
697 OutputStream out
= new Base64
.OutputStream(data
,
698 breakLines ? Base64
.DO_BREAK_LINES
& Base64
.ENCODE
702 out
= new java
.util
.zip
.GZIPOutputStream(out
);
709 * Unconvert the given data from Base64 format back to a raw array of bytes.
712 * the data to unconvert
714 * TRUE to also uncompress the data from a GZIP format; take care
715 * about this flag, as it could easily cause errors in the
716 * returned content or an {@link IOException}
718 * TRUE to break lines on every 76th character
720 * @return the raw data represented by the given Base64 {@link String}
722 * @throws IOException
723 * in case of I/O errors
725 public static InputStream
unbase64(InputStream data
, boolean zip
,
726 boolean breakLines
) throws IOException
{
728 data
= new java
.util
.zip
.GZIPInputStream(data
);
731 return new Base64
.InputStream(data
, breakLines ? Base64
.DO_BREAK_LINES
732 & Base64
.ENCODE
: Base64
.ENCODE
);
736 * Unconvert the given data from Base64 format back to a {@link String}.
739 * the data to unconvert
741 * TRUE to also uncompress the data from a GZIP format; take care
742 * about this flag, as it could easily cause errors in the
743 * returned content or an {@link IOException}
745 * @return the {@link String} represented by the given Base64 {@link String}
746 * , optionally compressed with GZIP
748 * @throws IOException
749 * in case of I/O errors
751 public static String
unbase64s(String data
, boolean zip
) throws IOException
{
752 ByteArrayInputStream in
= new ByteArrayInputStream(unbase64(data
, zip
));
754 Scanner scan
= new Scanner(in
, "UTF-8");
755 scan
.useDelimiter("\\A");
764 * The "remove accents" pattern.
766 * @return the pattern, or NULL if a problem happens
768 private static Pattern
getMarks() {
771 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
772 } catch (Exception e
) {
773 // Can fail on Android...
779 // justify List<String> related:
783 * Check if this line ends as a complete line (ends with a "." or similar).
785 * Note that we consider an empty line as full, and a line ending with
786 * spaces as not complete.
791 * @return TRUE if it does
793 static private boolean isFullLine(StringBuilder line
) {
794 if (line
.length() == 0) {
798 char lastCar
= line
.charAt(line
.length() - 1);
808 case '"': // double quotes
820 * Check if this line represent an item in a list or description (i.e.,
821 * check that the first non-space char is "-").
826 * @return TRUE if it is
828 static private boolean isItemLine(String line
) {
829 String spacing
= getItemSpacing(line
);
830 return spacing
!= null && !spacing
.isEmpty()
831 && line
.charAt(spacing
.length()) == '-';
835 * Return all the spaces that start this line (or Empty if none).
838 * the line to get the starting spaces from
840 * @return the left spacing
842 static private String
getItemSpacing(String line
) {
844 for (i
= 0; i
< line
.length(); i
++) {
845 if (line
.charAt(i
) != ' ') {
846 return line
.substring(0, i
);
854 * This line is a horizontal spacer line.
859 * @return TRUE if it is
861 static private boolean isHrLine(CharSequence line
) {
864 for (int i
= 0; i
< line
.length(); i
++) {
865 char car
= line
.charAt(i
);
866 if (car
== ' ' || car
== '\t' || car
== '*' || car
== '-'
867 || car
== '_' || car
== '~' || car
== '=' || car
== '/'