1 package be
.nikiroo
.utils
;
3 import java
.io
.ByteArrayInputStream
;
4 import java
.io
.IOException
;
5 import java
.io
.UnsupportedEncodingException
;
6 import java
.security
.MessageDigest
;
7 import java
.security
.NoSuchAlgorithmException
;
8 import java
.text
.Normalizer
;
9 import java
.text
.Normalizer
.Form
;
10 import java
.text
.ParseException
;
11 import java
.text
.SimpleDateFormat
;
12 import java
.util
.AbstractMap
;
13 import java
.util
.ArrayList
;
14 import java
.util
.Date
;
15 import java
.util
.List
;
16 import java
.util
.Map
.Entry
;
17 import java
.util
.Scanner
;
18 import java
.util
.regex
.Pattern
;
20 import org
.unbescape
.html
.HtmlEscape
;
21 import org
.unbescape
.html
.HtmlEscapeLevel
;
22 import org
.unbescape
.html
.HtmlEscapeType
;
25 * This class offers some utilities based around {@link String}s.
29 public class StringUtils
{
31 * This enum type will decide the alignment of a {@link String} when padding
32 * or justification is applied (if there is enough horizontal space for it
35 public enum Alignment
{
36 /** Aligned at left. */
40 /** Aligned at right. */
42 /** Full justified (to both left and right). */
45 // Old Deprecated values:
47 /** DEPRECATED: please use LEFT. */
50 /** DEPRECATED: please use CENTER. */
53 /** DEPRECATED: please use RIGHT. */
58 * Return the non-deprecated version of this enum if needed (or return
61 * @return the non-deprecated value
63 Alignment
undeprecate() {
64 if (this == Beginning
)
74 static private Pattern marks
= getMarks();
77 * Fix the size of the given {@link String} either with space-padding or by
81 * the {@link String} to fix
83 * the size of the resulting {@link String} or -1 for a noop
85 * @return the resulting {@link String} of size <i>size</i>
87 static public String
padString(String text
, int width
) {
88 return padString(text
, width
, true, null);
92 * Fix the size of the given {@link String} either with space-padding or by
93 * optionally shortening it.
96 * the {@link String} to fix
98 * the size of the resulting {@link String} if the text fits or
99 * if cut is TRUE or -1 for a noop
101 * cut the {@link String} shorter if needed
103 * align the {@link String} in this position if we have enough
104 * space (default is Alignment.LEFT)
106 * @return the resulting {@link String} of size <i>size</i> minimum
108 static public String
padString(String text
, int width
, boolean cut
,
112 align
= Alignment
.LEFT
;
115 align
= align
.undeprecate();
121 int diff
= width
- text
.length();
125 text
= text
.substring(0, width
);
126 } else if (diff
> 0) {
127 if (diff
< 2 && align
!= Alignment
.RIGHT
)
128 align
= Alignment
.LEFT
;
132 text
= new String(new char[diff
]).replace('\0', ' ') + text
;
135 int pad1
= (diff
) / 2;
136 int pad2
= (diff
+ 1) / 2;
137 text
= new String(new char[pad1
]).replace('\0', ' ') + text
138 + new String(new char[pad2
]).replace('\0', ' ');
142 text
= text
+ new String(new char[diff
]).replace('\0', ' ');
152 * Justify a text into width-sized (at the maximum) lines.
155 * the {@link String} to justify
157 * the maximum size of the resulting lines
159 * @return a list of justified text lines
161 static public List
<String
> justifyText(String text
, int width
) {
162 return justifyText(text
, width
, null);
166 * Justify a text into width-sized (at the maximum) lines.
169 * the {@link String} to justify
171 * the maximum size of the resulting lines
173 * align the lines in this position (default is
174 * Alignment.LEFT)
176 * @return a list of justified text lines
178 static public List
<String
> justifyText(String text
, int width
,
181 align
= Alignment
.LEFT
;
184 align
= align
.undeprecate();
188 return StringJustifier
.center(text
, width
);
190 return StringJustifier
.right(text
, width
);
192 return StringJustifier
.full(text
, width
);
195 return StringJustifier
.left(text
, width
);
200 * Justify a text into width-sized (at the maximum) lines.
203 * the {@link String} to justify
205 * the maximum size of the resulting lines
207 * @return a list of justified text lines
209 static public List
<String
> justifyText(List
<String
> text
, int width
) {
210 return justifyText(text
, width
, null);
214 * Justify a text into width-sized (at the maximum) lines.
217 * the {@link String} to justify
219 * the maximum size of the resulting lines
221 * align the lines in this position (default is
222 * Alignment.Beginning)
224 * @return a list of justified text lines
226 static public List
<String
> justifyText(List
<String
> text
, int width
,
228 List
<String
> result
= new ArrayList
<String
>();
230 // Content <-> Bullet spacing (null = no spacing)
231 List
<Entry
<String
, String
>> lines
= new ArrayList
<Entry
<String
, String
>>();
232 StringBuilder previous
= null;
233 StringBuilder tmp
= new StringBuilder();
234 String previousItemBulletSpacing
= null;
235 String itemBulletSpacing
= null;
236 for (String inputLine
: text
) {
237 boolean previousLineComplete
= true;
239 String current
= inputLine
.replace("\t", " ");
240 itemBulletSpacing
= getItemSpacing(current
);
241 boolean bullet
= isItemLine(current
);
242 if ((previousItemBulletSpacing
== null || itemBulletSpacing
243 .length() <= previousItemBulletSpacing
.length()) && !bullet
) {
244 itemBulletSpacing
= null;
247 if (itemBulletSpacing
!= null) {
248 current
= current
.trim();
249 if (!current
.isEmpty() && bullet
) {
250 current
= current
.substring(1);
252 current
= current
.trim();
253 previousLineComplete
= bullet
;
256 for (String word
: current
.split(" ")) {
257 if (word
.isEmpty()) {
261 if (tmp
.length() > 0) {
264 tmp
.append(word
.trim());
266 current
= tmp
.toString();
268 previousLineComplete
= current
.isEmpty()
269 || previousItemBulletSpacing
!= null
270 || (previous
!= null && isFullLine(previous
))
271 || isHrLine(current
) || isHrLine(previous
);
274 if (previous
== null) {
275 previous
= new StringBuilder();
277 if (previousLineComplete
) {
278 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(
279 previous
.toString(), previousItemBulletSpacing
));
280 previous
.setLength(0);
281 previousItemBulletSpacing
= itemBulletSpacing
;
283 previous
.append(' ');
287 previous
.append(current
);
291 if (previous
!= null) {
292 lines
.add(new AbstractMap
.SimpleEntry
<String
, String
>(previous
293 .toString(), previousItemBulletSpacing
));
296 for (Entry
<String
, String
> line
: lines
) {
297 String content
= line
.getKey();
298 String spacing
= line
.getValue();
300 String bullet
= "- ";
301 if (spacing
== null) {
306 if (spacing
.length() > width
+ 3) {
310 for (String subline
: StringUtils
.justifyText(content
, width
311 - (spacing
.length() + bullet
.length()), align
)) {
312 result
.add(spacing
+ bullet
+ subline
);
313 if (!bullet
.isEmpty()) {
323 * Sanitise the given input to make it more Terminal-friendly by removing
324 * combining characters.
327 * the input to sanitise
328 * @param allowUnicode
329 * allow Unicode or only allow ASCII Latin characters
331 * @return the sanitised {@link String}
333 static public String
sanitize(String input
, boolean allowUnicode
) {
334 return sanitize(input
, allowUnicode
, !allowUnicode
);
338 * Sanitise the given input to make it more Terminal-friendly by removing
339 * combining characters.
342 * the input to sanitise
343 * @param allowUnicode
344 * allow Unicode or only allow ASCII Latin characters
345 * @param removeAllAccents
346 * TRUE to replace all accentuated characters by their non
347 * accentuated counter-parts
349 * @return the sanitised {@link String}
351 static public String
sanitize(String input
, boolean allowUnicode
,
352 boolean removeAllAccents
) {
354 if (removeAllAccents
) {
355 input
= Normalizer
.normalize(input
, Form
.NFKD
);
357 input
= marks
.matcher(input
).replaceAll("");
361 input
= Normalizer
.normalize(input
, Form
.NFKC
);
364 StringBuilder builder
= new StringBuilder();
365 for (int index
= 0; index
< input
.length(); index
++) {
366 char car
= input
.charAt(index
);
367 // displayable chars in ASCII are in the range 32<->255,
369 if (car
>= 32 && car
<= 255 && car
!= 127) {
373 input
= builder
.toString();
380 * Convert between the time in milliseconds to a {@link String} in a "fixed"
381 * way (to exchange data over the wire, for instance).
383 * Precise to the second.
386 * the specified number of milliseconds since the standard base
387 * time known as "the epoch", namely January 1, 1970, 00:00:00
390 * @return the time as a {@link String}
392 static public String
fromTime(long time
) {
393 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
394 return sdf
.format(new Date(time
));
398 * Convert between the time as a {@link String} to milliseconds in a "fixed"
399 * way (to exchange data over the wire, for instance).
401 * Precise to the second.
404 * the time as a {@link String}
406 * @return the number of milliseconds since the standard base time known as
407 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
410 * @throws ParseException
411 * in case of parse error
413 static public long toTime(String displayTime
) throws ParseException
{
414 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
415 return sdf
.parse(displayTime
).getTime();
419 * Return a hash of the given {@link String}.
426 static public String
getMd5Hash(String input
) {
428 MessageDigest md
= MessageDigest
.getInstance("MD5");
429 md
.update(input
.getBytes("UTF-8"));
430 byte byteData
[] = md
.digest();
432 StringBuffer hexString
= new StringBuffer();
433 for (int i
= 0; i
< byteData
.length
; i
++) {
434 String hex
= Integer
.toHexString(0xff & byteData
[i
]);
435 if (hex
.length() == 1)
436 hexString
.append('0');
437 hexString
.append(hex
);
440 return hexString
.toString();
441 } catch (NoSuchAlgorithmException e
) {
443 } catch (UnsupportedEncodingException e
) {
449 * Remove the HTML content from the given input, and un-html-ize the rest.
452 * the HTML-encoded content
454 * @return the HTML-free equivalent content
456 public static String
unhtml(String html
) {
457 StringBuilder builder
= new StringBuilder();
460 for (char car
: html
.toCharArray()) {
463 } else if (car
== '>') {
465 } else if (inTag
<= 0) {
470 char nbsp
= ' '; // non-breakable space (a special char)
472 return HtmlEscape
.unescapeHtml(builder
.toString()).replace(nbsp
, space
);
476 * Escape the given {@link String} so it can be used in XML, as content.
479 * the input {@link String}
481 * @return the escaped {@link String}
483 public static String
xmlEscape(String input
) {
488 return HtmlEscape
.escapeHtml(input
,
489 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
490 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
494 * Escape the given {@link String} so it can be used in XML, as text content
495 * inside double-quotes.
498 * the input {@link String}
500 * @return the escaped {@link String}
502 public static String
xmlEscapeQuote(String input
) {
507 return HtmlEscape
.escapeHtml(input
,
508 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
509 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
513 * Zip the data and then encode it into Base64.
515 * @deprecated use {@link StringUtils#base64(byte[], boolean)} with the
516 * correct parameter instead
521 * @return the Base64 zipped version
524 public static String
zip64(String data
) {
526 return Base64
.encodeBytes(data
.getBytes(), Base64
.GZIP
);
527 } catch (IOException e
) {
534 * Unconvert from Base64 then unzip the content.
536 * @deprecated use {@link StringUtils#unbase64s(String, boolean)} with the
537 * correct parameter instead
540 * the data in Base64 format
542 * @return the raw data
544 * @throws IOException
545 * in case of I/O error
548 public static String
unzip64(String data
) throws IOException
{
549 ByteArrayInputStream in
= new ByteArrayInputStream(Base64
.decode(data
,
552 Scanner scan
= new Scanner(in
);
553 scan
.useDelimiter("\\A");
562 * Convert the given data to Base64 format.
565 * the data to convert
567 * TRUE to also compress the data in GZIP format; remember that
568 * compressed and not-compressed content are different; you need
569 * to know which is which when decoding
571 * @return the Base64 {@link String} representation of the data
573 * @throws IOException
574 * in case of I/O errors
576 public static String
base64(byte[] data
, boolean zip
) throws IOException
{
577 return Base64
.encodeBytes(data
, zip ? Base64
.GZIP
: Base64
.NO_OPTIONS
);
581 * Unonvert the given data from Base64 format back to a raw array of bytes.
584 * the data to unconvert
586 * TRUE to also uncompress the data from a GZIP format; take care
587 * about this flag, as it could easily cause errors in the
588 * returned content or an {@link IOException}
590 * @return the raw data represented by the given Base64 {@link String},
591 * optionally compressed with GZIP
593 * @throws IOException
594 * in case of I/O errors
596 public static byte[] unbase64(String data
, boolean zip
) throws IOException
{
597 return Base64
.decode(data
, zip ? Base64
.GZIP
: Base64
.NO_OPTIONS
);
601 * Unconvert the given data from Base64 format back to a {@link String}.
604 * the data to unconvert
606 * TRUE to also uncompress the data from a GZIP format; take care
607 * about this flag, as it could easily cause errors in the
608 * returned content or an {@link IOException}
610 * @return the {@link String} represented by the given Base64 {@link String}
611 * , optionally compressed with GZIP
613 * @throws IOException
614 * in case of I/O errors
616 public static String
unbase64s(String data
, boolean zip
) throws IOException
{
617 ByteArrayInputStream in
= new ByteArrayInputStream(unbase64(data
, zip
));
619 Scanner scan
= new Scanner(in
);
620 scan
.useDelimiter("\\A");
629 * The "remove accents" pattern.
631 * @return the pattern, or NULL if a problem happens
633 private static Pattern
getMarks() {
636 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
637 } catch (Exception e
) {
638 // Can fail on Android...
644 // justify List<String> related:
648 * Check if this line ends as a complete line (ends with a "." or similar).
650 * Note that we consider an empty line as full, and a line ending with
651 * spaces as not complete.
656 * @return TRUE if it does
658 static private boolean isFullLine(StringBuilder line
) {
659 if (line
.length() == 0) {
663 char lastCar
= line
.charAt(line
.length() - 1);
673 case '"': // double quotes
685 * Check if this line represent an item in a list or description (i.e.,
686 * check that the first non-space char is "-").
691 * @return TRUE if it is
693 static private boolean isItemLine(String line
) {
694 String spacing
= getItemSpacing(line
);
695 return spacing
!= null && !spacing
.isEmpty()
696 && line
.charAt(spacing
.length()) == '-';
700 * Return all the spaces that start this line (or Empty if none).
703 * the line to get the starting spaces from
705 * @return the left spacing
707 static private String
getItemSpacing(String line
) {
709 for (i
= 0; i
< line
.length(); i
++) {
710 if (line
.charAt(i
) != ' ') {
711 return line
.substring(0, i
);
719 * This line is a horizontal spacer line.
724 * @return TRUE if it is
726 static private boolean isHrLine(CharSequence line
) {
729 for (int i
= 0; i
< line
.length(); i
++) {
730 char car
= line
.charAt(i
);
731 if (car
== ' ' || car
== '\t' || car
== '*' || car
== '-'
732 || car
== '_' || car
== '~' || car
== '=' || car
== '/'