Version 4.4.5
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
1 package be.nikiroo.utils;
2
3 import java.io.ByteArrayInputStream;
4 import java.io.IOException;
5 import java.io.UnsupportedEncodingException;
6 import java.security.MessageDigest;
7 import java.security.NoSuchAlgorithmException;
8 import java.text.Normalizer;
9 import java.text.Normalizer.Form;
10 import java.text.ParseException;
11 import java.text.SimpleDateFormat;
12 import java.util.AbstractMap;
13 import java.util.ArrayList;
14 import java.util.Date;
15 import java.util.List;
16 import java.util.Map.Entry;
17 import java.util.Scanner;
18 import java.util.regex.Pattern;
19
20 import org.unbescape.html.HtmlEscape;
21 import org.unbescape.html.HtmlEscapeLevel;
22 import org.unbescape.html.HtmlEscapeType;
23
24 /**
25 * This class offers some utilities based around {@link String}s.
26 *
27 * @author niki
28 */
29 public class StringUtils {
/**
 * Horizontal alignment used when a {@link String} is padded or justified
 * (it only matters when there is spare horizontal space to distribute).
 */
public enum Alignment {
    /** Aligned at left. */
    LEFT,
    /** Centered. */
    CENTER,
    /** Aligned at right. */
    RIGHT,
    /** Full justified (to both left and right). */
    JUSTIFY,

    // Legacy names, kept for backward compatibility only:

    /** DEPRECATED: please use LEFT. */
    @Deprecated
    Beginning,
    /** DEPRECATED: please use CENTER. */
    @Deprecated
    Center,
    /** DEPRECATED: please use RIGHT. */
    @Deprecated
    End;

    /**
     * Translate a deprecated value into its current equivalent.
     *
     * @return the non-deprecated value (this very value when it is not a
     *         deprecated one)
     */
    Alignment undeprecate() {
        switch (this) {
        case Beginning:
            return LEFT;
        case Center:
            return CENTER;
        case End:
            return RIGHT;
        default:
            return this;
        }
    }
}
73
74 static private Pattern marks = getMarks();
75
76 /**
77 * Fix the size of the given {@link String} either with space-padding or by
78 * shortening it.
79 *
80 * @param text
81 * the {@link String} to fix
82 * @param width
83 * the size of the resulting {@link String} or -1 for a noop
84 *
85 * @return the resulting {@link String} of size <i>size</i>
86 */
87 static public String padString(String text, int width) {
88 return padString(text, width, true, null);
89 }
90
91 /**
92 * Fix the size of the given {@link String} either with space-padding or by
93 * optionally shortening it.
94 *
95 * @param text
96 * the {@link String} to fix
97 * @param width
98 * the size of the resulting {@link String} if the text fits or
99 * if cut is TRUE or -1 for a noop
100 * @param cut
101 * cut the {@link String} shorter if needed
102 * @param align
103 * align the {@link String} in this position if we have enough
104 * space (default is Alignment.Beginning)
105 *
106 * @return the resulting {@link String} of size <i>size</i> minimum
107 */
108 static public String padString(String text, int width, boolean cut,
109 Alignment align) {
110
111 if (align == null) {
112 align = Alignment.LEFT;
113 }
114
115 align = align.undeprecate();
116
117 if (width >= 0) {
118 if (text == null)
119 text = "";
120
121 int diff = width - text.length();
122
123 if (diff < 0) {
124 if (cut)
125 text = text.substring(0, width);
126 } else if (diff > 0) {
127 if (diff < 2 && align != Alignment.RIGHT)
128 align = Alignment.LEFT;
129
130 switch (align) {
131 case RIGHT:
132 text = new String(new char[diff]).replace('\0', ' ') + text;
133 break;
134 case CENTER:
135 int pad1 = (diff) / 2;
136 int pad2 = (diff + 1) / 2;
137 text = new String(new char[pad1]).replace('\0', ' ') + text
138 + new String(new char[pad2]).replace('\0', ' ');
139 break;
140 case LEFT:
141 default:
142 text = text + new String(new char[diff]).replace('\0', ' ');
143 break;
144 }
145 }
146 }
147
148 return text;
149 }
150
151 /**
152 * Justify a text into width-sized (at the maximum) lines.
153 *
154 * @param text
155 * the {@link String} to justify
156 * @param width
157 * the maximum size of the resulting lines
158 *
159 * @return a list of justified text lines
160 */
161 static public List<String> justifyText(String text, int width) {
162 return justifyText(text, width, null);
163 }
164
165 /**
166 * Justify a text into width-sized (at the maximum) lines.
167 *
168 * @param text
169 * the {@link String} to justify
170 * @param width
171 * the maximum size of the resulting lines
172 * @param align
173 * align the lines in this position (default is
174 * Alignment.Beginning)
175 *
176 * @return a list of justified text lines
177 */
178 static public List<String> justifyText(String text, int width,
179 Alignment align) {
180 if (align == null) {
181 align = Alignment.LEFT;
182 }
183
184 align = align.undeprecate();
185
186 switch (align) {
187 case CENTER:
188 return StringJustifier.center(text, width);
189 case RIGHT:
190 return StringJustifier.right(text, width);
191 case JUSTIFY:
192 return StringJustifier.full(text, width);
193 case LEFT:
194 default:
195 return StringJustifier.left(text, width);
196 }
197 }
198
199 /**
200 * Justify a text into width-sized (at the maximum) lines.
201 *
202 * @param text
203 * the {@link String} to justify
204 * @param width
205 * the maximum size of the resulting lines
206 *
207 * @return a list of justified text lines
208 */
209 static public List<String> justifyText(List<String> text, int width) {
210 return justifyText(text, width, null);
211 }
212
213 /**
214 * Justify a text into width-sized (at the maximum) lines.
215 *
216 * @param text
217 * the {@link String} to justify
218 * @param width
219 * the maximum size of the resulting lines
220 * @param align
221 * align the lines in this position (default is
222 * Alignment.Beginning)
223 *
224 * @return a list of justified text lines
225 */
226 static public List<String> justifyText(List<String> text, int width,
227 Alignment align) {
228 List<String> result = new ArrayList<String>();
229
230 // Content <-> Bullet spacing (null = no spacing)
231 List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
232 StringBuilder previous = null;
233 StringBuilder tmp = new StringBuilder();
234 String previousItemBulletSpacing = null;
235 String itemBulletSpacing = null;
236 for (String inputLine : text) {
237 boolean previousLineComplete = true;
238
239 String current = inputLine.replace("\t", " ");
240 itemBulletSpacing = getItemSpacing(current);
241 boolean bullet = isItemLine(current);
242 if ((previousItemBulletSpacing == null || itemBulletSpacing
243 .length() <= previousItemBulletSpacing.length()) && !bullet) {
244 itemBulletSpacing = null;
245 }
246
247 if (itemBulletSpacing != null) {
248 current = current.trim();
249 if (!current.isEmpty() && bullet) {
250 current = current.substring(1);
251 }
252 current = current.trim();
253 previousLineComplete = bullet;
254 } else {
255 tmp.setLength(0);
256 for (String word : current.split(" ")) {
257 if (word.isEmpty()) {
258 continue;
259 }
260
261 if (tmp.length() > 0) {
262 tmp.append(' ');
263 }
264 tmp.append(word.trim());
265 }
266 current = tmp.toString();
267
268 previousLineComplete = current.isEmpty()
269 || previousItemBulletSpacing != null
270 || (previous != null && isFullLine(previous))
271 || isHrLine(current) || isHrLine(previous);
272 }
273
274 if (previous == null) {
275 previous = new StringBuilder();
276 } else {
277 if (previousLineComplete) {
278 lines.add(new AbstractMap.SimpleEntry<String, String>(
279 previous.toString(), previousItemBulletSpacing));
280 previous.setLength(0);
281 previousItemBulletSpacing = itemBulletSpacing;
282 } else {
283 previous.append(' ');
284 }
285 }
286
287 previous.append(current);
288
289 }
290
291 if (previous != null) {
292 lines.add(new AbstractMap.SimpleEntry<String, String>(previous
293 .toString(), previousItemBulletSpacing));
294 }
295
296 for (Entry<String, String> line : lines) {
297 String content = line.getKey();
298 String spacing = line.getValue();
299
300 String bullet = "- ";
301 if (spacing == null) {
302 bullet = "";
303 spacing = "";
304 }
305
306 if (spacing.length() > width + 3) {
307 spacing = "";
308 }
309
310 for (String subline : StringUtils.justifyText(content, width
311 - (spacing.length() + bullet.length()), align)) {
312 result.add(spacing + bullet + subline);
313 if (!bullet.isEmpty()) {
314 bullet = " ";
315 }
316 }
317 }
318
319 return result;
320 }
321
322 /**
323 * Sanitise the given input to make it more Terminal-friendly by removing
324 * combining characters.
325 *
326 * @param input
327 * the input to sanitise
328 * @param allowUnicode
329 * allow Unicode or only allow ASCII Latin characters
330 *
331 * @return the sanitised {@link String}
332 */
333 static public String sanitize(String input, boolean allowUnicode) {
334 return sanitize(input, allowUnicode, !allowUnicode);
335 }
336
337 /**
338 * Sanitise the given input to make it more Terminal-friendly by removing
339 * combining characters.
340 *
341 * @param input
342 * the input to sanitise
343 * @param allowUnicode
344 * allow Unicode or only allow ASCII Latin characters
345 * @param removeAllAccents
346 * TRUE to replace all accentuated characters by their non
347 * accentuated counter-parts
348 *
349 * @return the sanitised {@link String}
350 */
351 static public String sanitize(String input, boolean allowUnicode,
352 boolean removeAllAccents) {
353
354 if (removeAllAccents) {
355 input = Normalizer.normalize(input, Form.NFKD);
356 if (marks != null) {
357 input = marks.matcher(input).replaceAll("");
358 }
359 }
360
361 input = Normalizer.normalize(input, Form.NFKC);
362
363 if (!allowUnicode) {
364 StringBuilder builder = new StringBuilder();
365 for (int index = 0; index < input.length(); index++) {
366 char car = input.charAt(index);
367 // displayable chars in ASCII are in the range 32<->255,
368 // except DEL (127)
369 if (car >= 32 && car <= 255 && car != 127) {
370 builder.append(car);
371 }
372 }
373 input = builder.toString();
374 }
375
376 return input;
377 }
378
379 /**
380 * Convert between the time in milliseconds to a {@link String} in a "fixed"
381 * way (to exchange data over the wire, for instance).
382 * <p>
383 * Precise to the second.
384 *
385 * @param time
386 * the specified number of milliseconds since the standard base
387 * time known as "the epoch", namely January 1, 1970, 00:00:00
388 * GMT
389 *
390 * @return the time as a {@link String}
391 */
392 static public String fromTime(long time) {
393 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
394 return sdf.format(new Date(time));
395 }
396
397 /**
398 * Convert between the time as a {@link String} to milliseconds in a "fixed"
399 * way (to exchange data over the wire, for instance).
400 * <p>
401 * Precise to the second.
402 *
403 * @param displayTime
404 * the time as a {@link String}
405 *
406 * @return the number of milliseconds since the standard base time known as
407 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
408 * of error
409 *
410 * @throws ParseException
411 * in case of parse error
412 */
413 static public long toTime(String displayTime) throws ParseException {
414 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
415 return sdf.parse(displayTime).getTime();
416 }
417
418 /**
419 * Return a hash of the given {@link String}.
420 *
421 * @param input
422 * the input data
423 *
424 * @return the hash
425 */
426 static public String getMd5Hash(String input) {
427 try {
428 MessageDigest md = MessageDigest.getInstance("MD5");
429 md.update(input.getBytes("UTF-8"));
430 byte byteData[] = md.digest();
431
432 StringBuffer hexString = new StringBuffer();
433 for (int i = 0; i < byteData.length; i++) {
434 String hex = Integer.toHexString(0xff & byteData[i]);
435 if (hex.length() == 1)
436 hexString.append('0');
437 hexString.append(hex);
438 }
439
440 return hexString.toString();
441 } catch (NoSuchAlgorithmException e) {
442 return input;
443 } catch (UnsupportedEncodingException e) {
444 return input;
445 }
446 }
447
448 /**
449 * Remove the HTML content from the given input, and un-html-ize the rest.
450 *
451 * @param html
452 * the HTML-encoded content
453 *
454 * @return the HTML-free equivalent content
455 */
456 public static String unhtml(String html) {
457 StringBuilder builder = new StringBuilder();
458
459 int inTag = 0;
460 for (char car : html.toCharArray()) {
461 if (car == '<') {
462 inTag++;
463 } else if (car == '>') {
464 inTag--;
465 } else if (inTag <= 0) {
466 builder.append(car);
467 }
468 }
469
470 char nbsp = ' '; // non-breakable space (a special char)
471 char space = ' ';
472 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
473 }
474
475 /**
476 * Escape the given {@link String} so it can be used in XML, as content.
477 *
478 * @param input
479 * the input {@link String}
480 *
481 * @return the escaped {@link String}
482 */
483 public static String xmlEscape(String input) {
484 if (input == null) {
485 return "";
486 }
487
488 return HtmlEscape.escapeHtml(input,
489 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
490 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
491 }
492
493 /**
494 * Escape the given {@link String} so it can be used in XML, as text content
495 * inside double-quotes.
496 *
497 * @param input
498 * the input {@link String}
499 *
500 * @return the escaped {@link String}
501 */
502 public static String xmlEscapeQuote(String input) {
503 if (input == null) {
504 return "";
505 }
506
507 return HtmlEscape.escapeHtml(input,
508 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
509 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
510 }
511
512 /**
513 * Zip the data and then encode it into Base64.
514 *
515 * @deprecated use {@link StringUtils#base64(byte[], boolean)} with the
516 * correct parameter instead
517 *
518 * @param data
519 * the data
520 *
521 * @return the Base64 zipped version
522 */
523 @Deprecated
524 public static String zip64(String data) {
525 try {
526 return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
527 } catch (IOException e) {
528 e.printStackTrace();
529 return null;
530 }
531 }
532
533 /**
534 * Unconvert from Base64 then unzip the content.
535 *
536 * @deprecated use {@link StringUtils#unbase64s(String, boolean)} with the
537 * correct parameter instead
538 *
539 * @param data
540 * the data in Base64 format
541 *
542 * @return the raw data
543 *
544 * @throws IOException
545 * in case of I/O error
546 */
547 @Deprecated
548 public static String unzip64(String data) throws IOException {
549 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
550 Base64.GZIP));
551
552 Scanner scan = new Scanner(in);
553 scan.useDelimiter("\\A");
554 try {
555 return scan.next();
556 } finally {
557 scan.close();
558 }
559 }
560
561 /**
562 * Convert the given data to Base64 format.
563 *
564 * @param data
565 * the data to convert
566 * @param zip
567 * TRUE to also compress the data in GZIP format; remember that
568 * compressed and not-compressed content are different; you need
569 * to know which is which when decoding
570 *
571 * @return the Base64 {@link String} representation of the data
572 *
573 * @throws IOException
574 * in case of I/O errors
575 */
576 public static String base64(byte[] data, boolean zip) throws IOException {
577 return Base64.encodeBytes(data, zip ? Base64.GZIP : Base64.NO_OPTIONS);
578 }
579
580 /**
581 * Unonvert the given data from Base64 format back to a raw array of bytes.
582 *
583 * @param data
584 * the data to unconvert
585 * @param zip
586 * TRUE to also uncompress the data from a GZIP format; take care
587 * about this flag, as it could easily cause errors in the
588 * returned content or an {@link IOException}
589 *
590 * @return the raw data represented by the given Base64 {@link String},
591 * optionally compressed with GZIP
592 *
593 * @throws IOException
594 * in case of I/O errors
595 */
596 public static byte[] unbase64(String data, boolean zip) throws IOException {
597 return Base64.decode(data, zip ? Base64.GZIP : Base64.NO_OPTIONS);
598 }
599
600 /**
601 * Unonvert the given data from Base64 format back to a {@link String}.
602 *
603 * @param data
604 * the data to unconvert
605 * @param zip
606 * TRUE to also uncompress the data from a GZIP format; take care
607 * about this flag, as it could easily cause errors in the
608 * returned content or an {@link IOException}
609 *
610 * @return the {@link String} represented by the given Base64 {@link String}
611 * , optionally compressed with GZIP
612 *
613 * @throws IOException
614 * in case of I/O errors
615 */
616 public static String unbase64s(String data, boolean zip) throws IOException {
617 ByteArrayInputStream in = new ByteArrayInputStream(unbase64(data, zip));
618
619 Scanner scan = new Scanner(in);
620 scan.useDelimiter("\\A");
621 try {
622 return scan.next();
623 } finally {
624 scan.close();
625 }
626 }
627
628 /**
629 * The "remove accents" pattern.
630 *
631 * @return the pattern, or NULL if a problem happens
632 */
633 private static Pattern getMarks() {
634 try {
635 return Pattern
636 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
637 } catch (Exception e) {
638 // Can fail on Android...
639 return null;
640 }
641 }
642
643 //
644 // justify List<String> related:
645 //
646
647 /**
648 * Check if this line ends as a complete line (ends with a "." or similar).
649 * <p>
650 * Note that we consider an empty line as full, and a line ending with
651 * spaces as not complete.
652 *
653 * @param line
654 * the line to check
655 *
656 * @return TRUE if it does
657 */
658 static private boolean isFullLine(StringBuilder line) {
659 if (line.length() == 0) {
660 return true;
661 }
662
663 char lastCar = line.charAt(line.length() - 1);
664 switch (lastCar) {
665 case '.': // points
666 case '?':
667 case '!':
668
669 case '\'': // quotes
670 case '‘':
671 case '’':
672
673 case '"': // double quotes
674 case '”':
675 case '“':
676 case '»':
677 case '«':
678 return true;
679 default:
680 return false;
681 }
682 }
683
684 /**
685 * Check if this line represent an item in a list or description (i.e.,
686 * check that the first non-space char is "-").
687 *
688 * @param line
689 * the line to check
690 *
691 * @return TRUE if it is
692 */
693 static private boolean isItemLine(String line) {
694 String spacing = getItemSpacing(line);
695 return spacing != null && !spacing.isEmpty()
696 && line.charAt(spacing.length()) == '-';
697 }
698
699 /**
700 * Return all the spaces that start this line (or Empty if none).
701 *
702 * @param line
703 * the line to get the starting spaces from
704 *
705 * @return the left spacing
706 */
707 static private String getItemSpacing(String line) {
708 int i;
709 for (i = 0; i < line.length(); i++) {
710 if (line.charAt(i) != ' ') {
711 return line.substring(0, i);
712 }
713 }
714
715 return "";
716 }
717
718 /**
719 * This line is an horizontal spacer line.
720 *
721 * @param line
722 * the line to test
723 *
724 * @return TRUE if it is
725 */
726 static private boolean isHrLine(CharSequence line) {
727 int count = 0;
728 if (line != null) {
729 for (int i = 0; i < line.length(); i++) {
730 char car = line.charAt(i);
731 if (car == ' ' || car == '\t' || car == '*' || car == '-'
732 || car == '_' || car == '~' || car == '=' || car == '/'
733 || car == '\\') {
734 count++;
735 } else {
736 return false;
737 }
738 }
739 }
740
741 return count > 2;
742 }
743 }