Version 4.4.4
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
CommitLineData
ec1f3444
NR
1package be.nikiroo.utils;
2
ec1f3444 3import java.io.ByteArrayInputStream;
ec1f3444 4import java.io.IOException;
3f8349b7 5import java.io.UnsupportedEncodingException;
ec1f3444
NR
6import java.security.MessageDigest;
7import java.security.NoSuchAlgorithmException;
8import java.text.Normalizer;
9import java.text.Normalizer.Form;
10import java.text.ParseException;
11import java.text.SimpleDateFormat;
dc22eb95
NR
12import java.util.AbstractMap;
13import java.util.ArrayList;
ec1f3444 14import java.util.Date;
cc3e7291 15import java.util.List;
dc22eb95 16import java.util.Map.Entry;
db31c358 17import java.util.Scanner;
ec1f3444
NR
18import java.util.regex.Pattern;
19
ec1f3444
NR
20import org.unbescape.html.HtmlEscape;
21import org.unbescape.html.HtmlEscapeLevel;
22import org.unbescape.html.HtmlEscapeType;
23
24/**
25 * This class offers some utilities based around {@link String}s.
26 *
27 * @author niki
28 */
29public class StringUtils {
30 /**
31 * This enum type will decide the alignment of a {@link String} when padding
cc3e7291
NR
32 * or justification is applied (if there is enough horizontal space for it
33 * to be aligned).
ec1f3444
NR
34 */
35 public enum Alignment {
36 /** Aligned at left. */
cc3e7291 37 LEFT,
ec1f3444 38 /** Centered. */
cc3e7291 39 CENTER,
ec1f3444 40 /** Aligned at right. */
cc3e7291
NR
41 RIGHT,
42 /** Full justified (to both left and right). */
43 JUSTIFY,
44
45 // Old Deprecated values:
46
47 /** DEPRECATED: please use LEFT. */
48 @Deprecated
49 Beginning,
50 /** DEPRECATED: please use CENTER. */
51 @Deprecated
52 Center,
53 /** DEPRECATED: please use RIGHT. */
54 @Deprecated
55 End;
56
57 /**
58 * Return the non-deprecated version of this enum if needed (or return
59 * self if not).
60 *
61 * @return the non-deprecated value
62 */
63 Alignment undeprecate() {
64 if (this == Beginning)
65 return LEFT;
66 if (this == Center)
67 return CENTER;
68 if (this == End)
69 return RIGHT;
70 return this;
71 }
ec1f3444
NR
72 }
73
e8aa5bf9 74 static private Pattern marks = getMarks();
ec1f3444
NR
75
76 /**
77 * Fix the size of the given {@link String} either with space-padding or by
78 * shortening it.
79 *
80 * @param text
81 * the {@link String} to fix
82 * @param width
83 * the size of the resulting {@link String} or -1 for a noop
84 *
85 * @return the resulting {@link String} of size <i>size</i>
86 */
87 static public String padString(String text, int width) {
451f434b 88 return padString(text, width, true, null);
ec1f3444
NR
89 }
90
91 /**
92 * Fix the size of the given {@link String} either with space-padding or by
93 * optionally shortening it.
94 *
95 * @param text
96 * the {@link String} to fix
97 * @param width
98 * the size of the resulting {@link String} if the text fits or
99 * if cut is TRUE or -1 for a noop
100 * @param cut
101 * cut the {@link String} shorter if needed
102 * @param align
103 * align the {@link String} in this position if we have enough
451f434b 104 * space (default is Alignment.Beginning)
ec1f3444
NR
105 *
106 * @return the resulting {@link String} of size <i>size</i> minimum
107 */
108 static public String padString(String text, int width, boolean cut,
109 Alignment align) {
110
451f434b 111 if (align == null) {
cc3e7291 112 align = Alignment.LEFT;
451f434b
NR
113 }
114
cc3e7291
NR
115 align = align.undeprecate();
116
ec1f3444
NR
117 if (width >= 0) {
118 if (text == null)
119 text = "";
120
121 int diff = width - text.length();
122
123 if (diff < 0) {
124 if (cut)
125 text = text.substring(0, width);
126 } else if (diff > 0) {
cc3e7291
NR
127 if (diff < 2 && align != Alignment.RIGHT)
128 align = Alignment.LEFT;
ec1f3444
NR
129
130 switch (align) {
cc3e7291 131 case RIGHT:
ec1f3444
NR
132 text = new String(new char[diff]).replace('\0', ' ') + text;
133 break;
cc3e7291 134 case CENTER:
ec1f3444
NR
135 int pad1 = (diff) / 2;
136 int pad2 = (diff + 1) / 2;
137 text = new String(new char[pad1]).replace('\0', ' ') + text
138 + new String(new char[pad2]).replace('\0', ' ');
139 break;
cc3e7291
NR
140 case LEFT:
141 default:
142 text = text + new String(new char[diff]).replace('\0', ' ');
143 break;
ec1f3444
NR
144 }
145 }
146 }
147
148 return text;
149 }
150
cc3e7291
NR
151 /**
152 * Justify a text into width-sized (at the maximum) lines.
153 *
154 * @param text
155 * the {@link String} to justify
156 * @param width
157 * the maximum size of the resulting lines
158 *
159 * @return a list of justified text lines
160 */
161 static public List<String> justifyText(String text, int width) {
162 return justifyText(text, width, null);
163 }
164
165 /**
166 * Justify a text into width-sized (at the maximum) lines.
167 *
168 * @param text
169 * the {@link String} to justify
170 * @param width
171 * the maximum size of the resulting lines
172 * @param align
173 * align the lines in this position (default is
174 * Alignment.Beginning)
175 *
176 * @return a list of justified text lines
177 */
178 static public List<String> justifyText(String text, int width,
179 Alignment align) {
180 if (align == null) {
181 align = Alignment.LEFT;
182 }
183
184 align = align.undeprecate();
185
186 switch (align) {
187 case CENTER:
188 return StringJustifier.center(text, width);
189 case RIGHT:
190 return StringJustifier.right(text, width);
191 case JUSTIFY:
192 return StringJustifier.full(text, width);
193 case LEFT:
194 default:
195 return StringJustifier.left(text, width);
196 }
197 }
198
dc22eb95
NR
199 /**
200 * Justify a text into width-sized (at the maximum) lines.
201 *
202 * @param text
203 * the {@link String} to justify
204 * @param width
205 * the maximum size of the resulting lines
206 *
207 * @return a list of justified text lines
208 */
209 static public List<String> justifyText(List<String> text, int width) {
210 return justifyText(text, width, null);
211 }
212
213 /**
214 * Justify a text into width-sized (at the maximum) lines.
215 *
216 * @param text
217 * the {@link String} to justify
218 * @param width
219 * the maximum size of the resulting lines
220 * @param align
221 * align the lines in this position (default is
222 * Alignment.Beginning)
223 *
224 * @return a list of justified text lines
225 */
226 static public List<String> justifyText(List<String> text, int width,
227 Alignment align) {
228 List<String> result = new ArrayList<String>();
229
230 // Content <-> Bullet spacing (null = no spacing)
231 List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
232 StringBuilder previous = null;
233 StringBuilder tmp = new StringBuilder();
234 String previousItemBulletSpacing = null;
235 String itemBulletSpacing = null;
236 for (String inputLine : text) {
237 boolean previousLineComplete = true;
238
239 String current = inputLine.replace("\t", " ");
240 itemBulletSpacing = getItemSpacing(current);
241 boolean bullet = isItemLine(current);
242 if ((previousItemBulletSpacing == null || itemBulletSpacing
243 .length() <= previousItemBulletSpacing.length()) && !bullet) {
244 itemBulletSpacing = null;
245 }
246
247 if (itemBulletSpacing != null) {
248 current = current.trim();
249 if (!current.isEmpty() && bullet) {
250 current = current.substring(1);
251 }
252 current = current.trim();
253 previousLineComplete = bullet;
254 } else {
255 tmp.setLength(0);
256 for (String word : current.split(" ")) {
257 if (word.isEmpty()) {
258 continue;
259 }
260
261 if (tmp.length() > 0) {
262 tmp.append(' ');
263 }
264 tmp.append(word.trim());
265 }
266 current = tmp.toString();
267
268 previousLineComplete = current.isEmpty()
269 || previousItemBulletSpacing != null
c0c091af
NR
270 || (previous != null && isFullLine(previous))
271 || isHrLine(current) || isHrLine(previous);
dc22eb95
NR
272 }
273
274 if (previous == null) {
275 previous = new StringBuilder();
276 } else {
277 if (previousLineComplete) {
278 lines.add(new AbstractMap.SimpleEntry<String, String>(
279 previous.toString(), previousItemBulletSpacing));
280 previous.setLength(0);
281 previousItemBulletSpacing = itemBulletSpacing;
282 } else {
283 previous.append(' ');
284 }
285 }
286
287 previous.append(current);
288
289 }
290
291 if (previous != null) {
292 lines.add(new AbstractMap.SimpleEntry<String, String>(previous
293 .toString(), previousItemBulletSpacing));
294 }
295
296 for (Entry<String, String> line : lines) {
297 String content = line.getKey();
298 String spacing = line.getValue();
299
300 String bullet = "- ";
301 if (spacing == null) {
302 bullet = "";
303 spacing = "";
304 }
305
306 if (spacing.length() > width + 3) {
307 spacing = "";
308 }
309
310 for (String subline : StringUtils.justifyText(content, width
311 - (spacing.length() + bullet.length()), align)) {
312 result.add(spacing + bullet + subline);
313 if (!bullet.isEmpty()) {
314 bullet = " ";
315 }
316 }
317 }
318
319 return result;
320 }
321
ec1f3444
NR
322 /**
323 * Sanitise the given input to make it more Terminal-friendly by removing
324 * combining characters.
325 *
326 * @param input
327 * the input to sanitise
328 * @param allowUnicode
329 * allow Unicode or only allow ASCII Latin characters
330 *
331 * @return the sanitised {@link String}
332 */
333 static public String sanitize(String input, boolean allowUnicode) {
334 return sanitize(input, allowUnicode, !allowUnicode);
335 }
336
337 /**
338 * Sanitise the given input to make it more Terminal-friendly by removing
339 * combining characters.
340 *
341 * @param input
342 * the input to sanitise
343 * @param allowUnicode
344 * allow Unicode or only allow ASCII Latin characters
345 * @param removeAllAccents
346 * TRUE to replace all accentuated characters by their non
347 * accentuated counter-parts
348 *
349 * @return the sanitised {@link String}
350 */
351 static public String sanitize(String input, boolean allowUnicode,
352 boolean removeAllAccents) {
353
354 if (removeAllAccents) {
355 input = Normalizer.normalize(input, Form.NFKD);
e8aa5bf9
NR
356 if (marks != null) {
357 input = marks.matcher(input).replaceAll("");
358 }
ec1f3444
NR
359 }
360
361 input = Normalizer.normalize(input, Form.NFKC);
362
363 if (!allowUnicode) {
364 StringBuilder builder = new StringBuilder();
365 for (int index = 0; index < input.length(); index++) {
366 char car = input.charAt(index);
367 // displayable chars in ASCII are in the range 32<->255,
368 // except DEL (127)
369 if (car >= 32 && car <= 255 && car != 127) {
370 builder.append(car);
371 }
372 }
373 input = builder.toString();
374 }
375
376 return input;
377 }
378
379 /**
451f434b
NR
380 * Convert between the time in milliseconds to a {@link String} in a "fixed"
381 * way (to exchange data over the wire, for instance).
382 * <p>
383 * Precise to the second.
ec1f3444
NR
384 *
385 * @param time
451f434b
NR
386 * the specified number of milliseconds since the standard base
387 * time known as "the epoch", namely January 1, 1970, 00:00:00
388 * GMT
ec1f3444
NR
389 *
390 * @return the time as a {@link String}
391 */
392 static public String fromTime(long time) {
393 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
394 return sdf.format(new Date(time));
395 }
396
397 /**
451f434b 398 * Convert between the time as a {@link String} to milliseconds in a "fixed"
ec1f3444 399 * way (to exchange data over the wire, for instance).
451f434b
NR
400 * <p>
401 * Precise to the second.
ec1f3444 402 *
db31c358 403 * @param displayTime
ec1f3444
NR
404 * the time as a {@link String}
405 *
451f434b 406 * @return the number of milliseconds since the standard base time known as
e8aa5bf9
NR
407 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
408 * of error
409 *
410 * @throws ParseException
411 * in case of parse error
ec1f3444 412 */
e8aa5bf9 413 static public long toTime(String displayTime) throws ParseException {
ec1f3444 414 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
e8aa5bf9 415 return sdf.parse(displayTime).getTime();
ec1f3444
NR
416 }
417
ec1f3444
NR
418 /**
419 * Return a hash of the given {@link String}.
420 *
421 * @param input
422 * the input data
423 *
424 * @return the hash
425 */
b771aed5 426 static public String getMd5Hash(String input) {
ec1f3444
NR
427 try {
428 MessageDigest md = MessageDigest.getInstance("MD5");
3f8349b7 429 md.update(input.getBytes("UTF-8"));
ec1f3444
NR
430 byte byteData[] = md.digest();
431
432 StringBuffer hexString = new StringBuffer();
433 for (int i = 0; i < byteData.length; i++) {
434 String hex = Integer.toHexString(0xff & byteData[i]);
435 if (hex.length() == 1)
436 hexString.append('0');
437 hexString.append(hex);
438 }
439
440 return hexString.toString();
441 } catch (NoSuchAlgorithmException e) {
442 return input;
3f8349b7
NR
443 } catch (UnsupportedEncodingException e) {
444 return input;
ec1f3444
NR
445 }
446 }
447
ec1f3444
NR
448 /**
449 * Remove the HTML content from the given input, and un-html-ize the rest.
450 *
451 * @param html
452 * the HTML-encoded content
453 *
454 * @return the HTML-free equivalent content
455 */
456 public static String unhtml(String html) {
457 StringBuilder builder = new StringBuilder();
458
459 int inTag = 0;
460 for (char car : html.toCharArray()) {
461 if (car == '<') {
462 inTag++;
463 } else if (car == '>') {
464 inTag--;
465 } else if (inTag <= 0) {
466 builder.append(car);
467 }
468 }
469
7ee9568b
NR
470 char nbsp = ' '; // non-breakable space (a special char)
471 char space = ' ';
472 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
ec1f3444
NR
473 }
474
475 /**
476 * Escape the given {@link String} so it can be used in XML, as content.
477 *
478 * @param input
479 * the input {@link String}
480 *
481 * @return the escaped {@link String}
482 */
483 public static String xmlEscape(String input) {
484 if (input == null) {
485 return "";
486 }
487
488 return HtmlEscape.escapeHtml(input,
489 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
490 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
491 }
492
493 /**
494 * Escape the given {@link String} so it can be used in XML, as text content
495 * inside double-quotes.
496 *
497 * @param input
498 * the input {@link String}
499 *
500 * @return the escaped {@link String}
501 */
502 public static String xmlEscapeQuote(String input) {
503 if (input == null) {
504 return "";
505 }
506
507 return HtmlEscape.escapeHtml(input,
508 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
509 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
510 }
db31c358 511
80500544
NR
512 /**
513 * Zip the data and then encode it into Base64.
514 *
515 * @param data
516 * the data
517 *
518 * @return the Base64 zipped version
519 */
db31c358
NR
520 public static String zip64(String data) {
521 try {
522 return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
523 } catch (IOException e) {
524 e.printStackTrace();
525 return null;
526 }
527 }
528
80500544
NR
529 /**
530 * Unconvert from Base64 then unzip the content.
531 *
532 * @param data
533 * the data in Base64 format
534 *
535 * @return the raw data
536 *
537 * @throws IOException
538 * in case of I/O error
539 */
db31c358
NR
540 public static String unzip64(String data) throws IOException {
541 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
542 Base64.GZIP));
543
544 Scanner scan = new Scanner(in);
545 scan.useDelimiter("\\A");
546 try {
547 return scan.next();
548 } finally {
549 scan.close();
550 }
551 }
e8aa5bf9
NR
552
553 /**
554 * The "remove accents" pattern.
555 *
556 * @return the pattern, or NULL if a problem happens
557 */
558 private static Pattern getMarks() {
559 try {
560 return Pattern
561 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
562 } catch (Exception e) {
563 // Can fail on Android...
564 return null;
565 }
566 }
dc22eb95
NR
567
568 // justify List<String> related:
569
570 static private boolean isFullLine(StringBuilder line) {
571 return line.length() == 0 //
572 || line.charAt(line.length() - 1) == '.'
573 || line.charAt(line.length() - 1) == '"'
574 || line.charAt(line.length() - 1) == '»';
575 }
576
577 static private boolean isItemLine(String line) {
578 String spacing = getItemSpacing(line);
c0c091af
NR
579 return spacing != null && !spacing.isEmpty()
580 && line.charAt(spacing.length()) == '-';
dc22eb95
NR
581 }
582
583 static private String getItemSpacing(String line) {
584 int i;
585 for (i = 0; i < line.length(); i++) {
586 if (line.charAt(i) != ' ') {
587 return line.substring(0, i);
588 }
589 }
590
591 return "";
592 }
c0c091af
NR
593
594 static private boolean isHrLine(CharSequence line) {
595 int count = 0;
596 if (line != null) {
597 for (int i = 0; i < line.length(); i++) {
598 char car = line.charAt(i);
599 if (car == ' ' || car == '\t' || car == '*' || car == '-'
600 || car == '_' || car == '~' || car == '=' || car == '/'
601 || car == '\\') {
602 count++;
603 } else {
604 return false;
605 }
606 }
607 }
608
609 return count > 2;
610 }
ec1f3444 611}