Commit | Line | Data |
---|---|---|
ec1f3444 NR |
1 | package be.nikiroo.utils; |
2 | ||
ec1f3444 | 3 | import java.io.ByteArrayInputStream; |
ec1f3444 | 4 | import java.io.IOException; |
3f8349b7 | 5 | import java.io.UnsupportedEncodingException; |
ec1f3444 NR |
6 | import java.security.MessageDigest; |
7 | import java.security.NoSuchAlgorithmException; | |
8 | import java.text.Normalizer; | |
9 | import java.text.Normalizer.Form; | |
10 | import java.text.ParseException; | |
11 | import java.text.SimpleDateFormat; | |
dc22eb95 NR |
12 | import java.util.AbstractMap; |
13 | import java.util.ArrayList; | |
ec1f3444 | 14 | import java.util.Date; |
cc3e7291 | 15 | import java.util.List; |
dc22eb95 | 16 | import java.util.Map.Entry; |
db31c358 | 17 | import java.util.Scanner; |
ec1f3444 NR |
18 | import java.util.regex.Pattern; |
19 | ||
ec1f3444 NR |
20 | import org.unbescape.html.HtmlEscape; |
21 | import org.unbescape.html.HtmlEscapeLevel; | |
22 | import org.unbescape.html.HtmlEscapeType; | |
23 | ||
24 | /** | |
25 | * This class offers some utilities based around {@link String}s. |
26 | * | |
27 | * @author niki | |
28 | */ | |
29 | public class StringUtils { | |
/**
 * Horizontal alignment applied to a {@link String} when it is padded or
 * justified (provided there is enough horizontal room to align it).
 */
public enum Alignment {
	/** Aligned at left. */
	LEFT,
	/** Centered. */
	CENTER,
	/** Aligned at right. */
	RIGHT,
	/** Full justified (to both left and right). */
	JUSTIFY,

	// Legacy names, only kept for backward compatibility:

	/** DEPRECATED: please use LEFT. */
	@Deprecated
	Beginning,
	/** DEPRECATED: please use CENTER. */
	@Deprecated
	Center,
	/** DEPRECATED: please use RIGHT. */
	@Deprecated
	End;

	/**
	 * Map a deprecated value to its current replacement.
	 * 
	 * @return the non-deprecated equivalent, or this value itself when it
	 *         is not a deprecated one
	 */
	Alignment undeprecate() {
		switch (this) {
		case Beginning:
			return LEFT;
		case Center:
			return CENTER;
		case End:
			return RIGHT;
		default:
			return this;
		}
	}
}
73 | ||
e8aa5bf9 | 74 | static private Pattern marks = getMarks(); |
ec1f3444 NR |
75 | |
76 | /** | |
77 | * Fix the size of the given {@link String} either with space-padding or by | |
78 | * shortening it. | |
79 | * | |
80 | * @param text | |
81 | * the {@link String} to fix | |
82 | * @param width | |
83 | * the size of the resulting {@link String} or -1 for a noop | |
84 | * | |
85 | * @return the resulting {@link String} of size <i>size</i> | |
86 | */ | |
87 | static public String padString(String text, int width) { | |
451f434b | 88 | return padString(text, width, true, null); |
ec1f3444 NR |
89 | } |
90 | ||
91 | /** | |
92 | * Fix the size of the given {@link String} either with space-padding or by | |
93 | * optionally shortening it. | |
94 | * | |
95 | * @param text | |
96 | * the {@link String} to fix | |
97 | * @param width | |
98 | * the size of the resulting {@link String} if the text fits or | |
99 | * if cut is TRUE or -1 for a noop | |
100 | * @param cut | |
101 | * cut the {@link String} shorter if needed | |
102 | * @param align | |
103 | * align the {@link String} in this position if we have enough | |
451f434b | 104 | * space (default is Alignment.Beginning) |
ec1f3444 NR |
105 | * |
106 | * @return the resulting {@link String} of size <i>size</i> minimum | |
107 | */ | |
108 | static public String padString(String text, int width, boolean cut, | |
109 | Alignment align) { | |
110 | ||
451f434b | 111 | if (align == null) { |
cc3e7291 | 112 | align = Alignment.LEFT; |
451f434b NR |
113 | } |
114 | ||
cc3e7291 NR |
115 | align = align.undeprecate(); |
116 | ||
ec1f3444 NR |
117 | if (width >= 0) { |
118 | if (text == null) | |
119 | text = ""; | |
120 | ||
121 | int diff = width - text.length(); | |
122 | ||
123 | if (diff < 0) { | |
124 | if (cut) | |
125 | text = text.substring(0, width); | |
126 | } else if (diff > 0) { | |
cc3e7291 NR |
127 | if (diff < 2 && align != Alignment.RIGHT) |
128 | align = Alignment.LEFT; | |
ec1f3444 NR |
129 | |
130 | switch (align) { | |
cc3e7291 | 131 | case RIGHT: |
ec1f3444 NR |
132 | text = new String(new char[diff]).replace('\0', ' ') + text; |
133 | break; | |
cc3e7291 | 134 | case CENTER: |
ec1f3444 NR |
135 | int pad1 = (diff) / 2; |
136 | int pad2 = (diff + 1) / 2; | |
137 | text = new String(new char[pad1]).replace('\0', ' ') + text | |
138 | + new String(new char[pad2]).replace('\0', ' '); | |
139 | break; | |
cc3e7291 NR |
140 | case LEFT: |
141 | default: | |
142 | text = text + new String(new char[diff]).replace('\0', ' '); | |
143 | break; | |
ec1f3444 NR |
144 | } |
145 | } | |
146 | } | |
147 | ||
148 | return text; | |
149 | } | |
150 | ||
cc3e7291 NR |
151 | /** |
152 | * Justify a text into width-sized (at the maximum) lines. | |
153 | * | |
154 | * @param text | |
155 | * the {@link String} to justify | |
156 | * @param width | |
157 | * the maximum size of the resulting lines | |
158 | * | |
159 | * @return a list of justified text lines | |
160 | */ | |
161 | static public List<String> justifyText(String text, int width) { | |
162 | return justifyText(text, width, null); | |
163 | } | |
164 | ||
165 | /** | |
166 | * Justify a text into width-sized (at the maximum) lines. | |
167 | * | |
168 | * @param text | |
169 | * the {@link String} to justify | |
170 | * @param width | |
171 | * the maximum size of the resulting lines | |
172 | * @param align | |
173 | * align the lines in this position (default is | |
174 | * Alignment.Beginning) | |
175 | * | |
176 | * @return a list of justified text lines | |
177 | */ | |
178 | static public List<String> justifyText(String text, int width, | |
179 | Alignment align) { | |
180 | if (align == null) { | |
181 | align = Alignment.LEFT; | |
182 | } | |
183 | ||
184 | align = align.undeprecate(); | |
185 | ||
186 | switch (align) { | |
187 | case CENTER: | |
188 | return StringJustifier.center(text, width); | |
189 | case RIGHT: | |
190 | return StringJustifier.right(text, width); | |
191 | case JUSTIFY: | |
192 | return StringJustifier.full(text, width); | |
193 | case LEFT: | |
194 | default: | |
195 | return StringJustifier.left(text, width); | |
196 | } | |
197 | } | |
198 | ||
dc22eb95 NR |
199 | /** |
200 | * Justify a text into width-sized (at the maximum) lines. | |
201 | * | |
202 | * @param text | |
203 | * the {@link String} to justify | |
204 | * @param width | |
205 | * the maximum size of the resulting lines | |
206 | * | |
207 | * @return a list of justified text lines | |
208 | */ | |
209 | static public List<String> justifyText(List<String> text, int width) { | |
210 | return justifyText(text, width, null); | |
211 | } | |
212 | ||
213 | /** | |
214 | * Justify a text into width-sized (at the maximum) lines. | |
215 | * | |
216 | * @param text | |
217 | * the {@link String} to justify | |
218 | * @param width | |
219 | * the maximum size of the resulting lines | |
220 | * @param align | |
221 | * align the lines in this position (default is | |
222 | * Alignment.Beginning) | |
223 | * | |
224 | * @return a list of justified text lines | |
225 | */ | |
226 | static public List<String> justifyText(List<String> text, int width, | |
227 | Alignment align) { | |
228 | List<String> result = new ArrayList<String>(); | |
229 | ||
230 | // Content <-> Bullet spacing (null = no spacing) | |
231 | List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>(); | |
232 | StringBuilder previous = null; | |
233 | StringBuilder tmp = new StringBuilder(); | |
234 | String previousItemBulletSpacing = null; | |
235 | String itemBulletSpacing = null; | |
236 | for (String inputLine : text) { | |
237 | boolean previousLineComplete = true; | |
238 | ||
239 | String current = inputLine.replace("\t", " "); | |
240 | itemBulletSpacing = getItemSpacing(current); | |
241 | boolean bullet = isItemLine(current); | |
242 | if ((previousItemBulletSpacing == null || itemBulletSpacing | |
243 | .length() <= previousItemBulletSpacing.length()) && !bullet) { | |
244 | itemBulletSpacing = null; | |
245 | } | |
246 | ||
247 | if (itemBulletSpacing != null) { | |
248 | current = current.trim(); | |
249 | if (!current.isEmpty() && bullet) { | |
250 | current = current.substring(1); | |
251 | } | |
252 | current = current.trim(); | |
253 | previousLineComplete = bullet; | |
254 | } else { | |
255 | tmp.setLength(0); | |
256 | for (String word : current.split(" ")) { | |
257 | if (word.isEmpty()) { | |
258 | continue; | |
259 | } | |
260 | ||
261 | if (tmp.length() > 0) { | |
262 | tmp.append(' '); | |
263 | } | |
264 | tmp.append(word.trim()); | |
265 | } | |
266 | current = tmp.toString(); | |
267 | ||
268 | previousLineComplete = current.isEmpty() | |
269 | || previousItemBulletSpacing != null | |
c0c091af NR |
270 | || (previous != null && isFullLine(previous)) |
271 | || isHrLine(current) || isHrLine(previous); | |
dc22eb95 NR |
272 | } |
273 | ||
274 | if (previous == null) { | |
275 | previous = new StringBuilder(); | |
276 | } else { | |
277 | if (previousLineComplete) { | |
278 | lines.add(new AbstractMap.SimpleEntry<String, String>( | |
279 | previous.toString(), previousItemBulletSpacing)); | |
280 | previous.setLength(0); | |
281 | previousItemBulletSpacing = itemBulletSpacing; | |
282 | } else { | |
283 | previous.append(' '); | |
284 | } | |
285 | } | |
286 | ||
287 | previous.append(current); | |
288 | ||
289 | } | |
290 | ||
291 | if (previous != null) { | |
292 | lines.add(new AbstractMap.SimpleEntry<String, String>(previous | |
293 | .toString(), previousItemBulletSpacing)); | |
294 | } | |
295 | ||
296 | for (Entry<String, String> line : lines) { | |
297 | String content = line.getKey(); | |
298 | String spacing = line.getValue(); | |
299 | ||
300 | String bullet = "- "; | |
301 | if (spacing == null) { | |
302 | bullet = ""; | |
303 | spacing = ""; | |
304 | } | |
305 | ||
306 | if (spacing.length() > width + 3) { | |
307 | spacing = ""; | |
308 | } | |
309 | ||
310 | for (String subline : StringUtils.justifyText(content, width | |
311 | - (spacing.length() + bullet.length()), align)) { | |
312 | result.add(spacing + bullet + subline); | |
313 | if (!bullet.isEmpty()) { | |
314 | bullet = " "; | |
315 | } | |
316 | } | |
317 | } | |
318 | ||
319 | return result; | |
320 | } | |
321 | ||
ec1f3444 NR |
322 | /** |
323 | * Sanitise the given input to make it more Terminal-friendly by removing | |
324 | * combining characters. | |
325 | * | |
326 | * @param input | |
327 | * the input to sanitise | |
328 | * @param allowUnicode | |
329 | * allow Unicode or only allow ASCII Latin characters | |
330 | * | |
331 | * @return the sanitised {@link String} | |
332 | */ | |
333 | static public String sanitize(String input, boolean allowUnicode) { | |
334 | return sanitize(input, allowUnicode, !allowUnicode); | |
335 | } | |
336 | ||
337 | /** | |
338 | * Sanitise the given input to make it more Terminal-friendly by removing | |
339 | * combining characters. | |
340 | * | |
341 | * @param input | |
342 | * the input to sanitise | |
343 | * @param allowUnicode | |
344 | * allow Unicode or only allow ASCII Latin characters | |
345 | * @param removeAllAccents | |
346 | * TRUE to replace all accentuated characters by their non | |
347 | * accentuated counter-parts | |
348 | * | |
349 | * @return the sanitised {@link String} | |
350 | */ | |
351 | static public String sanitize(String input, boolean allowUnicode, | |
352 | boolean removeAllAccents) { | |
353 | ||
354 | if (removeAllAccents) { | |
355 | input = Normalizer.normalize(input, Form.NFKD); | |
e8aa5bf9 NR |
356 | if (marks != null) { |
357 | input = marks.matcher(input).replaceAll(""); | |
358 | } | |
ec1f3444 NR |
359 | } |
360 | ||
361 | input = Normalizer.normalize(input, Form.NFKC); | |
362 | ||
363 | if (!allowUnicode) { | |
364 | StringBuilder builder = new StringBuilder(); | |
365 | for (int index = 0; index < input.length(); index++) { | |
366 | char car = input.charAt(index); | |
367 | // displayable chars in ASCII are in the range 32<->255, | |
368 | // except DEL (127) | |
369 | if (car >= 32 && car <= 255 && car != 127) { | |
370 | builder.append(car); | |
371 | } | |
372 | } | |
373 | input = builder.toString(); | |
374 | } | |
375 | ||
376 | return input; | |
377 | } | |
378 | ||
379 | /** | |
451f434b NR |
380 | * Convert between the time in milliseconds to a {@link String} in a "fixed" |
381 | * way (to exchange data over the wire, for instance). | |
382 | * <p> | |
383 | * Precise to the second. | |
ec1f3444 NR |
384 | * |
385 | * @param time | |
451f434b NR |
386 | * the specified number of milliseconds since the standard base |
387 | * time known as "the epoch", namely January 1, 1970, 00:00:00 | |
388 | * GMT | |
ec1f3444 NR |
389 | * |
390 | * @return the time as a {@link String} | |
391 | */ | |
392 | static public String fromTime(long time) { | |
393 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); | |
394 | return sdf.format(new Date(time)); | |
395 | } | |
396 | ||
397 | /** | |
451f434b | 398 | * Convert between the time as a {@link String} to milliseconds in a "fixed" |
ec1f3444 | 399 | * way (to exchange data over the wire, for instance). |
451f434b NR |
400 | * <p> |
401 | * Precise to the second. | |
ec1f3444 | 402 | * |
db31c358 | 403 | * @param displayTime |
ec1f3444 NR |
404 | * the time as a {@link String} |
405 | * | |
451f434b | 406 | * @return the number of milliseconds since the standard base time known as |
e8aa5bf9 NR |
407 | * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case |
408 | * of error | |
409 | * | |
410 | * @throws ParseException | |
411 | * in case of parse error | |
ec1f3444 | 412 | */ |
e8aa5bf9 | 413 | static public long toTime(String displayTime) throws ParseException { |
ec1f3444 | 414 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
e8aa5bf9 | 415 | return sdf.parse(displayTime).getTime(); |
ec1f3444 NR |
416 | } |
417 | ||
ec1f3444 NR |
418 | /** |
419 | * Return a hash of the given {@link String}. | |
420 | * | |
421 | * @param input | |
422 | * the input data | |
423 | * | |
424 | * @return the hash | |
425 | */ | |
b771aed5 | 426 | static public String getMd5Hash(String input) { |
ec1f3444 NR |
427 | try { |
428 | MessageDigest md = MessageDigest.getInstance("MD5"); | |
3f8349b7 | 429 | md.update(input.getBytes("UTF-8")); |
ec1f3444 NR |
430 | byte byteData[] = md.digest(); |
431 | ||
432 | StringBuffer hexString = new StringBuffer(); | |
433 | for (int i = 0; i < byteData.length; i++) { | |
434 | String hex = Integer.toHexString(0xff & byteData[i]); | |
435 | if (hex.length() == 1) | |
436 | hexString.append('0'); | |
437 | hexString.append(hex); | |
438 | } | |
439 | ||
440 | return hexString.toString(); | |
441 | } catch (NoSuchAlgorithmException e) { | |
442 | return input; | |
3f8349b7 NR |
443 | } catch (UnsupportedEncodingException e) { |
444 | return input; | |
ec1f3444 NR |
445 | } |
446 | } | |
447 | ||
ec1f3444 NR |
448 | /** |
449 | * Remove the HTML content from the given input, and un-html-ize the rest. | |
450 | * | |
451 | * @param html | |
452 | * the HTML-encoded content | |
453 | * | |
454 | * @return the HTML-free equivalent content | |
455 | */ | |
456 | public static String unhtml(String html) { | |
457 | StringBuilder builder = new StringBuilder(); | |
458 | ||
459 | int inTag = 0; | |
460 | for (char car : html.toCharArray()) { | |
461 | if (car == '<') { | |
462 | inTag++; | |
463 | } else if (car == '>') { | |
464 | inTag--; | |
465 | } else if (inTag <= 0) { | |
466 | builder.append(car); | |
467 | } | |
468 | } | |
469 | ||
7ee9568b NR |
470 | char nbsp = ' '; // non-breakable space (a special char) |
471 | char space = ' '; | |
472 | return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space); | |
ec1f3444 NR |
473 | } |
474 | ||
475 | /** | |
476 | * Escape the given {@link String} so it can be used in XML, as content. | |
477 | * | |
478 | * @param input | |
479 | * the input {@link String} | |
480 | * | |
481 | * @return the escaped {@link String} | |
482 | */ | |
483 | public static String xmlEscape(String input) { | |
484 | if (input == null) { | |
485 | return ""; | |
486 | } | |
487 | ||
488 | return HtmlEscape.escapeHtml(input, | |
489 | HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA, | |
490 | HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT); | |
491 | } | |
492 | ||
493 | /** | |
494 | * Escape the given {@link String} so it can be used in XML, as text content | |
495 | * inside double-quotes. | |
496 | * | |
497 | * @param input | |
498 | * the input {@link String} | |
499 | * | |
500 | * @return the escaped {@link String} | |
501 | */ | |
502 | public static String xmlEscapeQuote(String input) { | |
503 | if (input == null) { | |
504 | return ""; | |
505 | } | |
506 | ||
507 | return HtmlEscape.escapeHtml(input, | |
508 | HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA, | |
509 | HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT); | |
510 | } | |
db31c358 | 511 | |
80500544 NR |
512 | /** |
513 | * Zip the data and then encode it into Base64. | |
514 | * | |
515 | * @param data | |
516 | * the data | |
517 | * | |
518 | * @return the Base64 zipped version | |
519 | */ | |
db31c358 NR |
520 | public static String zip64(String data) { |
521 | try { | |
522 | return Base64.encodeBytes(data.getBytes(), Base64.GZIP); | |
523 | } catch (IOException e) { | |
524 | e.printStackTrace(); | |
525 | return null; | |
526 | } | |
527 | } | |
528 | ||
80500544 NR |
529 | /** |
530 | * Unconvert from Base64 then unzip the content. | |
531 | * | |
532 | * @param data | |
533 | * the data in Base64 format | |
534 | * | |
535 | * @return the raw data | |
536 | * | |
537 | * @throws IOException | |
538 | * in case of I/O error | |
539 | */ | |
db31c358 NR |
540 | public static String unzip64(String data) throws IOException { |
541 | ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data, | |
542 | Base64.GZIP)); | |
543 | ||
544 | Scanner scan = new Scanner(in); | |
545 | scan.useDelimiter("\\A"); | |
546 | try { | |
547 | return scan.next(); | |
548 | } finally { | |
549 | scan.close(); | |
550 | } | |
551 | } | |
e8aa5bf9 NR |
552 | |
553 | /** | |
554 | * The "remove accents" pattern. | |
555 | * | |
556 | * @return the pattern, or NULL if a problem happens | |
557 | */ | |
558 | private static Pattern getMarks() { | |
559 | try { | |
560 | return Pattern | |
561 | .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+"); | |
562 | } catch (Exception e) { | |
563 | // Can fail on Android... | |
564 | return null; | |
565 | } | |
566 | } | |
dc22eb95 NR |
567 | |
568 | // justify List<String> related: | |
569 | ||
570 | static private boolean isFullLine(StringBuilder line) { | |
571 | return line.length() == 0 // | |
572 | || line.charAt(line.length() - 1) == '.' | |
573 | || line.charAt(line.length() - 1) == '"' | |
574 | || line.charAt(line.length() - 1) == '»'; | |
575 | } | |
576 | ||
577 | static private boolean isItemLine(String line) { | |
578 | String spacing = getItemSpacing(line); | |
c0c091af NR |
579 | return spacing != null && !spacing.isEmpty() |
580 | && line.charAt(spacing.length()) == '-'; | |
dc22eb95 NR |
581 | } |
582 | ||
583 | static private String getItemSpacing(String line) { | |
584 | int i; | |
585 | for (i = 0; i < line.length(); i++) { | |
586 | if (line.charAt(i) != ' ') { | |
587 | return line.substring(0, i); | |
588 | } | |
589 | } | |
590 | ||
591 | return ""; | |
592 | } | |
c0c091af NR |
593 | |
594 | static private boolean isHrLine(CharSequence line) { | |
595 | int count = 0; | |
596 | if (line != null) { | |
597 | for (int i = 0; i < line.length(); i++) { | |
598 | char car = line.charAt(i); | |
599 | if (car == ' ' || car == '\t' || car == '*' || car == '-' | |
600 | || car == '_' || car == '~' || car == '=' || car == '/' | |
601 | || car == '\\') { | |
602 | count++; | |
603 | } else { | |
604 | return false; | |
605 | } | |
606 | } | |
607 | } | |
608 | ||
609 | return count > 2; | |
610 | } | |
ec1f3444 | 611 | } |