Move justify List into StringUtils (tests needed)
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
CommitLineData
ec1f3444
NR
1package be.nikiroo.utils;
2
ec1f3444 3import java.io.ByteArrayInputStream;
ec1f3444 4import java.io.IOException;
3f8349b7 5import java.io.UnsupportedEncodingException;
ec1f3444
NR
6import java.security.MessageDigest;
7import java.security.NoSuchAlgorithmException;
8import java.text.Normalizer;
9import java.text.Normalizer.Form;
10import java.text.ParseException;
11import java.text.SimpleDateFormat;
dc22eb95
NR
12import java.util.AbstractMap;
13import java.util.ArrayList;
ec1f3444 14import java.util.Date;
cc3e7291 15import java.util.List;
dc22eb95 16import java.util.Map.Entry;
db31c358 17import java.util.Scanner;
ec1f3444
NR
18import java.util.regex.Pattern;
19
ec1f3444
NR
20import org.unbescape.html.HtmlEscape;
21import org.unbescape.html.HtmlEscapeLevel;
22import org.unbescape.html.HtmlEscapeType;
23
24/**
25 * This class offer some utilities based around {@link String}s.
26 *
27 * @author niki
28 */
29public class StringUtils {
/**
 * Horizontal placement of a {@link String} when padding or justification is
 * applied (only relevant when there is enough horizontal space for the text
 * to be aligned).
 */
public enum Alignment {
	/** Aligned at left. */
	LEFT,
	/** Centered. */
	CENTER,
	/** Aligned at right. */
	RIGHT,
	/** Full justified (to both left and right). */
	JUSTIFY,

	// Old Deprecated values:

	/** DEPRECATED: please use LEFT. */
	@Deprecated
	Beginning,
	/** DEPRECATED: please use CENTER. */
	@Deprecated
	Center,
	/** DEPRECATED: please use RIGHT. */
	@Deprecated
	End;

	/**
	 * Translate a deprecated constant into its current replacement; a
	 * constant that is not deprecated is returned unchanged.
	 * 
	 * @return the non-deprecated value
	 */
	Alignment undeprecate() {
		switch (this) {
		case Beginning:
			return LEFT;
		case Center:
			return CENTER;
		case End:
			return RIGHT;
		default:
			return this;
		}
	}
}
73
e8aa5bf9 74 static private Pattern marks = getMarks();
ec1f3444
NR
75
76 /**
77 * Fix the size of the given {@link String} either with space-padding or by
78 * shortening it.
79 *
80 * @param text
81 * the {@link String} to fix
82 * @param width
83 * the size of the resulting {@link String} or -1 for a noop
84 *
85 * @return the resulting {@link String} of size <i>size</i>
86 */
87 static public String padString(String text, int width) {
451f434b 88 return padString(text, width, true, null);
ec1f3444
NR
89 }
90
91 /**
92 * Fix the size of the given {@link String} either with space-padding or by
93 * optionally shortening it.
94 *
95 * @param text
96 * the {@link String} to fix
97 * @param width
98 * the size of the resulting {@link String} if the text fits or
99 * if cut is TRUE or -1 for a noop
100 * @param cut
101 * cut the {@link String} shorter if needed
102 * @param align
103 * align the {@link String} in this position if we have enough
451f434b 104 * space (default is Alignment.Beginning)
ec1f3444
NR
105 *
106 * @return the resulting {@link String} of size <i>size</i> minimum
107 */
108 static public String padString(String text, int width, boolean cut,
109 Alignment align) {
110
451f434b 111 if (align == null) {
cc3e7291 112 align = Alignment.LEFT;
451f434b
NR
113 }
114
cc3e7291
NR
115 align = align.undeprecate();
116
ec1f3444
NR
117 if (width >= 0) {
118 if (text == null)
119 text = "";
120
121 int diff = width - text.length();
122
123 if (diff < 0) {
124 if (cut)
125 text = text.substring(0, width);
126 } else if (diff > 0) {
cc3e7291
NR
127 if (diff < 2 && align != Alignment.RIGHT)
128 align = Alignment.LEFT;
ec1f3444
NR
129
130 switch (align) {
cc3e7291 131 case RIGHT:
ec1f3444
NR
132 text = new String(new char[diff]).replace('\0', ' ') + text;
133 break;
cc3e7291 134 case CENTER:
ec1f3444
NR
135 int pad1 = (diff) / 2;
136 int pad2 = (diff + 1) / 2;
137 text = new String(new char[pad1]).replace('\0', ' ') + text
138 + new String(new char[pad2]).replace('\0', ' ');
139 break;
cc3e7291
NR
140 case LEFT:
141 default:
142 text = text + new String(new char[diff]).replace('\0', ' ');
143 break;
ec1f3444
NR
144 }
145 }
146 }
147
148 return text;
149 }
150
cc3e7291
NR
151 /**
152 * Justify a text into width-sized (at the maximum) lines.
153 *
154 * @param text
155 * the {@link String} to justify
156 * @param width
157 * the maximum size of the resulting lines
158 *
159 * @return a list of justified text lines
160 */
161 static public List<String> justifyText(String text, int width) {
162 return justifyText(text, width, null);
163 }
164
165 /**
166 * Justify a text into width-sized (at the maximum) lines.
167 *
168 * @param text
169 * the {@link String} to justify
170 * @param width
171 * the maximum size of the resulting lines
172 * @param align
173 * align the lines in this position (default is
174 * Alignment.Beginning)
175 *
176 * @return a list of justified text lines
177 */
178 static public List<String> justifyText(String text, int width,
179 Alignment align) {
180 if (align == null) {
181 align = Alignment.LEFT;
182 }
183
184 align = align.undeprecate();
185
186 switch (align) {
187 case CENTER:
188 return StringJustifier.center(text, width);
189 case RIGHT:
190 return StringJustifier.right(text, width);
191 case JUSTIFY:
192 return StringJustifier.full(text, width);
193 case LEFT:
194 default:
195 return StringJustifier.left(text, width);
196 }
197 }
198
dc22eb95
NR
199 /**
200 * Justify a text into width-sized (at the maximum) lines.
201 *
202 * @param text
203 * the {@link String} to justify
204 * @param width
205 * the maximum size of the resulting lines
206 *
207 * @return a list of justified text lines
208 */
209 static public List<String> justifyText(List<String> text, int width) {
210 return justifyText(text, width, null);
211 }
212
213 /**
214 * Justify a text into width-sized (at the maximum) lines.
215 *
216 * @param text
217 * the {@link String} to justify
218 * @param width
219 * the maximum size of the resulting lines
220 * @param align
221 * align the lines in this position (default is
222 * Alignment.Beginning)
223 *
224 * @return a list of justified text lines
225 */
226 static public List<String> justifyText(List<String> text, int width,
227 Alignment align) {
228 List<String> result = new ArrayList<String>();
229
230 // Content <-> Bullet spacing (null = no spacing)
231 List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
232 StringBuilder previous = null;
233 StringBuilder tmp = new StringBuilder();
234 String previousItemBulletSpacing = null;
235 String itemBulletSpacing = null;
236 for (String inputLine : text) {
237 boolean previousLineComplete = true;
238
239 String current = inputLine.replace("\t", " ");
240 itemBulletSpacing = getItemSpacing(current);
241 boolean bullet = isItemLine(current);
242 if ((previousItemBulletSpacing == null || itemBulletSpacing
243 .length() <= previousItemBulletSpacing.length()) && !bullet) {
244 itemBulletSpacing = null;
245 }
246
247 if (itemBulletSpacing != null) {
248 current = current.trim();
249 if (!current.isEmpty() && bullet) {
250 current = current.substring(1);
251 }
252 current = current.trim();
253 previousLineComplete = bullet;
254 } else {
255 tmp.setLength(0);
256 for (String word : current.split(" ")) {
257 if (word.isEmpty()) {
258 continue;
259 }
260
261 if (tmp.length() > 0) {
262 tmp.append(' ');
263 }
264 tmp.append(word.trim());
265 }
266 current = tmp.toString();
267
268 previousLineComplete = current.isEmpty()
269 || previousItemBulletSpacing != null
270 || (previous != null && isFullLine(previous));
271 }
272
273 if (previous == null) {
274 previous = new StringBuilder();
275 } else {
276 if (previousLineComplete) {
277 lines.add(new AbstractMap.SimpleEntry<String, String>(
278 previous.toString(), previousItemBulletSpacing));
279 previous.setLength(0);
280 previousItemBulletSpacing = itemBulletSpacing;
281 } else {
282 previous.append(' ');
283 }
284 }
285
286 previous.append(current);
287
288 }
289
290 if (previous != null) {
291 lines.add(new AbstractMap.SimpleEntry<String, String>(previous
292 .toString(), previousItemBulletSpacing));
293 }
294
295 for (Entry<String, String> line : lines) {
296 String content = line.getKey();
297 String spacing = line.getValue();
298
299 String bullet = "- ";
300 if (spacing == null) {
301 bullet = "";
302 spacing = "";
303 }
304
305 if (spacing.length() > width + 3) {
306 spacing = "";
307 }
308
309 for (String subline : StringUtils.justifyText(content, width
310 - (spacing.length() + bullet.length()), align)) {
311 result.add(spacing + bullet + subline);
312 if (!bullet.isEmpty()) {
313 bullet = " ";
314 }
315 }
316 }
317
318 return result;
319 }
320
ec1f3444
NR
321 /**
322 * Sanitise the given input to make it more Terminal-friendly by removing
323 * combining characters.
324 *
325 * @param input
326 * the input to sanitise
327 * @param allowUnicode
328 * allow Unicode or only allow ASCII Latin characters
329 *
330 * @return the sanitised {@link String}
331 */
332 static public String sanitize(String input, boolean allowUnicode) {
333 return sanitize(input, allowUnicode, !allowUnicode);
334 }
335
336 /**
337 * Sanitise the given input to make it more Terminal-friendly by removing
338 * combining characters.
339 *
340 * @param input
341 * the input to sanitise
342 * @param allowUnicode
343 * allow Unicode or only allow ASCII Latin characters
344 * @param removeAllAccents
345 * TRUE to replace all accentuated characters by their non
346 * accentuated counter-parts
347 *
348 * @return the sanitised {@link String}
349 */
350 static public String sanitize(String input, boolean allowUnicode,
351 boolean removeAllAccents) {
352
353 if (removeAllAccents) {
354 input = Normalizer.normalize(input, Form.NFKD);
e8aa5bf9
NR
355 if (marks != null) {
356 input = marks.matcher(input).replaceAll("");
357 }
ec1f3444
NR
358 }
359
360 input = Normalizer.normalize(input, Form.NFKC);
361
362 if (!allowUnicode) {
363 StringBuilder builder = new StringBuilder();
364 for (int index = 0; index < input.length(); index++) {
365 char car = input.charAt(index);
366 // displayable chars in ASCII are in the range 32<->255,
367 // except DEL (127)
368 if (car >= 32 && car <= 255 && car != 127) {
369 builder.append(car);
370 }
371 }
372 input = builder.toString();
373 }
374
375 return input;
376 }
377
378 /**
451f434b
NR
379 * Convert between the time in milliseconds to a {@link String} in a "fixed"
380 * way (to exchange data over the wire, for instance).
381 * <p>
382 * Precise to the second.
ec1f3444
NR
383 *
384 * @param time
451f434b
NR
385 * the specified number of milliseconds since the standard base
386 * time known as "the epoch", namely January 1, 1970, 00:00:00
387 * GMT
ec1f3444
NR
388 *
389 * @return the time as a {@link String}
390 */
391 static public String fromTime(long time) {
392 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
393 return sdf.format(new Date(time));
394 }
395
396 /**
451f434b 397 * Convert between the time as a {@link String} to milliseconds in a "fixed"
ec1f3444 398 * way (to exchange data over the wire, for instance).
451f434b
NR
399 * <p>
400 * Precise to the second.
ec1f3444 401 *
db31c358 402 * @param displayTime
ec1f3444
NR
403 * the time as a {@link String}
404 *
451f434b 405 * @return the number of milliseconds since the standard base time known as
e8aa5bf9
NR
406 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
407 * of error
408 *
409 * @throws ParseException
410 * in case of parse error
ec1f3444 411 */
e8aa5bf9 412 static public long toTime(String displayTime) throws ParseException {
ec1f3444 413 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
e8aa5bf9 414 return sdf.parse(displayTime).getTime();
ec1f3444
NR
415 }
416
ec1f3444
NR
417 /**
418 * Return a hash of the given {@link String}.
419 *
420 * @param input
421 * the input data
422 *
423 * @return the hash
424 */
b771aed5 425 static public String getMd5Hash(String input) {
ec1f3444
NR
426 try {
427 MessageDigest md = MessageDigest.getInstance("MD5");
3f8349b7 428 md.update(input.getBytes("UTF-8"));
ec1f3444
NR
429 byte byteData[] = md.digest();
430
431 StringBuffer hexString = new StringBuffer();
432 for (int i = 0; i < byteData.length; i++) {
433 String hex = Integer.toHexString(0xff & byteData[i]);
434 if (hex.length() == 1)
435 hexString.append('0');
436 hexString.append(hex);
437 }
438
439 return hexString.toString();
440 } catch (NoSuchAlgorithmException e) {
441 return input;
3f8349b7
NR
442 } catch (UnsupportedEncodingException e) {
443 return input;
ec1f3444
NR
444 }
445 }
446
ec1f3444
NR
447 /**
448 * Remove the HTML content from the given input, and un-html-ize the rest.
449 *
450 * @param html
451 * the HTML-encoded content
452 *
453 * @return the HTML-free equivalent content
454 */
455 public static String unhtml(String html) {
456 StringBuilder builder = new StringBuilder();
457
458 int inTag = 0;
459 for (char car : html.toCharArray()) {
460 if (car == '<') {
461 inTag++;
462 } else if (car == '>') {
463 inTag--;
464 } else if (inTag <= 0) {
465 builder.append(car);
466 }
467 }
468
7ee9568b
NR
469 char nbsp = ' '; // non-breakable space (a special char)
470 char space = ' ';
471 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
ec1f3444
NR
472 }
473
474 /**
475 * Escape the given {@link String} so it can be used in XML, as content.
476 *
477 * @param input
478 * the input {@link String}
479 *
480 * @return the escaped {@link String}
481 */
482 public static String xmlEscape(String input) {
483 if (input == null) {
484 return "";
485 }
486
487 return HtmlEscape.escapeHtml(input,
488 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
489 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
490 }
491
492 /**
493 * Escape the given {@link String} so it can be used in XML, as text content
494 * inside double-quotes.
495 *
496 * @param input
497 * the input {@link String}
498 *
499 * @return the escaped {@link String}
500 */
501 public static String xmlEscapeQuote(String input) {
502 if (input == null) {
503 return "";
504 }
505
506 return HtmlEscape.escapeHtml(input,
507 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
508 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
509 }
db31c358 510
80500544
NR
511 /**
512 * Zip the data and then encode it into Base64.
513 *
514 * @param data
515 * the data
516 *
517 * @return the Base64 zipped version
518 */
db31c358
NR
519 public static String zip64(String data) {
520 try {
521 return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
522 } catch (IOException e) {
523 e.printStackTrace();
524 return null;
525 }
526 }
527
80500544
NR
528 /**
529 * Unconvert from Base64 then unzip the content.
530 *
531 * @param data
532 * the data in Base64 format
533 *
534 * @return the raw data
535 *
536 * @throws IOException
537 * in case of I/O error
538 */
db31c358
NR
539 public static String unzip64(String data) throws IOException {
540 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
541 Base64.GZIP));
542
543 Scanner scan = new Scanner(in);
544 scan.useDelimiter("\\A");
545 try {
546 return scan.next();
547 } finally {
548 scan.close();
549 }
550 }
e8aa5bf9
NR
551
552 /**
553 * The "remove accents" pattern.
554 *
555 * @return the pattern, or NULL if a problem happens
556 */
557 private static Pattern getMarks() {
558 try {
559 return Pattern
560 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
561 } catch (Exception e) {
562 // Can fail on Android...
563 return null;
564 }
565 }
dc22eb95
NR
566
567 // justify List<String> related:
568
569 static private boolean isFullLine(StringBuilder line) {
570 return line.length() == 0 //
571 || line.charAt(line.length() - 1) == '.'
572 || line.charAt(line.length() - 1) == '"'
573 || line.charAt(line.length() - 1) == '»';
574 }
575
576 static private boolean isItemLine(String line) {
577 String spacing = getItemSpacing(line);
578 return spacing != null && line.charAt(spacing.length()) == '-';
579 }
580
581 static private String getItemSpacing(String line) {
582 int i;
583 for (i = 0; i < line.length(); i++) {
584 if (line.charAt(i) != ' ') {
585 return line.substring(0, i);
586 }
587 }
588
589 return "";
590 }
ec1f3444 591}