Initial commit, version 0.9.2
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
CommitLineData
ec1f3444
NR
1package be.nikiroo.utils;
2
3import java.awt.Image;
4import java.awt.geom.AffineTransform;
5import java.awt.image.AffineTransformOp;
6import java.awt.image.BufferedImage;
7import java.io.ByteArrayInputStream;
8import java.io.ByteArrayOutputStream;
9import java.io.File;
10import java.io.IOException;
11import java.io.InputStream;
12import java.security.MessageDigest;
13import java.security.NoSuchAlgorithmException;
14import java.text.Normalizer;
15import java.text.Normalizer.Form;
16import java.text.ParseException;
17import java.text.SimpleDateFormat;
18import java.util.Base64;
19import java.util.Date;
20import java.util.regex.Pattern;
21
22import javax.imageio.ImageIO;
23
24import org.unbescape.html.HtmlEscape;
25import org.unbescape.html.HtmlEscapeLevel;
26import org.unbescape.html.HtmlEscapeType;
27
28/**
29 * This class offer some utilities based around {@link String}s.
30 *
31 * @author niki
32 */
33public class StringUtils {
/**
 * Horizontal placement of a {@link String} inside an area that is wider
 * than the text itself, i.e., where the padding goes.
 */
public enum Alignment {
    /** The text starts at the left edge; the padding follows it. */
    Beginning,
    /** The text is centered; the padding is split around it. */
    Center,
    /** The text ends at the right edge; the padding precedes it. */
    End;
}
46
47 static private Pattern marks = Pattern
48 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
49
50 /**
51 * Fix the size of the given {@link String} either with space-padding or by
52 * shortening it.
53 *
54 * @param text
55 * the {@link String} to fix
56 * @param width
57 * the size of the resulting {@link String} or -1 for a noop
58 *
59 * @return the resulting {@link String} of size <i>size</i>
60 */
61 static public String padString(String text, int width) {
62 return padString(text, width, true, Alignment.Beginning);
63 }
64
65 /**
66 * Fix the size of the given {@link String} either with space-padding or by
67 * optionally shortening it.
68 *
69 * @param text
70 * the {@link String} to fix
71 * @param width
72 * the size of the resulting {@link String} if the text fits or
73 * if cut is TRUE or -1 for a noop
74 * @param cut
75 * cut the {@link String} shorter if needed
76 * @param align
77 * align the {@link String} in this position if we have enough
78 * space
79 *
80 * @return the resulting {@link String} of size <i>size</i> minimum
81 */
82 static public String padString(String text, int width, boolean cut,
83 Alignment align) {
84
85 if (width >= 0) {
86 if (text == null)
87 text = "";
88
89 int diff = width - text.length();
90
91 if (diff < 0) {
92 if (cut)
93 text = text.substring(0, width);
94 } else if (diff > 0) {
95 if (diff < 2 && align != Alignment.End)
96 align = Alignment.Beginning;
97
98 switch (align) {
99 case Beginning:
100 text = text + new String(new char[diff]).replace('\0', ' ');
101 break;
102 case End:
103 text = new String(new char[diff]).replace('\0', ' ') + text;
104 break;
105 case Center:
106 default:
107 int pad1 = (diff) / 2;
108 int pad2 = (diff + 1) / 2;
109 text = new String(new char[pad1]).replace('\0', ' ') + text
110 + new String(new char[pad2]).replace('\0', ' ');
111 break;
112 }
113 }
114 }
115
116 return text;
117 }
118
119 /**
120 * Sanitise the given input to make it more Terminal-friendly by removing
121 * combining characters.
122 *
123 * @param input
124 * the input to sanitise
125 * @param allowUnicode
126 * allow Unicode or only allow ASCII Latin characters
127 *
128 * @return the sanitised {@link String}
129 */
130 static public String sanitize(String input, boolean allowUnicode) {
131 return sanitize(input, allowUnicode, !allowUnicode);
132 }
133
134 /**
135 * Sanitise the given input to make it more Terminal-friendly by removing
136 * combining characters.
137 *
138 * @param input
139 * the input to sanitise
140 * @param allowUnicode
141 * allow Unicode or only allow ASCII Latin characters
142 * @param removeAllAccents
143 * TRUE to replace all accentuated characters by their non
144 * accentuated counter-parts
145 *
146 * @return the sanitised {@link String}
147 */
148 static public String sanitize(String input, boolean allowUnicode,
149 boolean removeAllAccents) {
150
151 if (removeAllAccents) {
152 input = Normalizer.normalize(input, Form.NFKD);
153 input = marks.matcher(input).replaceAll("");
154 }
155
156 input = Normalizer.normalize(input, Form.NFKC);
157
158 if (!allowUnicode) {
159 StringBuilder builder = new StringBuilder();
160 for (int index = 0; index < input.length(); index++) {
161 char car = input.charAt(index);
162 // displayable chars in ASCII are in the range 32<->255,
163 // except DEL (127)
164 if (car >= 32 && car <= 255 && car != 127) {
165 builder.append(car);
166 }
167 }
168 input = builder.toString();
169 }
170
171 return input;
172 }
173
174 /**
175 * Convert between time in milliseconds to {@link String} in a "static" way
176 * (to exchange data over the wire, for instance).
177 *
178 * @param time
179 * the time in milliseconds
180 *
181 * @return the time as a {@link String}
182 */
183 static public String fromTime(long time) {
184 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
185 return sdf.format(new Date(time));
186 }
187
188 /**
189 * Convert between time as a {@link String} to milliseconds in a "static"
190 * way (to exchange data over the wire, for instance).
191 *
192 * @param time
193 * the time as a {@link String}
194 *
195 * @return the time in milliseconds
196 */
197 static public long toTime(String display) {
198 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
199 try {
200 return sdf.parse(display).getTime();
201 } catch (ParseException e) {
202 return -1;
203 }
204 }
205
206 /**
207 * Convert the given {@link Image} object into a Base64 representation of
208 * the same {@link Image}. object.
209 *
210 * @param image
211 * the {@link Image} object to convert
212 *
213 * @return the Base64 representation
214 *
215 * @throws IOException
216 * in case of IO error
217 */
218 static public String fromImage(BufferedImage image) throws IOException {
219 String imageString = null;
220 ByteArrayOutputStream out = new ByteArrayOutputStream();
221
222 ImageIO.write(image, "jpeg", out);
223 byte[] imageBytes = out.toByteArray();
224
225 imageString = new String(Base64.getEncoder().encode(imageBytes));
226
227 out.close();
228
229 return imageString;
230 }
231
232 /**
233 * Convert the given {@link File} image into a Base64 representation of the
234 * same {@link File}.
235 *
236 * @param file
237 * the {@link File} image to convert
238 *
239 * @return the Base64 representation
240 *
241 * @throws IOException
242 * in case of IO error
243 */
244 static public String fromStream(InputStream in) throws IOException {
245 String fileString = null;
246 ByteArrayOutputStream out = new ByteArrayOutputStream();
247
248 byte[] buf = new byte[8192];
249
250 int c = 0;
251 while ((c = in.read(buf, 0, buf.length)) > 0) {
252 out.write(buf, 0, c);
253 }
254 out.flush();
255 in.close();
256
257 fileString = new String(Base64.getEncoder().encode(out.toByteArray()));
258 out.close();
259
260 return fileString;
261 }
262
263 /**
264 * Convert the given Base64 representation of an image into an {@link Image}
265 * object.
266 *
267 * @param b64data
268 * the {@link Image} in Base64 format
269 *
270 * @return the {@link Image} object
271 *
272 * @throws IOException
273 * in case of IO error
274 */
275 static public BufferedImage toImage(String b64data) throws IOException {
276 ByteArrayInputStream in = new ByteArrayInputStream(Base64.getDecoder()
277 .decode(b64data));
278 return toImage(in);
279 }
280
281 /**
282 * Convert the given {@link InputStream} (which must allow calls to
283 * {@link InputStream#reset()}) into an {@link Image} object.
284 *
285 * @param in
286 * the 'resetable' {@link InputStream}
287 *
288 * @return the {@link Image} object
289 *
290 * @throws IOException
291 * in case of IO error
292 */
293 static public BufferedImage toImage(InputStream in) throws IOException {
294 int orientation;
295 try {
296 orientation = getExifTransorm(in);
297 } catch (Exception e) {
298 // no EXIF transform, ok
299 orientation = -1;
300 }
301
302 in.reset();
303 BufferedImage image = ImageIO.read(in);
304
305 if (image == null) {
306 throw new IOException("Failed to convert input to image");
307 }
308
309 // Note: this code has been found on internet;
310 // thank you anonymous coder.
311 int width = image.getWidth();
312 int height = image.getHeight();
313 AffineTransform affineTransform = new AffineTransform();
314
315 switch (orientation) {
316 case 1:
317 break;
318 case 2: // Flip X
319 affineTransform.scale(-1.0, 1.0);
320 affineTransform.translate(-width, 0);
321 break;
322 case 3: // PI rotation
323 affineTransform.translate(width, height);
324 affineTransform.rotate(Math.PI);
325 break;
326 case 4: // Flip Y
327 affineTransform.scale(1.0, -1.0);
328 affineTransform.translate(0, -height);
329 break;
330 case 5: // - PI/2 and Flip X
331 affineTransform.rotate(-Math.PI / 2);
332 affineTransform.scale(-1.0, 1.0);
333 break;
334 case 6: // -PI/2 and -width
335 affineTransform.translate(height, 0);
336 affineTransform.rotate(Math.PI / 2);
337 break;
338 case 7: // PI/2 and Flip
339 affineTransform.scale(-1.0, 1.0);
340 affineTransform.translate(-height, 0);
341 affineTransform.translate(0, width);
342 affineTransform.rotate(3 * Math.PI / 2);
343 break;
344 case 8: // PI / 2
345 affineTransform.translate(0, width);
346 affineTransform.rotate(3 * Math.PI / 2);
347 break;
348 default:
349 affineTransform = null;
350 break;
351 }
352
353 if (affineTransform != null) {
354 AffineTransformOp affineTransformOp = new AffineTransformOp(
355 affineTransform, AffineTransformOp.TYPE_BILINEAR);
356
357 BufferedImage transformedImage = new BufferedImage(height, width,
358 image.getType());
359 transformedImage = affineTransformOp
360 .filter(image, transformedImage);
361
362 image = transformedImage;
363 }
364 //
365
366 return image;
367 }
368
369 /**
370 * Return a hash of the given {@link String}.
371 *
372 * @param input
373 * the input data
374 *
375 * @return the hash
376 */
377 static public String getHash(String input) {
378 try {
379 MessageDigest md = MessageDigest.getInstance("MD5");
380 md.update(input.getBytes());
381 byte byteData[] = md.digest();
382
383 StringBuffer hexString = new StringBuffer();
384 for (int i = 0; i < byteData.length; i++) {
385 String hex = Integer.toHexString(0xff & byteData[i]);
386 if (hex.length() == 1)
387 hexString.append('0');
388 hexString.append(hex);
389 }
390
391 return hexString.toString();
392 } catch (NoSuchAlgorithmException e) {
393 return input;
394 }
395 }
396
397 /**
398 * Return the EXIF transformation flag of this image if any.
399 *
400 * <p>
401 * Note: this code has been found on internet; thank you anonymous coder.
402 * </p>
403 *
404 * @param in
405 * the data {@link InputStream}
406 *
407 * @return the transformation flag if any
408 *
409 * @throws IOException
410 * in case of IO error
411 */
412 static private int getExifTransorm(InputStream in) throws IOException {
413 int[] exif_data = new int[100];
414 int set_flag = 0;
415 int is_motorola = 0;
416
417 /* Read File head, check for JPEG SOI + Exif APP1 */
418 for (int i = 0; i < 4; i++)
419 exif_data[i] = in.read();
420
421 if (exif_data[0] != 0xFF || exif_data[1] != 0xD8
422 || exif_data[2] != 0xFF || exif_data[3] != 0xE1)
423 return -2;
424
425 /* Get the marker parameter length count */
426 int length = (in.read() << 8 | in.read());
427
428 /* Length includes itself, so must be at least 2 */
429 /* Following Exif data length must be at least 6 */
430 if (length < 8)
431 return -1;
432 length -= 8;
433 /* Read Exif head, check for "Exif" */
434 for (int i = 0; i < 6; i++)
435 exif_data[i] = in.read();
436
437 if (exif_data[0] != 0x45 || exif_data[1] != 0x78
438 || exif_data[2] != 0x69 || exif_data[3] != 0x66
439 || exif_data[4] != 0 || exif_data[5] != 0)
440 return -1;
441
442 /* Read Exif body */
443 length = length > exif_data.length ? exif_data.length : length;
444 for (int i = 0; i < length; i++)
445 exif_data[i] = in.read();
446
447 if (length < 12)
448 return -1; /* Length of an IFD entry */
449
450 /* Discover byte order */
451 if (exif_data[0] == 0x49 && exif_data[1] == 0x49)
452 is_motorola = 0;
453 else if (exif_data[0] == 0x4D && exif_data[1] == 0x4D)
454 is_motorola = 1;
455 else
456 return -1;
457
458 /* Check Tag Mark */
459 if (is_motorola == 1) {
460 if (exif_data[2] != 0)
461 return -1;
462 if (exif_data[3] != 0x2A)
463 return -1;
464 } else {
465 if (exif_data[3] != 0)
466 return -1;
467 if (exif_data[2] != 0x2A)
468 return -1;
469 }
470
471 /* Get first IFD offset (offset to IFD0) */
472 int offset;
473 if (is_motorola == 1) {
474 if (exif_data[4] != 0)
475 return -1;
476 if (exif_data[5] != 0)
477 return -1;
478 offset = exif_data[6];
479 offset <<= 8;
480 offset += exif_data[7];
481 } else {
482 if (exif_data[7] != 0)
483 return -1;
484 if (exif_data[6] != 0)
485 return -1;
486 offset = exif_data[5];
487 offset <<= 8;
488 offset += exif_data[4];
489 }
490 if (offset > length - 2)
491 return -1; /* check end of data segment */
492
493 /* Get the number of directory entries contained in this IFD */
494 int number_of_tags;
495 if (is_motorola == 1) {
496 number_of_tags = exif_data[offset];
497 number_of_tags <<= 8;
498 number_of_tags += exif_data[offset + 1];
499 } else {
500 number_of_tags = exif_data[offset + 1];
501 number_of_tags <<= 8;
502 number_of_tags += exif_data[offset];
503 }
504 if (number_of_tags == 0)
505 return -1;
506 offset += 2;
507
508 /* Search for Orientation Tag in IFD0 */
509 for (;;) {
510 if (offset > length - 12)
511 return -1; /* check end of data segment */
512 /* Get Tag number */
513 int tagnum;
514 if (is_motorola == 1) {
515 tagnum = exif_data[offset];
516 tagnum <<= 8;
517 tagnum += exif_data[offset + 1];
518 } else {
519 tagnum = exif_data[offset + 1];
520 tagnum <<= 8;
521 tagnum += exif_data[offset];
522 }
523 if (tagnum == 0x0112)
524 break; /* found Orientation Tag */
525 if (--number_of_tags == 0)
526 return -1;
527 offset += 12;
528 }
529
530 /* Get the Orientation value */
531 if (is_motorola == 1) {
532 if (exif_data[offset + 8] != 0)
533 return -1;
534 set_flag = exif_data[offset + 9];
535 } else {
536 if (exif_data[offset + 9] != 0)
537 return -1;
538 set_flag = exif_data[offset + 8];
539 }
540 if (set_flag > 8)
541 return -1;
542
543 return set_flag;
544 }
545
546 /**
547 * Remove the HTML content from the given input, and un-html-ize the rest.
548 *
549 * @param html
550 * the HTML-encoded content
551 *
552 * @return the HTML-free equivalent content
553 */
554 public static String unhtml(String html) {
555 StringBuilder builder = new StringBuilder();
556
557 int inTag = 0;
558 for (char car : html.toCharArray()) {
559 if (car == '<') {
560 inTag++;
561 } else if (car == '>') {
562 inTag--;
563 } else if (inTag <= 0) {
564 builder.append(car);
565 }
566 }
567
568 return HtmlEscape.unescapeHtml(builder.toString());
569 }
570
571 /**
572 * Escape the given {@link String} so it can be used in XML, as content.
573 *
574 * @param input
575 * the input {@link String}
576 *
577 * @return the escaped {@link String}
578 */
579 public static String xmlEscape(String input) {
580 if (input == null) {
581 return "";
582 }
583
584 return HtmlEscape.escapeHtml(input,
585 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
586 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
587 }
588
589 /**
590 * Escape the given {@link String} so it can be used in XML, as text content
591 * inside double-quotes.
592 *
593 * @param input
594 * the input {@link String}
595 *
596 * @return the escaped {@link String}
597 */
598 public static String xmlEscapeQuote(String input) {
599 if (input == null) {
600 return "";
601 }
602
603 return HtmlEscape.escapeHtml(input,
604 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
605 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
606 }
607}