Compat fix for Java 1.6 (unused "import")
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
1 package be.nikiroo.utils;
2
3 import java.awt.Image;
4 import java.awt.geom.AffineTransform;
5 import java.awt.image.AffineTransformOp;
6 import java.awt.image.BufferedImage;
7 import java.io.ByteArrayInputStream;
8 import java.io.ByteArrayOutputStream;
9 import java.io.File;
10 import java.io.IOException;
11 import java.io.InputStream;
12 import java.security.MessageDigest;
13 import java.security.NoSuchAlgorithmException;
14 import java.text.Normalizer;
15 import java.text.Normalizer.Form;
16 import java.text.ParseException;
17 import java.text.SimpleDateFormat;
18 import java.util.Date;
19 import java.util.regex.Pattern;
20
21 import javax.imageio.ImageIO;
22
23 import org.unbescape.html.HtmlEscape;
24 import org.unbescape.html.HtmlEscapeLevel;
25 import org.unbescape.html.HtmlEscapeType;
26
27 /**
28 * This class offers some utilities based around {@link String}s.
29 *
30 * @author niki
31 */
32 public class StringUtils {
/**
 * Horizontal placement of a {@link String} inside a wider area, used when
 * padding is applied and there is room left to position the text.
 */
public enum Alignment {
    /** The text starts at the left edge. */
    Beginning,

    /** The text is centered. */
    Center,

    /** The text ends at the right edge. */
    End
}
45
46 static private Pattern marks = Pattern
47 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
48
49 /**
50 * Fix the size of the given {@link String} either with space-padding or by
51 * shortening it.
52 *
53 * @param text
54 * the {@link String} to fix
55 * @param width
56 * the size of the resulting {@link String} or -1 for a noop
57 *
58 * @return the resulting {@link String} of size <i>size</i>
59 */
60 static public String padString(String text, int width) {
61 return padString(text, width, true, Alignment.Beginning);
62 }
63
64 /**
65 * Fix the size of the given {@link String} either with space-padding or by
66 * optionally shortening it.
67 *
68 * @param text
69 * the {@link String} to fix
70 * @param width
71 * the size of the resulting {@link String} if the text fits or
72 * if cut is TRUE or -1 for a noop
73 * @param cut
74 * cut the {@link String} shorter if needed
75 * @param align
76 * align the {@link String} in this position if we have enough
77 * space
78 *
79 * @return the resulting {@link String} of size <i>size</i> minimum
80 */
81 static public String padString(String text, int width, boolean cut,
82 Alignment align) {
83
84 if (width >= 0) {
85 if (text == null)
86 text = "";
87
88 int diff = width - text.length();
89
90 if (diff < 0) {
91 if (cut)
92 text = text.substring(0, width);
93 } else if (diff > 0) {
94 if (diff < 2 && align != Alignment.End)
95 align = Alignment.Beginning;
96
97 switch (align) {
98 case Beginning:
99 text = text + new String(new char[diff]).replace('\0', ' ');
100 break;
101 case End:
102 text = new String(new char[diff]).replace('\0', ' ') + text;
103 break;
104 case Center:
105 default:
106 int pad1 = (diff) / 2;
107 int pad2 = (diff + 1) / 2;
108 text = new String(new char[pad1]).replace('\0', ' ') + text
109 + new String(new char[pad2]).replace('\0', ' ');
110 break;
111 }
112 }
113 }
114
115 return text;
116 }
117
118 /**
119 * Sanitise the given input to make it more Terminal-friendly by removing
120 * combining characters.
121 *
122 * @param input
123 * the input to sanitise
124 * @param allowUnicode
125 * allow Unicode or only allow ASCII Latin characters
126 *
127 * @return the sanitised {@link String}
128 */
129 static public String sanitize(String input, boolean allowUnicode) {
130 return sanitize(input, allowUnicode, !allowUnicode);
131 }
132
133 /**
134 * Sanitise the given input to make it more Terminal-friendly by removing
135 * combining characters.
136 *
137 * @param input
138 * the input to sanitise
139 * @param allowUnicode
140 * allow Unicode or only allow ASCII Latin characters
141 * @param removeAllAccents
142 * TRUE to replace all accentuated characters by their non
143 * accentuated counter-parts
144 *
145 * @return the sanitised {@link String}
146 */
147 static public String sanitize(String input, boolean allowUnicode,
148 boolean removeAllAccents) {
149
150 if (removeAllAccents) {
151 input = Normalizer.normalize(input, Form.NFKD);
152 input = marks.matcher(input).replaceAll("");
153 }
154
155 input = Normalizer.normalize(input, Form.NFKC);
156
157 if (!allowUnicode) {
158 StringBuilder builder = new StringBuilder();
159 for (int index = 0; index < input.length(); index++) {
160 char car = input.charAt(index);
161 // displayable chars in ASCII are in the range 32<->255,
162 // except DEL (127)
163 if (car >= 32 && car <= 255 && car != 127) {
164 builder.append(car);
165 }
166 }
167 input = builder.toString();
168 }
169
170 return input;
171 }
172
173 /**
174 * Convert between time in milliseconds to {@link String} in a "static" way
175 * (to exchange data over the wire, for instance).
176 *
177 * @param time
178 * the time in milliseconds
179 *
180 * @return the time as a {@link String}
181 */
182 static public String fromTime(long time) {
183 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
184 return sdf.format(new Date(time));
185 }
186
187 /**
188 * Convert between time as a {@link String} to milliseconds in a "static"
189 * way (to exchange data over the wire, for instance).
190 *
191 * @param time
192 * the time as a {@link String}
193 *
194 * @return the time in milliseconds
195 */
196 static public long toTime(String display) {
197 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
198 try {
199 return sdf.parse(display).getTime();
200 } catch (ParseException e) {
201 return -1;
202 }
203 }
204
205 /**
206 * Convert the given {@link Image} object into a Base64 representation of
207 * the same {@link Image}. object.
208 *
209 * @param image
210 * the {@link Image} object to convert
211 *
212 * @return the Base64 representation
213 *
214 * @throws IOException
215 * in case of IO error
216 */
217 static public String fromImage(BufferedImage image) throws IOException {
218 String imageString = null;
219 ByteArrayOutputStream out = new ByteArrayOutputStream();
220
221 ImageIO.write(image, "jpeg", out);
222 byte[] imageBytes = out.toByteArray();
223
224 imageString = new String(Base64.encodeBytes(imageBytes));
225
226 out.close();
227
228 return imageString;
229 }
230
231 /**
232 * Convert the given {@link File} image into a Base64 representation of the
233 * same {@link File}.
234 *
235 * @param file
236 * the {@link File} image to convert
237 *
238 * @return the Base64 representation
239 *
240 * @throws IOException
241 * in case of IO error
242 */
243 static public String fromStream(InputStream in) throws IOException {
244 String fileString = null;
245 ByteArrayOutputStream out = new ByteArrayOutputStream();
246
247 byte[] buf = new byte[8192];
248
249 int c = 0;
250 while ((c = in.read(buf, 0, buf.length)) > 0) {
251 out.write(buf, 0, c);
252 }
253 out.flush();
254 in.close();
255
256 fileString = new String(Base64.encodeBytes(out.toByteArray()));
257 out.close();
258
259 return fileString;
260 }
261
262 /**
263 * Convert the given Base64 representation of an image into an {@link Image}
264 * object.
265 *
266 * @param b64data
267 * the {@link Image} in Base64 format
268 *
269 * @return the {@link Image} object
270 *
271 * @throws IOException
272 * in case of IO error
273 */
274 static public BufferedImage toImage(String b64data) throws IOException {
275 ByteArrayInputStream in = new ByteArrayInputStream(
276 Base64.decode(b64data));
277 return toImage(in);
278 }
279
280 /**
281 * Convert the given {@link InputStream} (which must allow calls to
282 * {@link InputStream#reset()}) into an {@link Image} object.
283 *
284 * @param in
285 * the 'resetable' {@link InputStream}
286 *
287 * @return the {@link Image} object
288 *
289 * @throws IOException
290 * in case of IO error
291 */
292 static public BufferedImage toImage(InputStream in) throws IOException {
293 int orientation;
294 try {
295 orientation = getExifTransorm(in);
296 } catch (Exception e) {
297 // no EXIF transform, ok
298 orientation = -1;
299 }
300
301 in.reset();
302 BufferedImage image = ImageIO.read(in);
303
304 if (image == null) {
305 throw new IOException("Failed to convert input to image");
306 }
307
308 // Note: this code has been found on internet;
309 // thank you anonymous coder.
310 int width = image.getWidth();
311 int height = image.getHeight();
312 AffineTransform affineTransform = new AffineTransform();
313
314 switch (orientation) {
315 case 1:
316 break;
317 case 2: // Flip X
318 affineTransform.scale(-1.0, 1.0);
319 affineTransform.translate(-width, 0);
320 break;
321 case 3: // PI rotation
322 affineTransform.translate(width, height);
323 affineTransform.rotate(Math.PI);
324 break;
325 case 4: // Flip Y
326 affineTransform.scale(1.0, -1.0);
327 affineTransform.translate(0, -height);
328 break;
329 case 5: // - PI/2 and Flip X
330 affineTransform.rotate(-Math.PI / 2);
331 affineTransform.scale(-1.0, 1.0);
332 break;
333 case 6: // -PI/2 and -width
334 affineTransform.translate(height, 0);
335 affineTransform.rotate(Math.PI / 2);
336 break;
337 case 7: // PI/2 and Flip
338 affineTransform.scale(-1.0, 1.0);
339 affineTransform.translate(-height, 0);
340 affineTransform.translate(0, width);
341 affineTransform.rotate(3 * Math.PI / 2);
342 break;
343 case 8: // PI / 2
344 affineTransform.translate(0, width);
345 affineTransform.rotate(3 * Math.PI / 2);
346 break;
347 default:
348 affineTransform = null;
349 break;
350 }
351
352 if (affineTransform != null) {
353 AffineTransformOp affineTransformOp = new AffineTransformOp(
354 affineTransform, AffineTransformOp.TYPE_BILINEAR);
355
356 BufferedImage transformedImage = new BufferedImage(height, width,
357 image.getType());
358 transformedImage = affineTransformOp
359 .filter(image, transformedImage);
360
361 image = transformedImage;
362 }
363 //
364
365 return image;
366 }
367
368 /**
369 * Return a hash of the given {@link String}.
370 *
371 * @param input
372 * the input data
373 *
374 * @return the hash
375 */
376 static public String getHash(String input) {
377 try {
378 MessageDigest md = MessageDigest.getInstance("MD5");
379 md.update(input.getBytes());
380 byte byteData[] = md.digest();
381
382 StringBuffer hexString = new StringBuffer();
383 for (int i = 0; i < byteData.length; i++) {
384 String hex = Integer.toHexString(0xff & byteData[i]);
385 if (hex.length() == 1)
386 hexString.append('0');
387 hexString.append(hex);
388 }
389
390 return hexString.toString();
391 } catch (NoSuchAlgorithmException e) {
392 return input;
393 }
394 }
395
396 /**
397 * Return the EXIF transformation flag of this image if any.
398 *
399 * <p>
400 * Note: this code has been found on internet; thank you anonymous coder.
401 * </p>
402 *
403 * @param in
404 * the data {@link InputStream}
405 *
406 * @return the transformation flag if any
407 *
408 * @throws IOException
409 * in case of IO error
410 */
411 static private int getExifTransorm(InputStream in) throws IOException {
412 int[] exif_data = new int[100];
413 int set_flag = 0;
414 int is_motorola = 0;
415
416 /* Read File head, check for JPEG SOI + Exif APP1 */
417 for (int i = 0; i < 4; i++)
418 exif_data[i] = in.read();
419
420 if (exif_data[0] != 0xFF || exif_data[1] != 0xD8
421 || exif_data[2] != 0xFF || exif_data[3] != 0xE1)
422 return -2;
423
424 /* Get the marker parameter length count */
425 int length = (in.read() << 8 | in.read());
426
427 /* Length includes itself, so must be at least 2 */
428 /* Following Exif data length must be at least 6 */
429 if (length < 8)
430 return -1;
431 length -= 8;
432 /* Read Exif head, check for "Exif" */
433 for (int i = 0; i < 6; i++)
434 exif_data[i] = in.read();
435
436 if (exif_data[0] != 0x45 || exif_data[1] != 0x78
437 || exif_data[2] != 0x69 || exif_data[3] != 0x66
438 || exif_data[4] != 0 || exif_data[5] != 0)
439 return -1;
440
441 /* Read Exif body */
442 length = length > exif_data.length ? exif_data.length : length;
443 for (int i = 0; i < length; i++)
444 exif_data[i] = in.read();
445
446 if (length < 12)
447 return -1; /* Length of an IFD entry */
448
449 /* Discover byte order */
450 if (exif_data[0] == 0x49 && exif_data[1] == 0x49)
451 is_motorola = 0;
452 else if (exif_data[0] == 0x4D && exif_data[1] == 0x4D)
453 is_motorola = 1;
454 else
455 return -1;
456
457 /* Check Tag Mark */
458 if (is_motorola == 1) {
459 if (exif_data[2] != 0)
460 return -1;
461 if (exif_data[3] != 0x2A)
462 return -1;
463 } else {
464 if (exif_data[3] != 0)
465 return -1;
466 if (exif_data[2] != 0x2A)
467 return -1;
468 }
469
470 /* Get first IFD offset (offset to IFD0) */
471 int offset;
472 if (is_motorola == 1) {
473 if (exif_data[4] != 0)
474 return -1;
475 if (exif_data[5] != 0)
476 return -1;
477 offset = exif_data[6];
478 offset <<= 8;
479 offset += exif_data[7];
480 } else {
481 if (exif_data[7] != 0)
482 return -1;
483 if (exif_data[6] != 0)
484 return -1;
485 offset = exif_data[5];
486 offset <<= 8;
487 offset += exif_data[4];
488 }
489 if (offset > length - 2)
490 return -1; /* check end of data segment */
491
492 /* Get the number of directory entries contained in this IFD */
493 int number_of_tags;
494 if (is_motorola == 1) {
495 number_of_tags = exif_data[offset];
496 number_of_tags <<= 8;
497 number_of_tags += exif_data[offset + 1];
498 } else {
499 number_of_tags = exif_data[offset + 1];
500 number_of_tags <<= 8;
501 number_of_tags += exif_data[offset];
502 }
503 if (number_of_tags == 0)
504 return -1;
505 offset += 2;
506
507 /* Search for Orientation Tag in IFD0 */
508 for (;;) {
509 if (offset > length - 12)
510 return -1; /* check end of data segment */
511 /* Get Tag number */
512 int tagnum;
513 if (is_motorola == 1) {
514 tagnum = exif_data[offset];
515 tagnum <<= 8;
516 tagnum += exif_data[offset + 1];
517 } else {
518 tagnum = exif_data[offset + 1];
519 tagnum <<= 8;
520 tagnum += exif_data[offset];
521 }
522 if (tagnum == 0x0112)
523 break; /* found Orientation Tag */
524 if (--number_of_tags == 0)
525 return -1;
526 offset += 12;
527 }
528
529 /* Get the Orientation value */
530 if (is_motorola == 1) {
531 if (exif_data[offset + 8] != 0)
532 return -1;
533 set_flag = exif_data[offset + 9];
534 } else {
535 if (exif_data[offset + 9] != 0)
536 return -1;
537 set_flag = exif_data[offset + 8];
538 }
539 if (set_flag > 8)
540 return -1;
541
542 return set_flag;
543 }
544
545 /**
546 * Remove the HTML content from the given input, and un-html-ize the rest.
547 *
548 * @param html
549 * the HTML-encoded content
550 *
551 * @return the HTML-free equivalent content
552 */
553 public static String unhtml(String html) {
554 StringBuilder builder = new StringBuilder();
555
556 int inTag = 0;
557 for (char car : html.toCharArray()) {
558 if (car == '<') {
559 inTag++;
560 } else if (car == '>') {
561 inTag--;
562 } else if (inTag <= 0) {
563 builder.append(car);
564 }
565 }
566
567 return HtmlEscape.unescapeHtml(builder.toString());
568 }
569
570 /**
571 * Escape the given {@link String} so it can be used in XML, as content.
572 *
573 * @param input
574 * the input {@link String}
575 *
576 * @return the escaped {@link String}
577 */
578 public static String xmlEscape(String input) {
579 if (input == null) {
580 return "";
581 }
582
583 return HtmlEscape.escapeHtml(input,
584 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
585 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
586 }
587
588 /**
589 * Escape the given {@link String} so it can be used in XML, as text content
590 * inside double-quotes.
591 *
592 * @param input
593 * the input {@link String}
594 *
595 * @return the escaped {@link String}
596 */
597 public static String xmlEscapeQuote(String input) {
598 if (input == null) {
599 return "";
600 }
601
602 return HtmlEscape.escapeHtml(input,
603 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
604 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
605 }
606 }