af69845b1a6c416f55e162b506c49b7ad2d938cf
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
1 package be.nikiroo.utils;
2
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.Normalizer;
import java.text.Normalizer.Form;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.Scanner;
import java.util.regex.Pattern;

import javax.imageio.ImageIO;

import org.unbescape.html.HtmlEscape;
import org.unbescape.html.HtmlEscapeLevel;
import org.unbescape.html.HtmlEscapeType;
25
26 /**
27 * This class offer some utilities based around {@link String}s.
28 *
29 * @author niki
30 */
31 public class StringUtils {
32 /**
33 * This enum type will decide the alignment of a {@link String} when padding
34 * is applied or if there is enough horizontal space for it to be aligned.
35 */
36 public enum Alignment {
37 /** Aligned at left. */
38 Beginning,
39 /** Centered. */
40 Center,
41 /** Aligned at right. */
42 End
43 }
44
45 static private Pattern marks = Pattern
46 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
47
48 /**
49 * Fix the size of the given {@link String} either with space-padding or by
50 * shortening it.
51 *
52 * @param text
53 * the {@link String} to fix
54 * @param width
55 * the size of the resulting {@link String} or -1 for a noop
56 *
57 * @return the resulting {@link String} of size <i>size</i>
58 */
59 static public String padString(String text, int width) {
60 return padString(text, width, true, Alignment.Beginning);
61 }
62
63 /**
64 * Fix the size of the given {@link String} either with space-padding or by
65 * optionally shortening it.
66 *
67 * @param text
68 * the {@link String} to fix
69 * @param width
70 * the size of the resulting {@link String} if the text fits or
71 * if cut is TRUE or -1 for a noop
72 * @param cut
73 * cut the {@link String} shorter if needed
74 * @param align
75 * align the {@link String} in this position if we have enough
76 * space
77 *
78 * @return the resulting {@link String} of size <i>size</i> minimum
79 */
80 static public String padString(String text, int width, boolean cut,
81 Alignment align) {
82
83 if (width >= 0) {
84 if (text == null)
85 text = "";
86
87 int diff = width - text.length();
88
89 if (diff < 0) {
90 if (cut)
91 text = text.substring(0, width);
92 } else if (diff > 0) {
93 if (diff < 2 && align != Alignment.End)
94 align = Alignment.Beginning;
95
96 switch (align) {
97 case Beginning:
98 text = text + new String(new char[diff]).replace('\0', ' ');
99 break;
100 case End:
101 text = new String(new char[diff]).replace('\0', ' ') + text;
102 break;
103 case Center:
104 default:
105 int pad1 = (diff) / 2;
106 int pad2 = (diff + 1) / 2;
107 text = new String(new char[pad1]).replace('\0', ' ') + text
108 + new String(new char[pad2]).replace('\0', ' ');
109 break;
110 }
111 }
112 }
113
114 return text;
115 }
116
117 /**
118 * Sanitise the given input to make it more Terminal-friendly by removing
119 * combining characters.
120 *
121 * @param input
122 * the input to sanitise
123 * @param allowUnicode
124 * allow Unicode or only allow ASCII Latin characters
125 *
126 * @return the sanitised {@link String}
127 */
128 static public String sanitize(String input, boolean allowUnicode) {
129 return sanitize(input, allowUnicode, !allowUnicode);
130 }
131
132 /**
133 * Sanitise the given input to make it more Terminal-friendly by removing
134 * combining characters.
135 *
136 * @param input
137 * the input to sanitise
138 * @param allowUnicode
139 * allow Unicode or only allow ASCII Latin characters
140 * @param removeAllAccents
141 * TRUE to replace all accentuated characters by their non
142 * accentuated counter-parts
143 *
144 * @return the sanitised {@link String}
145 */
146 static public String sanitize(String input, boolean allowUnicode,
147 boolean removeAllAccents) {
148
149 if (removeAllAccents) {
150 input = Normalizer.normalize(input, Form.NFKD);
151 input = marks.matcher(input).replaceAll("");
152 }
153
154 input = Normalizer.normalize(input, Form.NFKC);
155
156 if (!allowUnicode) {
157 StringBuilder builder = new StringBuilder();
158 for (int index = 0; index < input.length(); index++) {
159 char car = input.charAt(index);
160 // displayable chars in ASCII are in the range 32<->255,
161 // except DEL (127)
162 if (car >= 32 && car <= 255 && car != 127) {
163 builder.append(car);
164 }
165 }
166 input = builder.toString();
167 }
168
169 return input;
170 }
171
172 /**
173 * Convert between time in milliseconds to {@link String} in a "static" way
174 * (to exchange data over the wire, for instance).
175 *
176 * @param time
177 * the time in milliseconds
178 *
179 * @return the time as a {@link String}
180 */
181 static public String fromTime(long time) {
182 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
183 return sdf.format(new Date(time));
184 }
185
186 /**
187 * Convert between time as a {@link String} to milliseconds in a "static"
188 * way (to exchange data over the wire, for instance).
189 *
190 * @param displayTime
191 * the time as a {@link String}
192 *
193 * @return the time in milliseconds
194 */
195 static public long toTime(String displayTime) {
196 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
197 try {
198 return sdf.parse(displayTime).getTime();
199 } catch (ParseException e) {
200 return -1;
201 }
202 }
203
204 /**
205 * Convert the given {@link Image} object into a Base64 representation of
206 * the same {@link Image}. object.
207 *
208 * @param image
209 * the {@link Image} object to convert
210 *
211 * @return the Base64 representation
212 *
213 * @throws IOException
214 * in case of IO error
215 */
216 static public String fromImage(BufferedImage image) throws IOException {
217 return fromImage(image, null);
218 }
219
220 /**
221 * Convert the given {@link Image} object into a Base64 representation of
222 * the same {@link Image}. object.
223 *
224 * @param image
225 * the {@link Image} object to convert
226 * @param format
227 * the image format to use to serialise it (default is PNG)
228 *
229 * @return the Base64 representation
230 *
231 * @throws IOException
232 * in case of IO error
233 */
234 static public String fromImage(BufferedImage image, String format)
235 throws IOException {
236 if (format == null) {
237 format = "png";
238 }
239
240 String imageString = null;
241 ByteArrayOutputStream out = new ByteArrayOutputStream();
242
243 ImageIO.write(image, format, out);
244 byte[] imageBytes = out.toByteArray();
245
246 imageString = new String(Base64.encodeBytes(imageBytes));
247
248 out.close();
249
250 return imageString;
251 }
252
253 /**
254 * Convert the given image into a Base64 representation of the same
255 * {@link File}.
256 *
257 * @param in
258 * the image to convert
259 *
260 * @return the Base64 representation
261 *
262 * @throws IOException
263 * in case of IO error
264 */
265 static public String fromStream(InputStream in) throws IOException {
266 String fileString = null;
267 ByteArrayOutputStream out = new ByteArrayOutputStream();
268
269 byte[] buf = new byte[8192];
270
271 int c = 0;
272 while ((c = in.read(buf, 0, buf.length)) > 0) {
273 out.write(buf, 0, c);
274 }
275 out.flush();
276 in.close();
277
278 fileString = new String(Base64.encodeBytes(out.toByteArray()));
279 out.close();
280
281 return fileString;
282 }
283
284 /**
285 * Convert the given Base64 representation of an image into an {@link Image}
286 * object.
287 *
288 * @param b64data
289 * the {@link Image} in Base64 format
290 *
291 * @return the {@link Image} object
292 *
293 * @throws IOException
294 * in case of IO error
295 */
296 static public BufferedImage toImage(String b64data) throws IOException {
297 ByteArrayInputStream in = new ByteArrayInputStream(
298 Base64.decode(b64data));
299 return IOUtils.toImage(in);
300 }
301
302 /**
303 * Return a hash of the given {@link String}.
304 *
305 * @param input
306 * the input data
307 *
308 * @return the hash
309 */
310 static public String getHash(String input) {
311 try {
312 MessageDigest md = MessageDigest.getInstance("MD5");
313 md.update(input.getBytes());
314 byte byteData[] = md.digest();
315
316 StringBuffer hexString = new StringBuffer();
317 for (int i = 0; i < byteData.length; i++) {
318 String hex = Integer.toHexString(0xff & byteData[i]);
319 if (hex.length() == 1)
320 hexString.append('0');
321 hexString.append(hex);
322 }
323
324 return hexString.toString();
325 } catch (NoSuchAlgorithmException e) {
326 return input;
327 }
328 }
329
330 /**
331 * Remove the HTML content from the given input, and un-html-ize the rest.
332 *
333 * @param html
334 * the HTML-encoded content
335 *
336 * @return the HTML-free equivalent content
337 */
338 public static String unhtml(String html) {
339 StringBuilder builder = new StringBuilder();
340
341 int inTag = 0;
342 for (char car : html.toCharArray()) {
343 if (car == '<') {
344 inTag++;
345 } else if (car == '>') {
346 inTag--;
347 } else if (inTag <= 0) {
348 builder.append(car);
349 }
350 }
351
352 char nbsp = ' '; // non-breakable space (a special char)
353 char space = ' ';
354 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
355 }
356
357 /**
358 * Escape the given {@link String} so it can be used in XML, as content.
359 *
360 * @param input
361 * the input {@link String}
362 *
363 * @return the escaped {@link String}
364 */
365 public static String xmlEscape(String input) {
366 if (input == null) {
367 return "";
368 }
369
370 return HtmlEscape.escapeHtml(input,
371 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
372 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
373 }
374
375 /**
376 * Escape the given {@link String} so it can be used in XML, as text content
377 * inside double-quotes.
378 *
379 * @param input
380 * the input {@link String}
381 *
382 * @return the escaped {@link String}
383 */
384 public static String xmlEscapeQuote(String input) {
385 if (input == null) {
386 return "";
387 }
388
389 return HtmlEscape.escapeHtml(input,
390 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
391 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
392 }
393
394 public static String zip64(String data) {
395 try {
396 return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
397 } catch (IOException e) {
398 e.printStackTrace();
399 return null;
400 }
401 }
402
403 public static String unzip64(String data) throws IOException {
404 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
405 Base64.GZIP));
406
407 Scanner scan = new Scanner(in);
408 scan.useDelimiter("\\A");
409 try {
410 return scan.next();
411 } finally {
412 scan.close();
413 }
414 }
415 }