dc40e878d9c68a3b8b25af2ba03011ef39f57f7c
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
1 package be.nikiroo.utils;
2
3 import java.awt.Image;
4 import java.awt.image.BufferedImage;
5 import java.io.ByteArrayInputStream;
6 import java.io.ByteArrayOutputStream;
7 import java.io.DataInputStream;
8 import java.io.File;
9 import java.io.IOException;
10 import java.io.InputStream;
11 import java.security.MessageDigest;
12 import java.security.NoSuchAlgorithmException;
13 import java.text.Normalizer;
14 import java.text.Normalizer.Form;
15 import java.text.ParseException;
16 import java.text.SimpleDateFormat;
17 import java.util.Date;
18 import java.util.Scanner;
19 import java.util.regex.Pattern;
20 import java.util.zip.ZipInputStream;
21
22 import javax.imageio.ImageIO;
23
24 import org.unbescape.html.HtmlEscape;
25 import org.unbescape.html.HtmlEscapeLevel;
26 import org.unbescape.html.HtmlEscapeType;
27
28 /**
29 * This class offer some utilities based around {@link String}s.
30 *
31 * @author niki
32 */
33 public class StringUtils {
34 /**
35 * This enum type will decide the alignment of a {@link String} when padding
36 * is applied or if there is enough horizontal space for it to be aligned.
37 */
38 public enum Alignment {
39 /** Aligned at left. */
40 Beginning,
41 /** Centered. */
42 Center,
43 /** Aligned at right. */
44 End
45 }
46
47 static private Pattern marks = Pattern
48 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
49
50 /**
51 * Fix the size of the given {@link String} either with space-padding or by
52 * shortening it.
53 *
54 * @param text
55 * the {@link String} to fix
56 * @param width
57 * the size of the resulting {@link String} or -1 for a noop
58 *
59 * @return the resulting {@link String} of size <i>size</i>
60 */
61 static public String padString(String text, int width) {
62 return padString(text, width, true, Alignment.Beginning);
63 }
64
65 /**
66 * Fix the size of the given {@link String} either with space-padding or by
67 * optionally shortening it.
68 *
69 * @param text
70 * the {@link String} to fix
71 * @param width
72 * the size of the resulting {@link String} if the text fits or
73 * if cut is TRUE or -1 for a noop
74 * @param cut
75 * cut the {@link String} shorter if needed
76 * @param align
77 * align the {@link String} in this position if we have enough
78 * space
79 *
80 * @return the resulting {@link String} of size <i>size</i> minimum
81 */
82 static public String padString(String text, int width, boolean cut,
83 Alignment align) {
84
85 if (width >= 0) {
86 if (text == null)
87 text = "";
88
89 int diff = width - text.length();
90
91 if (diff < 0) {
92 if (cut)
93 text = text.substring(0, width);
94 } else if (diff > 0) {
95 if (diff < 2 && align != Alignment.End)
96 align = Alignment.Beginning;
97
98 switch (align) {
99 case Beginning:
100 text = text + new String(new char[diff]).replace('\0', ' ');
101 break;
102 case End:
103 text = new String(new char[diff]).replace('\0', ' ') + text;
104 break;
105 case Center:
106 default:
107 int pad1 = (diff) / 2;
108 int pad2 = (diff + 1) / 2;
109 text = new String(new char[pad1]).replace('\0', ' ') + text
110 + new String(new char[pad2]).replace('\0', ' ');
111 break;
112 }
113 }
114 }
115
116 return text;
117 }
118
119 /**
120 * Sanitise the given input to make it more Terminal-friendly by removing
121 * combining characters.
122 *
123 * @param input
124 * the input to sanitise
125 * @param allowUnicode
126 * allow Unicode or only allow ASCII Latin characters
127 *
128 * @return the sanitised {@link String}
129 */
130 static public String sanitize(String input, boolean allowUnicode) {
131 return sanitize(input, allowUnicode, !allowUnicode);
132 }
133
134 /**
135 * Sanitise the given input to make it more Terminal-friendly by removing
136 * combining characters.
137 *
138 * @param input
139 * the input to sanitise
140 * @param allowUnicode
141 * allow Unicode or only allow ASCII Latin characters
142 * @param removeAllAccents
143 * TRUE to replace all accentuated characters by their non
144 * accentuated counter-parts
145 *
146 * @return the sanitised {@link String}
147 */
148 static public String sanitize(String input, boolean allowUnicode,
149 boolean removeAllAccents) {
150
151 if (removeAllAccents) {
152 input = Normalizer.normalize(input, Form.NFKD);
153 input = marks.matcher(input).replaceAll("");
154 }
155
156 input = Normalizer.normalize(input, Form.NFKC);
157
158 if (!allowUnicode) {
159 StringBuilder builder = new StringBuilder();
160 for (int index = 0; index < input.length(); index++) {
161 char car = input.charAt(index);
162 // displayable chars in ASCII are in the range 32<->255,
163 // except DEL (127)
164 if (car >= 32 && car <= 255 && car != 127) {
165 builder.append(car);
166 }
167 }
168 input = builder.toString();
169 }
170
171 return input;
172 }
173
174 /**
175 * Convert between time in milliseconds to {@link String} in a "static" way
176 * (to exchange data over the wire, for instance).
177 *
178 * @param time
179 * the time in milliseconds
180 *
181 * @return the time as a {@link String}
182 */
183 static public String fromTime(long time) {
184 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
185 return sdf.format(new Date(time));
186 }
187
188 /**
189 * Convert between time as a {@link String} to milliseconds in a "static"
190 * way (to exchange data over the wire, for instance).
191 *
192 * @param displayTime
193 * the time as a {@link String}
194 *
195 * @return the time in milliseconds
196 */
197 static public long toTime(String displayTime) {
198 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
199 try {
200 return sdf.parse(displayTime).getTime();
201 } catch (ParseException e) {
202 return -1;
203 }
204 }
205
206 /**
207 * Convert the given {@link Image} object into a Base64 representation of
208 * the same {@link Image}. object.
209 *
210 * @param image
211 * the {@link Image} object to convert
212 *
213 * @return the Base64 representation
214 *
215 * @throws IOException
216 * in case of IO error
217 */
218 static public String fromImage(BufferedImage image) throws IOException {
219 String imageString = null;
220 ByteArrayOutputStream out = new ByteArrayOutputStream();
221
222 ImageIO.write(image, "jpeg", out);
223 byte[] imageBytes = out.toByteArray();
224
225 imageString = new String(Base64.encodeBytes(imageBytes));
226
227 out.close();
228
229 return imageString;
230 }
231
232 /**
233 * Convert the given image into a Base64 representation of the same
234 * {@link File}.
235 *
236 * @param in
237 * the image to convert
238 *
239 * @return the Base64 representation
240 *
241 * @throws IOException
242 * in case of IO error
243 */
244 static public String fromStream(InputStream in) throws IOException {
245 String fileString = null;
246 ByteArrayOutputStream out = new ByteArrayOutputStream();
247
248 byte[] buf = new byte[8192];
249
250 int c = 0;
251 while ((c = in.read(buf, 0, buf.length)) > 0) {
252 out.write(buf, 0, c);
253 }
254 out.flush();
255 in.close();
256
257 fileString = new String(Base64.encodeBytes(out.toByteArray()));
258 out.close();
259
260 return fileString;
261 }
262
263 /**
264 * Convert the given Base64 representation of an image into an {@link Image}
265 * object.
266 *
267 * @param b64data
268 * the {@link Image} in Base64 format
269 *
270 * @return the {@link Image} object
271 *
272 * @throws IOException
273 * in case of IO error
274 */
275 static public BufferedImage toImage(String b64data) throws IOException {
276 ByteArrayInputStream in = new ByteArrayInputStream(
277 Base64.decode(b64data));
278 return IOUtils.toImage(in);
279 }
280
281 /**
282 * Return a hash of the given {@link String}.
283 *
284 * @param input
285 * the input data
286 *
287 * @return the hash
288 */
289 static public String getHash(String input) {
290 try {
291 MessageDigest md = MessageDigest.getInstance("MD5");
292 md.update(input.getBytes());
293 byte byteData[] = md.digest();
294
295 StringBuffer hexString = new StringBuffer();
296 for (int i = 0; i < byteData.length; i++) {
297 String hex = Integer.toHexString(0xff & byteData[i]);
298 if (hex.length() == 1)
299 hexString.append('0');
300 hexString.append(hex);
301 }
302
303 return hexString.toString();
304 } catch (NoSuchAlgorithmException e) {
305 return input;
306 }
307 }
308
309 /**
310 * Remove the HTML content from the given input, and un-html-ize the rest.
311 *
312 * @param html
313 * the HTML-encoded content
314 *
315 * @return the HTML-free equivalent content
316 */
317 public static String unhtml(String html) {
318 StringBuilder builder = new StringBuilder();
319
320 int inTag = 0;
321 for (char car : html.toCharArray()) {
322 if (car == '<') {
323 inTag++;
324 } else if (car == '>') {
325 inTag--;
326 } else if (inTag <= 0) {
327 builder.append(car);
328 }
329 }
330
331 char nbsp = ' '; // non-breakable space (a special char)
332 char space = ' ';
333 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
334 }
335
336 /**
337 * Escape the given {@link String} so it can be used in XML, as content.
338 *
339 * @param input
340 * the input {@link String}
341 *
342 * @return the escaped {@link String}
343 */
344 public static String xmlEscape(String input) {
345 if (input == null) {
346 return "";
347 }
348
349 return HtmlEscape.escapeHtml(input,
350 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
351 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
352 }
353
354 /**
355 * Escape the given {@link String} so it can be used in XML, as text content
356 * inside double-quotes.
357 *
358 * @param input
359 * the input {@link String}
360 *
361 * @return the escaped {@link String}
362 */
363 public static String xmlEscapeQuote(String input) {
364 if (input == null) {
365 return "";
366 }
367
368 return HtmlEscape.escapeHtml(input,
369 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
370 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
371 }
372
373 public static String zip64(String data) {
374 try {
375 return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
376 } catch (IOException e) {
377 e.printStackTrace();
378 return null;
379 }
380 }
381
382 public static String unzip64(String data) throws IOException {
383 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
384 Base64.GZIP));
385
386 Scanner scan = new Scanner(in);
387 scan.useDelimiter("\\A");
388 try {
389 return scan.next();
390 } finally {
391 scan.close();
392 }
393 }
394 }