Version 1.4.2: bugfixes (unhtml, deltree)
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
CommitLineData
ec1f3444
NR
1package be.nikiroo.utils;
2
3import java.awt.Image;
ec1f3444
NR
4import java.awt.image.BufferedImage;
5import java.io.ByteArrayInputStream;
6import java.io.ByteArrayOutputStream;
7import java.io.File;
8import java.io.IOException;
9import java.io.InputStream;
10import java.security.MessageDigest;
11import java.security.NoSuchAlgorithmException;
12import java.text.Normalizer;
13import java.text.Normalizer.Form;
14import java.text.ParseException;
15import java.text.SimpleDateFormat;
ec1f3444
NR
16import java.util.Date;
17import java.util.regex.Pattern;
18
19import javax.imageio.ImageIO;
20
21import org.unbescape.html.HtmlEscape;
22import org.unbescape.html.HtmlEscapeLevel;
23import org.unbescape.html.HtmlEscapeType;
24
25/**
26 * This class offers some utilities based around {@link String}s.
27 *
28 * @author niki
29 */
30public class StringUtils {
/**
 * Horizontal placement of a {@link String} inside a wider area, used when
 * the text is padded and there is spare room to distribute.
 */
public enum Alignment {
    /** The text is placed at the left, spare room goes to the right. */
    Beginning,

    /** The text is placed in the middle, spare room is split on both sides. */
    Center,

    /** The text is placed at the right, spare room goes to the left. */
    End;
}
43
44 static private Pattern marks = Pattern
45 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
46
47 /**
48 * Fix the size of the given {@link String} either with space-padding or by
49 * shortening it.
50 *
51 * @param text
52 * the {@link String} to fix
53 * @param width
54 * the size of the resulting {@link String} or -1 for a noop
55 *
56 * @return the resulting {@link String} of size <i>size</i>
57 */
58 static public String padString(String text, int width) {
59 return padString(text, width, true, Alignment.Beginning);
60 }
61
62 /**
63 * Fix the size of the given {@link String} either with space-padding or by
64 * optionally shortening it.
65 *
66 * @param text
67 * the {@link String} to fix
68 * @param width
69 * the size of the resulting {@link String} if the text fits or
70 * if cut is TRUE or -1 for a noop
71 * @param cut
72 * cut the {@link String} shorter if needed
73 * @param align
74 * align the {@link String} in this position if we have enough
75 * space
76 *
77 * @return the resulting {@link String} of size <i>size</i> minimum
78 */
79 static public String padString(String text, int width, boolean cut,
80 Alignment align) {
81
82 if (width >= 0) {
83 if (text == null)
84 text = "";
85
86 int diff = width - text.length();
87
88 if (diff < 0) {
89 if (cut)
90 text = text.substring(0, width);
91 } else if (diff > 0) {
92 if (diff < 2 && align != Alignment.End)
93 align = Alignment.Beginning;
94
95 switch (align) {
96 case Beginning:
97 text = text + new String(new char[diff]).replace('\0', ' ');
98 break;
99 case End:
100 text = new String(new char[diff]).replace('\0', ' ') + text;
101 break;
102 case Center:
103 default:
104 int pad1 = (diff) / 2;
105 int pad2 = (diff + 1) / 2;
106 text = new String(new char[pad1]).replace('\0', ' ') + text
107 + new String(new char[pad2]).replace('\0', ' ');
108 break;
109 }
110 }
111 }
112
113 return text;
114 }
115
116 /**
117 * Sanitise the given input to make it more Terminal-friendly by removing
118 * combining characters.
119 *
120 * @param input
121 * the input to sanitise
122 * @param allowUnicode
123 * allow Unicode or only allow ASCII Latin characters
124 *
125 * @return the sanitised {@link String}
126 */
127 static public String sanitize(String input, boolean allowUnicode) {
128 return sanitize(input, allowUnicode, !allowUnicode);
129 }
130
131 /**
132 * Sanitise the given input to make it more Terminal-friendly by removing
133 * combining characters.
134 *
135 * @param input
136 * the input to sanitise
137 * @param allowUnicode
138 * allow Unicode or only allow ASCII Latin characters
139 * @param removeAllAccents
140 * TRUE to replace all accentuated characters by their non
141 * accentuated counter-parts
142 *
143 * @return the sanitised {@link String}
144 */
145 static public String sanitize(String input, boolean allowUnicode,
146 boolean removeAllAccents) {
147
148 if (removeAllAccents) {
149 input = Normalizer.normalize(input, Form.NFKD);
150 input = marks.matcher(input).replaceAll("");
151 }
152
153 input = Normalizer.normalize(input, Form.NFKC);
154
155 if (!allowUnicode) {
156 StringBuilder builder = new StringBuilder();
157 for (int index = 0; index < input.length(); index++) {
158 char car = input.charAt(index);
159 // displayable chars in ASCII are in the range 32<->255,
160 // except DEL (127)
161 if (car >= 32 && car <= 255 && car != 127) {
162 builder.append(car);
163 }
164 }
165 input = builder.toString();
166 }
167
168 return input;
169 }
170
171 /**
172 * Convert between time in milliseconds to {@link String} in a "static" way
173 * (to exchange data over the wire, for instance).
174 *
175 * @param time
176 * the time in milliseconds
177 *
178 * @return the time as a {@link String}
179 */
180 static public String fromTime(long time) {
181 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
182 return sdf.format(new Date(time));
183 }
184
185 /**
186 * Convert between time as a {@link String} to milliseconds in a "static"
187 * way (to exchange data over the wire, for instance).
188 *
189 * @param time
190 * the time as a {@link String}
191 *
192 * @return the time in milliseconds
193 */
194 static public long toTime(String display) {
195 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
196 try {
197 return sdf.parse(display).getTime();
198 } catch (ParseException e) {
199 return -1;
200 }
201 }
202
203 /**
204 * Convert the given {@link Image} object into a Base64 representation of
205 * the same {@link Image}. object.
206 *
207 * @param image
208 * the {@link Image} object to convert
209 *
210 * @return the Base64 representation
211 *
212 * @throws IOException
213 * in case of IO error
214 */
215 static public String fromImage(BufferedImage image) throws IOException {
216 String imageString = null;
217 ByteArrayOutputStream out = new ByteArrayOutputStream();
218
219 ImageIO.write(image, "jpeg", out);
220 byte[] imageBytes = out.toByteArray();
221
72c32e88 222 imageString = new String(Base64.encodeBytes(imageBytes));
ec1f3444
NR
223
224 out.close();
225
226 return imageString;
227 }
228
229 /**
230 * Convert the given {@link File} image into a Base64 representation of the
231 * same {@link File}.
232 *
233 * @param file
234 * the {@link File} image to convert
235 *
236 * @return the Base64 representation
237 *
238 * @throws IOException
239 * in case of IO error
240 */
241 static public String fromStream(InputStream in) throws IOException {
242 String fileString = null;
243 ByteArrayOutputStream out = new ByteArrayOutputStream();
244
245 byte[] buf = new byte[8192];
246
247 int c = 0;
248 while ((c = in.read(buf, 0, buf.length)) > 0) {
249 out.write(buf, 0, c);
250 }
251 out.flush();
252 in.close();
253
72c32e88 254 fileString = new String(Base64.encodeBytes(out.toByteArray()));
ec1f3444
NR
255 out.close();
256
257 return fileString;
258 }
259
260 /**
261 * Convert the given Base64 representation of an image into an {@link Image}
262 * object.
263 *
264 * @param b64data
265 * the {@link Image} in Base64 format
266 *
267 * @return the {@link Image} object
268 *
269 * @throws IOException
270 * in case of IO error
271 */
272 static public BufferedImage toImage(String b64data) throws IOException {
72c32e88
NR
273 ByteArrayInputStream in = new ByteArrayInputStream(
274 Base64.decode(b64data));
b607df60 275 return IOUtils.toImage(in);
ec1f3444
NR
276 }
277
278 /**
279 * Return a hash of the given {@link String}.
280 *
281 * @param input
282 * the input data
283 *
284 * @return the hash
285 */
286 static public String getHash(String input) {
287 try {
288 MessageDigest md = MessageDigest.getInstance("MD5");
289 md.update(input.getBytes());
290 byte byteData[] = md.digest();
291
292 StringBuffer hexString = new StringBuffer();
293 for (int i = 0; i < byteData.length; i++) {
294 String hex = Integer.toHexString(0xff & byteData[i]);
295 if (hex.length() == 1)
296 hexString.append('0');
297 hexString.append(hex);
298 }
299
300 return hexString.toString();
301 } catch (NoSuchAlgorithmException e) {
302 return input;
303 }
304 }
305
ec1f3444
NR
306 /**
307 * Remove the HTML content from the given input, and un-html-ize the rest.
308 *
309 * @param html
310 * the HTML-encoded content
311 *
312 * @return the HTML-free equivalent content
313 */
314 public static String unhtml(String html) {
315 StringBuilder builder = new StringBuilder();
316
317 int inTag = 0;
318 for (char car : html.toCharArray()) {
319 if (car == '<') {
320 inTag++;
321 } else if (car == '>') {
322 inTag--;
323 } else if (inTag <= 0) {
324 builder.append(car);
325 }
326 }
327
7ee9568b
NR
328 char nbsp = ' '; // non-breakable space (a special char)
329 char space = ' ';
330 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
ec1f3444
NR
331 }
332
333 /**
334 * Escape the given {@link String} so it can be used in XML, as content.
335 *
336 * @param input
337 * the input {@link String}
338 *
339 * @return the escaped {@link String}
340 */
341 public static String xmlEscape(String input) {
342 if (input == null) {
343 return "";
344 }
345
346 return HtmlEscape.escapeHtml(input,
347 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
348 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
349 }
350
351 /**
352 * Escape the given {@link String} so it can be used in XML, as text content
353 * inside double-quotes.
354 *
355 * @param input
356 * the input {@link String}
357 *
358 * @return the escaped {@link String}
359 */
360 public static String xmlEscapeQuote(String input) {
361 if (input == null) {
362 return "";
363 }
364
365 return HtmlEscape.escapeHtml(input,
366 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
367 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
368 }
369}