9e5d654a8252ff3b2b94f4a6b610c016aa526f5c
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
1 package be.nikiroo.utils;
2
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.Normalizer;
import java.text.Normalizer.Form;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.regex.Pattern;

import org.unbescape.html.HtmlEscape;
import org.unbescape.html.HtmlEscapeLevel;
import org.unbescape.html.HtmlEscapeType;
20
21 /**
22 * This class offers some utilities based around {@link String}s.
23 *
24 * @author niki
25 */
26 public class StringUtils {
/**
 * Horizontal placement of a {@link String} when it is padded or justified
 * (only relevant when there is enough horizontal room to move it around).
 */
public enum Alignment {
    /** Aligned at left. */
    LEFT,
    /** Centered. */
    CENTER,
    /** Aligned at right. */
    RIGHT,
    /** Full justified (to both left and right). */
    JUSTIFY,

    // Old deprecated values, kept so that existing callers still compile:

    /** DEPRECATED: please use LEFT. */
    @Deprecated
    Beginning,
    /** DEPRECATED: please use CENTER. */
    @Deprecated
    Center,
    /** DEPRECATED: please use RIGHT. */
    @Deprecated
    End;

    /**
     * Translate a deprecated value into its current replacement; current
     * values are returned as they are.
     *
     * @return the non-deprecated value
     */
    Alignment undeprecate() {
        switch (this) {
        case Beginning:
            return LEFT;
        case Center:
            return CENTER;
        case End:
            return RIGHT;
        default:
            return this;
        }
    }
}
70
71 static private Pattern marks = getMarks();
72
73 /**
74 * Fix the size of the given {@link String} either with space-padding or by
75 * shortening it.
76 *
77 * @param text
78 * the {@link String} to fix
79 * @param width
80 * the size of the resulting {@link String} or -1 for a noop
81 *
82 * @return the resulting {@link String} of size <i>size</i>
83 */
84 static public String padString(String text, int width) {
85 return padString(text, width, true, null);
86 }
87
88 /**
89 * Fix the size of the given {@link String} either with space-padding or by
90 * optionally shortening it.
91 *
92 * @param text
93 * the {@link String} to fix
94 * @param width
95 * the size of the resulting {@link String} if the text fits or
96 * if cut is TRUE or -1 for a noop
97 * @param cut
98 * cut the {@link String} shorter if needed
99 * @param align
100 * align the {@link String} in this position if we have enough
101 * space (default is Alignment.Beginning)
102 *
103 * @return the resulting {@link String} of size <i>size</i> minimum
104 */
105 static public String padString(String text, int width, boolean cut,
106 Alignment align) {
107
108 if (align == null) {
109 align = Alignment.LEFT;
110 }
111
112 align = align.undeprecate();
113
114 if (width >= 0) {
115 if (text == null)
116 text = "";
117
118 int diff = width - text.length();
119
120 if (diff < 0) {
121 if (cut)
122 text = text.substring(0, width);
123 } else if (diff > 0) {
124 if (diff < 2 && align != Alignment.RIGHT)
125 align = Alignment.LEFT;
126
127 switch (align) {
128 case RIGHT:
129 text = new String(new char[diff]).replace('\0', ' ') + text;
130 break;
131 case CENTER:
132 int pad1 = (diff) / 2;
133 int pad2 = (diff + 1) / 2;
134 text = new String(new char[pad1]).replace('\0', ' ') + text
135 + new String(new char[pad2]).replace('\0', ' ');
136 break;
137 case LEFT:
138 default:
139 text = text + new String(new char[diff]).replace('\0', ' ');
140 break;
141 }
142 }
143 }
144
145 return text;
146 }
147
148 /**
149 * Justify a text into width-sized (at the maximum) lines.
150 *
151 * @param text
152 * the {@link String} to justify
153 * @param width
154 * the maximum size of the resulting lines
155 *
156 * @return a list of justified text lines
157 */
158 static public List<String> justifyText(String text, int width) {
159 return justifyText(text, width, null);
160 }
161
162 /**
163 * Justify a text into width-sized (at the maximum) lines.
164 *
165 * @param text
166 * the {@link String} to justify
167 * @param width
168 * the maximum size of the resulting lines
169 * @param align
170 * align the lines in this position (default is
171 * Alignment.Beginning)
172 *
173 * @return a list of justified text lines
174 */
175 static public List<String> justifyText(String text, int width,
176 Alignment align) {
177 if (align == null) {
178 align = Alignment.LEFT;
179 }
180
181 align = align.undeprecate();
182
183 switch (align) {
184 case CENTER:
185 return StringJustifier.center(text, width);
186 case RIGHT:
187 return StringJustifier.right(text, width);
188 case JUSTIFY:
189 return StringJustifier.full(text, width);
190 case LEFT:
191 default:
192 return StringJustifier.left(text, width);
193 }
194 }
195
196 /**
197 * Sanitise the given input to make it more Terminal-friendly by removing
198 * combining characters.
199 *
200 * @param input
201 * the input to sanitise
202 * @param allowUnicode
203 * allow Unicode or only allow ASCII Latin characters
204 *
205 * @return the sanitised {@link String}
206 */
207 static public String sanitize(String input, boolean allowUnicode) {
208 return sanitize(input, allowUnicode, !allowUnicode);
209 }
210
211 /**
212 * Sanitise the given input to make it more Terminal-friendly by removing
213 * combining characters.
214 *
215 * @param input
216 * the input to sanitise
217 * @param allowUnicode
218 * allow Unicode or only allow ASCII Latin characters
219 * @param removeAllAccents
220 * TRUE to replace all accentuated characters by their non
221 * accentuated counter-parts
222 *
223 * @return the sanitised {@link String}
224 */
225 static public String sanitize(String input, boolean allowUnicode,
226 boolean removeAllAccents) {
227
228 if (removeAllAccents) {
229 input = Normalizer.normalize(input, Form.NFKD);
230 if (marks != null) {
231 input = marks.matcher(input).replaceAll("");
232 }
233 }
234
235 input = Normalizer.normalize(input, Form.NFKC);
236
237 if (!allowUnicode) {
238 StringBuilder builder = new StringBuilder();
239 for (int index = 0; index < input.length(); index++) {
240 char car = input.charAt(index);
241 // displayable chars in ASCII are in the range 32<->255,
242 // except DEL (127)
243 if (car >= 32 && car <= 255 && car != 127) {
244 builder.append(car);
245 }
246 }
247 input = builder.toString();
248 }
249
250 return input;
251 }
252
253 /**
254 * Convert between the time in milliseconds to a {@link String} in a "fixed"
255 * way (to exchange data over the wire, for instance).
256 * <p>
257 * Precise to the second.
258 *
259 * @param time
260 * the specified number of milliseconds since the standard base
261 * time known as "the epoch", namely January 1, 1970, 00:00:00
262 * GMT
263 *
264 * @return the time as a {@link String}
265 */
266 static public String fromTime(long time) {
267 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
268 return sdf.format(new Date(time));
269 }
270
271 /**
272 * Convert between the time as a {@link String} to milliseconds in a "fixed"
273 * way (to exchange data over the wire, for instance).
274 * <p>
275 * Precise to the second.
276 *
277 * @param displayTime
278 * the time as a {@link String}
279 *
280 * @return the number of milliseconds since the standard base time known as
281 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
282 * of error
283 *
284 * @throws ParseException
285 * in case of parse error
286 */
287 static public long toTime(String displayTime) throws ParseException {
288 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
289 return sdf.parse(displayTime).getTime();
290 }
291
292 /**
293 * Return a hash of the given {@link String}.
294 *
295 * @param input
296 * the input data
297 *
298 * @return the hash
299 */
300 static public String getMd5Hash(String input) {
301 try {
302 MessageDigest md = MessageDigest.getInstance("MD5");
303 md.update(input.getBytes("UTF-8"));
304 byte byteData[] = md.digest();
305
306 StringBuffer hexString = new StringBuffer();
307 for (int i = 0; i < byteData.length; i++) {
308 String hex = Integer.toHexString(0xff & byteData[i]);
309 if (hex.length() == 1)
310 hexString.append('0');
311 hexString.append(hex);
312 }
313
314 return hexString.toString();
315 } catch (NoSuchAlgorithmException e) {
316 return input;
317 } catch (UnsupportedEncodingException e) {
318 return input;
319 }
320 }
321
322 /**
323 * Remove the HTML content from the given input, and un-html-ize the rest.
324 *
325 * @param html
326 * the HTML-encoded content
327 *
328 * @return the HTML-free equivalent content
329 */
330 public static String unhtml(String html) {
331 StringBuilder builder = new StringBuilder();
332
333 int inTag = 0;
334 for (char car : html.toCharArray()) {
335 if (car == '<') {
336 inTag++;
337 } else if (car == '>') {
338 inTag--;
339 } else if (inTag <= 0) {
340 builder.append(car);
341 }
342 }
343
344 char nbsp = ' '; // non-breakable space (a special char)
345 char space = ' ';
346 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
347 }
348
349 /**
350 * Escape the given {@link String} so it can be used in XML, as content.
351 *
352 * @param input
353 * the input {@link String}
354 *
355 * @return the escaped {@link String}
356 */
357 public static String xmlEscape(String input) {
358 if (input == null) {
359 return "";
360 }
361
362 return HtmlEscape.escapeHtml(input,
363 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
364 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
365 }
366
367 /**
368 * Escape the given {@link String} so it can be used in XML, as text content
369 * inside double-quotes.
370 *
371 * @param input
372 * the input {@link String}
373 *
374 * @return the escaped {@link String}
375 */
376 public static String xmlEscapeQuote(String input) {
377 if (input == null) {
378 return "";
379 }
380
381 return HtmlEscape.escapeHtml(input,
382 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
383 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
384 }
385
386 /**
387 * Zip the data and then encode it into Base64.
388 *
389 * @param data
390 * the data
391 *
392 * @return the Base64 zipped version
393 */
394 public static String zip64(String data) {
395 try {
396 return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
397 } catch (IOException e) {
398 e.printStackTrace();
399 return null;
400 }
401 }
402
403 /**
404 * Unconvert from Base64 then unzip the content.
405 *
406 * @param data
407 * the data in Base64 format
408 *
409 * @return the raw data
410 *
411 * @throws IOException
412 * in case of I/O error
413 */
414 public static String unzip64(String data) throws IOException {
415 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
416 Base64.GZIP));
417
418 Scanner scan = new Scanner(in);
419 scan.useDelimiter("\\A");
420 try {
421 return scan.next();
422 } finally {
423 scan.close();
424 }
425 }
426
427 /**
428 * The "remove accents" pattern.
429 *
430 * @return the pattern, or NULL if a problem happens
431 */
432 private static Pattern getMarks() {
433 try {
434 return Pattern
435 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
436 } catch (Exception e) {
437 // Can fail on Android...
438 return null;
439 }
440 }
441 }