Commit | Line | Data |
---|---|---|
ec1f3444 NR |
1 | package be.nikiroo.utils; |
2 | ||
ec1f3444 | 3 | import java.io.ByteArrayInputStream; |
ec1f3444 | 4 | import java.io.IOException; |
3f8349b7 | 5 | import java.io.UnsupportedEncodingException; |
ec1f3444 NR |
6 | import java.security.MessageDigest; |
7 | import java.security.NoSuchAlgorithmException; | |
8 | import java.text.Normalizer; | |
9 | import java.text.Normalizer.Form; | |
10 | import java.text.ParseException; | |
11 | import java.text.SimpleDateFormat; | |
ec1f3444 | 12 | import java.util.Date; |
cc3e7291 | 13 | import java.util.List; |
db31c358 | 14 | import java.util.Scanner; |
ec1f3444 NR |
15 | import java.util.regex.Pattern; |
16 | ||
ec1f3444 NR |
17 | import org.unbescape.html.HtmlEscape; |
18 | import org.unbescape.html.HtmlEscapeLevel; | |
19 | import org.unbescape.html.HtmlEscapeType; | |
20 | ||
21 | /** | |
22 | * This class offers some utilities based around {@link String}s. | |
23 | * | |
24 | * @author niki | |
25 | */ | |
26 | public class StringUtils { | |
/**
 * Horizontal alignment to use for a {@link String} when padding or
 * justification is applied (only relevant when there is enough horizontal
 * space for the text to be aligned).
 */
public enum Alignment {
    /** Aligned at left. */
    LEFT,
    /** Centered. */
    CENTER,
    /** Aligned at right. */
    RIGHT,
    /** Full justified (to both left and right). */
    JUSTIFY,

    // Old deprecated values, kept so existing callers still compile:

    /**
     * DEPRECATED: please use LEFT.
     *
     * @deprecated use {@link #LEFT}
     */
    @Deprecated
    Beginning,
    /**
     * DEPRECATED: please use CENTER.
     *
     * @deprecated use {@link #CENTER}
     */
    @Deprecated
    Center,
    /**
     * DEPRECATED: please use RIGHT.
     *
     * @deprecated use {@link #RIGHT}
     */
    @Deprecated
    End;

    /**
     * Map a deprecated value to its current replacement.
     * <p>
     * Values that are not deprecated are returned unchanged.
     *
     * @return the non-deprecated value
     */
    Alignment undeprecate() {
        switch (this) {
        case Beginning:
            return LEFT;
        case Center:
            return CENTER;
        case End:
            return RIGHT;
        default:
            return this;
        }
    }
}
70 | ||
e8aa5bf9 | 71 | static private Pattern marks = getMarks(); |
ec1f3444 NR |
72 | |
73 | /** | |
74 | * Fix the size of the given {@link String} either with space-padding or by | |
75 | * shortening it. | |
76 | * | |
77 | * @param text | |
78 | * the {@link String} to fix | |
79 | * @param width | |
80 | * the size of the resulting {@link String} or -1 for a noop | |
81 | * | |
82 | * @return the resulting {@link String} of size <i>size</i> | |
83 | */ | |
84 | static public String padString(String text, int width) { | |
451f434b | 85 | return padString(text, width, true, null); |
ec1f3444 NR |
86 | } |
87 | ||
88 | /** | |
89 | * Fix the size of the given {@link String} either with space-padding or by | |
90 | * optionally shortening it. | |
91 | * | |
92 | * @param text | |
93 | * the {@link String} to fix | |
94 | * @param width | |
95 | * the size of the resulting {@link String} if the text fits or | |
96 | * if cut is TRUE or -1 for a noop | |
97 | * @param cut | |
98 | * cut the {@link String} shorter if needed | |
99 | * @param align | |
100 | * align the {@link String} in this position if we have enough | |
451f434b | 101 | * space (default is Alignment.Beginning) |
ec1f3444 NR |
102 | * |
103 | * @return the resulting {@link String} of size <i>size</i> minimum | |
104 | */ | |
105 | static public String padString(String text, int width, boolean cut, | |
106 | Alignment align) { | |
107 | ||
451f434b | 108 | if (align == null) { |
cc3e7291 | 109 | align = Alignment.LEFT; |
451f434b NR |
110 | } |
111 | ||
cc3e7291 NR |
112 | align = align.undeprecate(); |
113 | ||
ec1f3444 NR |
114 | if (width >= 0) { |
115 | if (text == null) | |
116 | text = ""; | |
117 | ||
118 | int diff = width - text.length(); | |
119 | ||
120 | if (diff < 0) { | |
121 | if (cut) | |
122 | text = text.substring(0, width); | |
123 | } else if (diff > 0) { | |
cc3e7291 NR |
124 | if (diff < 2 && align != Alignment.RIGHT) |
125 | align = Alignment.LEFT; | |
ec1f3444 NR |
126 | |
127 | switch (align) { | |
cc3e7291 | 128 | case RIGHT: |
ec1f3444 NR |
129 | text = new String(new char[diff]).replace('\0', ' ') + text; |
130 | break; | |
cc3e7291 | 131 | case CENTER: |
ec1f3444 NR |
132 | int pad1 = (diff) / 2; |
133 | int pad2 = (diff + 1) / 2; | |
134 | text = new String(new char[pad1]).replace('\0', ' ') + text | |
135 | + new String(new char[pad2]).replace('\0', ' '); | |
136 | break; | |
cc3e7291 NR |
137 | case LEFT: |
138 | default: | |
139 | text = text + new String(new char[diff]).replace('\0', ' '); | |
140 | break; | |
ec1f3444 NR |
141 | } |
142 | } | |
143 | } | |
144 | ||
145 | return text; | |
146 | } | |
147 | ||
cc3e7291 NR |
148 | /** |
149 | * Justify a text into width-sized (at the maximum) lines. | |
150 | * | |
151 | * @param text | |
152 | * the {@link String} to justify | |
153 | * @param width | |
154 | * the maximum size of the resulting lines | |
155 | * | |
156 | * @return a list of justified text lines | |
157 | */ | |
158 | static public List<String> justifyText(String text, int width) { | |
159 | return justifyText(text, width, null); | |
160 | } | |
161 | ||
162 | /** | |
163 | * Justify a text into width-sized (at the maximum) lines. | |
164 | * | |
165 | * @param text | |
166 | * the {@link String} to justify | |
167 | * @param width | |
168 | * the maximum size of the resulting lines | |
169 | * @param align | |
170 | * align the lines in this position (default is | |
171 | * Alignment.Beginning) | |
172 | * | |
173 | * @return a list of justified text lines | |
174 | */ | |
175 | static public List<String> justifyText(String text, int width, | |
176 | Alignment align) { | |
177 | if (align == null) { | |
178 | align = Alignment.LEFT; | |
179 | } | |
180 | ||
181 | align = align.undeprecate(); | |
182 | ||
183 | switch (align) { | |
184 | case CENTER: | |
185 | return StringJustifier.center(text, width); | |
186 | case RIGHT: | |
187 | return StringJustifier.right(text, width); | |
188 | case JUSTIFY: | |
189 | return StringJustifier.full(text, width); | |
190 | case LEFT: | |
191 | default: | |
192 | return StringJustifier.left(text, width); | |
193 | } | |
194 | } | |
195 | ||
ec1f3444 NR |
196 | /** |
197 | * Sanitise the given input to make it more Terminal-friendly by removing | |
198 | * combining characters. | |
199 | * | |
200 | * @param input | |
201 | * the input to sanitise | |
202 | * @param allowUnicode | |
203 | * allow Unicode or only allow ASCII Latin characters | |
204 | * | |
205 | * @return the sanitised {@link String} | |
206 | */ | |
207 | static public String sanitize(String input, boolean allowUnicode) { | |
208 | return sanitize(input, allowUnicode, !allowUnicode); | |
209 | } | |
210 | ||
211 | /** | |
212 | * Sanitise the given input to make it more Terminal-friendly by removing | |
213 | * combining characters. | |
214 | * | |
215 | * @param input | |
216 | * the input to sanitise | |
217 | * @param allowUnicode | |
218 | * allow Unicode or only allow ASCII Latin characters | |
219 | * @param removeAllAccents | |
220 | * TRUE to replace all accentuated characters by their non | |
221 | * accentuated counter-parts | |
222 | * | |
223 | * @return the sanitised {@link String} | |
224 | */ | |
225 | static public String sanitize(String input, boolean allowUnicode, | |
226 | boolean removeAllAccents) { | |
227 | ||
228 | if (removeAllAccents) { | |
229 | input = Normalizer.normalize(input, Form.NFKD); | |
e8aa5bf9 NR |
230 | if (marks != null) { |
231 | input = marks.matcher(input).replaceAll(""); | |
232 | } | |
ec1f3444 NR |
233 | } |
234 | ||
235 | input = Normalizer.normalize(input, Form.NFKC); | |
236 | ||
237 | if (!allowUnicode) { | |
238 | StringBuilder builder = new StringBuilder(); | |
239 | for (int index = 0; index < input.length(); index++) { | |
240 | char car = input.charAt(index); | |
241 | // displayable chars in ASCII are in the range 32<->255, | |
242 | // except DEL (127) | |
243 | if (car >= 32 && car <= 255 && car != 127) { | |
244 | builder.append(car); | |
245 | } | |
246 | } | |
247 | input = builder.toString(); | |
248 | } | |
249 | ||
250 | return input; | |
251 | } | |
252 | ||
253 | /** | |
451f434b NR |
254 | * Convert between the time in milliseconds to a {@link String} in a "fixed" |
255 | * way (to exchange data over the wire, for instance). | |
256 | * <p> | |
257 | * Precise to the second. | |
ec1f3444 NR |
258 | * |
259 | * @param time | |
451f434b NR |
260 | * the specified number of milliseconds since the standard base |
261 | * time known as "the epoch", namely January 1, 1970, 00:00:00 | |
262 | * GMT | |
ec1f3444 NR |
263 | * |
264 | * @return the time as a {@link String} | |
265 | */ | |
266 | static public String fromTime(long time) { | |
267 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); | |
268 | return sdf.format(new Date(time)); | |
269 | } | |
270 | ||
271 | /** | |
451f434b | 272 | * Convert between the time as a {@link String} to milliseconds in a "fixed" |
ec1f3444 | 273 | * way (to exchange data over the wire, for instance). |
451f434b NR |
274 | * <p> |
275 | * Precise to the second. | |
ec1f3444 | 276 | * |
db31c358 | 277 | * @param displayTime |
ec1f3444 NR |
278 | * the time as a {@link String} |
279 | * | |
451f434b | 280 | * @return the number of milliseconds since the standard base time known as |
e8aa5bf9 NR |
281 | * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case |
282 | * of error | |
283 | * | |
284 | * @throws ParseException | |
285 | * in case of parse error | |
ec1f3444 | 286 | */ |
e8aa5bf9 | 287 | static public long toTime(String displayTime) throws ParseException { |
ec1f3444 | 288 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
e8aa5bf9 | 289 | return sdf.parse(displayTime).getTime(); |
ec1f3444 NR |
290 | } |
291 | ||
ec1f3444 NR |
292 | /** |
293 | * Return a hash of the given {@link String}. | |
294 | * | |
295 | * @param input | |
296 | * the input data | |
297 | * | |
298 | * @return the hash | |
299 | */ | |
b771aed5 | 300 | static public String getMd5Hash(String input) { |
ec1f3444 NR |
301 | try { |
302 | MessageDigest md = MessageDigest.getInstance("MD5"); | |
3f8349b7 | 303 | md.update(input.getBytes("UTF-8")); |
ec1f3444 NR |
304 | byte byteData[] = md.digest(); |
305 | ||
306 | StringBuffer hexString = new StringBuffer(); | |
307 | for (int i = 0; i < byteData.length; i++) { | |
308 | String hex = Integer.toHexString(0xff & byteData[i]); | |
309 | if (hex.length() == 1) | |
310 | hexString.append('0'); | |
311 | hexString.append(hex); | |
312 | } | |
313 | ||
314 | return hexString.toString(); | |
315 | } catch (NoSuchAlgorithmException e) { | |
316 | return input; | |
3f8349b7 NR |
317 | } catch (UnsupportedEncodingException e) { |
318 | return input; | |
ec1f3444 NR |
319 | } |
320 | } | |
321 | ||
ec1f3444 NR |
322 | /** |
323 | * Remove the HTML content from the given input, and un-html-ize the rest. | |
324 | * | |
325 | * @param html | |
326 | * the HTML-encoded content | |
327 | * | |
328 | * @return the HTML-free equivalent content | |
329 | */ | |
330 | public static String unhtml(String html) { | |
331 | StringBuilder builder = new StringBuilder(); | |
332 | ||
333 | int inTag = 0; | |
334 | for (char car : html.toCharArray()) { | |
335 | if (car == '<') { | |
336 | inTag++; | |
337 | } else if (car == '>') { | |
338 | inTag--; | |
339 | } else if (inTag <= 0) { | |
340 | builder.append(car); | |
341 | } | |
342 | } | |
343 | ||
7ee9568b NR |
344 | char nbsp = ' '; // non-breakable space (a special char) |
345 | char space = ' '; | |
346 | return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space); | |
ec1f3444 NR |
347 | } |
348 | ||
349 | /** | |
350 | * Escape the given {@link String} so it can be used in XML, as content. | |
351 | * | |
352 | * @param input | |
353 | * the input {@link String} | |
354 | * | |
355 | * @return the escaped {@link String} | |
356 | */ | |
357 | public static String xmlEscape(String input) { | |
358 | if (input == null) { | |
359 | return ""; | |
360 | } | |
361 | ||
362 | return HtmlEscape.escapeHtml(input, | |
363 | HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA, | |
364 | HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT); | |
365 | } | |
366 | ||
367 | /** | |
368 | * Escape the given {@link String} so it can be used in XML, as text content | |
369 | * inside double-quotes. | |
370 | * | |
371 | * @param input | |
372 | * the input {@link String} | |
373 | * | |
374 | * @return the escaped {@link String} | |
375 | */ | |
376 | public static String xmlEscapeQuote(String input) { | |
377 | if (input == null) { | |
378 | return ""; | |
379 | } | |
380 | ||
381 | return HtmlEscape.escapeHtml(input, | |
382 | HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA, | |
383 | HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT); | |
384 | } | |
db31c358 | 385 | |
80500544 NR |
386 | /** |
387 | * Zip the data and then encode it into Base64. | |
388 | * | |
389 | * @param data | |
390 | * the data | |
391 | * | |
392 | * @return the Base64 zipped version | |
393 | */ | |
db31c358 NR |
394 | public static String zip64(String data) { |
395 | try { | |
396 | return Base64.encodeBytes(data.getBytes(), Base64.GZIP); | |
397 | } catch (IOException e) { | |
398 | e.printStackTrace(); | |
399 | return null; | |
400 | } | |
401 | } | |
402 | ||
80500544 NR |
403 | /** |
404 | * Unconvert from Base64 then unzip the content. | |
405 | * | |
406 | * @param data | |
407 | * the data in Base64 format | |
408 | * | |
409 | * @return the raw data | |
410 | * | |
411 | * @throws IOException | |
412 | * in case of I/O error | |
413 | */ | |
db31c358 NR |
414 | public static String unzip64(String data) throws IOException { |
415 | ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data, | |
416 | Base64.GZIP)); | |
417 | ||
418 | Scanner scan = new Scanner(in); | |
419 | scan.useDelimiter("\\A"); | |
420 | try { | |
421 | return scan.next(); | |
422 | } finally { | |
423 | scan.close(); | |
424 | } | |
425 | } | |
e8aa5bf9 NR |
426 | |
427 | /** | |
428 | * The "remove accents" pattern. | |
429 | * | |
430 | * @return the pattern, or NULL if a problem happens | |
431 | */ | |
432 | private static Pattern getMarks() { | |
433 | try { | |
434 | return Pattern | |
435 | .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+"); | |
436 | } catch (Exception e) { | |
437 | // Can fail on Android... | |
438 | return null; | |
439 | } | |
440 | } | |
ec1f3444 | 441 | } |