Version 4.0.0: java.awt dependencies move
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
1 package be.nikiroo.utils;
2
3 import java.io.ByteArrayInputStream;
4 import java.io.IOException;
5 import java.io.UnsupportedEncodingException;
6 import java.security.MessageDigest;
7 import java.security.NoSuchAlgorithmException;
8 import java.text.Normalizer;
9 import java.text.Normalizer.Form;
10 import java.text.ParseException;
11 import java.text.SimpleDateFormat;
12 import java.util.Date;
13 import java.util.Scanner;
14 import java.util.regex.Pattern;
15
16 import org.unbescape.html.HtmlEscape;
17 import org.unbescape.html.HtmlEscapeLevel;
18 import org.unbescape.html.HtmlEscapeType;
19
20 /**
21 * This class offers some utilities based around {@link String}s.
22 *
23 * @author niki
24 */
25 public class StringUtils {
26 /**
27 * This enum type will decide the alignment of a {@link String} when padding
28 * is applied or if there is enough horizontal space for it to be aligned.
29 */
30 public enum Alignment {
31 /** Aligned at left. */
32 Beginning,
33 /** Centered. */
34 Center,
35 /** Aligned at right. */
36 End
37 }
38
39 static private Pattern marks = Pattern
40 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
41
42 /**
43 * Fix the size of the given {@link String} either with space-padding or by
44 * shortening it.
45 *
46 * @param text
47 * the {@link String} to fix
48 * @param width
49 * the size of the resulting {@link String} or -1 for a noop
50 *
51 * @return the resulting {@link String} of size <i>size</i>
52 */
53 static public String padString(String text, int width) {
54 return padString(text, width, true, null);
55 }
56
57 /**
58 * Fix the size of the given {@link String} either with space-padding or by
59 * optionally shortening it.
60 *
61 * @param text
62 * the {@link String} to fix
63 * @param width
64 * the size of the resulting {@link String} if the text fits or
65 * if cut is TRUE or -1 for a noop
66 * @param cut
67 * cut the {@link String} shorter if needed
68 * @param align
69 * align the {@link String} in this position if we have enough
70 * space (default is Alignment.Beginning)
71 *
72 * @return the resulting {@link String} of size <i>size</i> minimum
73 */
74 static public String padString(String text, int width, boolean cut,
75 Alignment align) {
76
77 if (align == null) {
78 align = Alignment.Beginning;
79 }
80
81 if (width >= 0) {
82 if (text == null)
83 text = "";
84
85 int diff = width - text.length();
86
87 if (diff < 0) {
88 if (cut)
89 text = text.substring(0, width);
90 } else if (diff > 0) {
91 if (diff < 2 && align != Alignment.End)
92 align = Alignment.Beginning;
93
94 switch (align) {
95 case Beginning:
96 text = text + new String(new char[diff]).replace('\0', ' ');
97 break;
98 case End:
99 text = new String(new char[diff]).replace('\0', ' ') + text;
100 break;
101 case Center:
102 default:
103 int pad1 = (diff) / 2;
104 int pad2 = (diff + 1) / 2;
105 text = new String(new char[pad1]).replace('\0', ' ') + text
106 + new String(new char[pad2]).replace('\0', ' ');
107 break;
108 }
109 }
110 }
111
112 return text;
113 }
114
115 /**
116 * Sanitise the given input to make it more Terminal-friendly by removing
117 * combining characters.
118 *
119 * @param input
120 * the input to sanitise
121 * @param allowUnicode
122 * allow Unicode or only allow ASCII Latin characters
123 *
124 * @return the sanitised {@link String}
125 */
126 static public String sanitize(String input, boolean allowUnicode) {
127 return sanitize(input, allowUnicode, !allowUnicode);
128 }
129
130 /**
131 * Sanitise the given input to make it more Terminal-friendly by removing
132 * combining characters.
133 *
134 * @param input
135 * the input to sanitise
136 * @param allowUnicode
137 * allow Unicode or only allow ASCII Latin characters
138 * @param removeAllAccents
139 * TRUE to replace all accentuated characters by their non
140 * accentuated counter-parts
141 *
142 * @return the sanitised {@link String}
143 */
144 static public String sanitize(String input, boolean allowUnicode,
145 boolean removeAllAccents) {
146
147 if (removeAllAccents) {
148 input = Normalizer.normalize(input, Form.NFKD);
149 input = marks.matcher(input).replaceAll("");
150 }
151
152 input = Normalizer.normalize(input, Form.NFKC);
153
154 if (!allowUnicode) {
155 StringBuilder builder = new StringBuilder();
156 for (int index = 0; index < input.length(); index++) {
157 char car = input.charAt(index);
158 // displayable chars in ASCII are in the range 32<->255,
159 // except DEL (127)
160 if (car >= 32 && car <= 255 && car != 127) {
161 builder.append(car);
162 }
163 }
164 input = builder.toString();
165 }
166
167 return input;
168 }
169
170 /**
171 * Convert between the time in milliseconds to a {@link String} in a "fixed"
172 * way (to exchange data over the wire, for instance).
173 * <p>
174 * Precise to the second.
175 *
176 * @param time
177 * the specified number of milliseconds since the standard base
178 * time known as "the epoch", namely January 1, 1970, 00:00:00
179 * GMT
180 *
181 * @return the time as a {@link String}
182 */
183 static public String fromTime(long time) {
184 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
185 return sdf.format(new Date(time));
186 }
187
188 /**
189 * Convert between the time as a {@link String} to milliseconds in a "fixed"
190 * way (to exchange data over the wire, for instance).
191 * <p>
192 * Precise to the second.
193 *
194 * @param displayTime
195 * the time as a {@link String}
196 *
197 * @return the number of milliseconds since the standard base time known as
198 * "the epoch", namely January 1, 1970, 00:00:00 GMT
199 */
200 static public long toTime(String displayTime) {
201 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
202 try {
203 return sdf.parse(displayTime).getTime();
204 } catch (ParseException e) {
205 return -1;
206 }
207 }
208
209 /**
210 * Return a hash of the given {@link String}.
211 *
212 * @param input
213 * the input data
214 *
215 * @return the hash
216 */
217 static public String getMd5Hash(String input) {
218 try {
219 MessageDigest md = MessageDigest.getInstance("MD5");
220 md.update(input.getBytes("UTF-8"));
221 byte byteData[] = md.digest();
222
223 StringBuffer hexString = new StringBuffer();
224 for (int i = 0; i < byteData.length; i++) {
225 String hex = Integer.toHexString(0xff & byteData[i]);
226 if (hex.length() == 1)
227 hexString.append('0');
228 hexString.append(hex);
229 }
230
231 return hexString.toString();
232 } catch (NoSuchAlgorithmException e) {
233 return input;
234 } catch (UnsupportedEncodingException e) {
235 return input;
236 }
237 }
238
239 /**
240 * Remove the HTML content from the given input, and un-html-ize the rest.
241 *
242 * @param html
243 * the HTML-encoded content
244 *
245 * @return the HTML-free equivalent content
246 */
247 public static String unhtml(String html) {
248 StringBuilder builder = new StringBuilder();
249
250 int inTag = 0;
251 for (char car : html.toCharArray()) {
252 if (car == '<') {
253 inTag++;
254 } else if (car == '>') {
255 inTag--;
256 } else if (inTag <= 0) {
257 builder.append(car);
258 }
259 }
260
261 char nbsp = ' '; // non-breakable space (a special char)
262 char space = ' ';
263 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
264 }
265
266 /**
267 * Escape the given {@link String} so it can be used in XML, as content.
268 *
269 * @param input
270 * the input {@link String}
271 *
272 * @return the escaped {@link String}
273 */
274 public static String xmlEscape(String input) {
275 if (input == null) {
276 return "";
277 }
278
279 return HtmlEscape.escapeHtml(input,
280 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
281 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
282 }
283
284 /**
285 * Escape the given {@link String} so it can be used in XML, as text content
286 * inside double-quotes.
287 *
288 * @param input
289 * the input {@link String}
290 *
291 * @return the escaped {@link String}
292 */
293 public static String xmlEscapeQuote(String input) {
294 if (input == null) {
295 return "";
296 }
297
298 return HtmlEscape.escapeHtml(input,
299 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
300 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
301 }
302
303 /**
304 * Zip the data and then encode it into Base64.
305 *
306 * @param data
307 * the data
308 *
309 * @return the Base64 zipped version
310 */
311 public static String zip64(String data) {
312 try {
313 return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
314 } catch (IOException e) {
315 e.printStackTrace();
316 return null;
317 }
318 }
319
320 /**
321 * Unconvert from Base64 then unzip the content.
322 *
323 * @param data
324 * the data in Base64 format
325 *
326 * @return the raw data
327 *
328 * @throws IOException
329 * in case of I/O error
330 */
331 public static String unzip64(String data) throws IOException {
332 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
333 Base64.GZIP));
334
335 Scanner scan = new Scanner(in);
336 scan.useDelimiter("\\A");
337 try {
338 return scan.next();
339 } finally {
340 scan.close();
341 }
342 }
343 }