Fix some warnings
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
CommitLineData
ec1f3444
NR
1package be.nikiroo.utils;
2
ec1f3444 3import java.io.ByteArrayInputStream;
ec1f3444 4import java.io.IOException;
3f8349b7 5import java.io.UnsupportedEncodingException;
ec1f3444
NR
6import java.security.MessageDigest;
7import java.security.NoSuchAlgorithmException;
8import java.text.Normalizer;
9import java.text.Normalizer.Form;
10import java.text.ParseException;
11import java.text.SimpleDateFormat;
ec1f3444 12import java.util.Date;
db31c358 13import java.util.Scanner;
ec1f3444
NR
14import java.util.regex.Pattern;
15
ec1f3444
NR
16import org.unbescape.html.HtmlEscape;
17import org.unbescape.html.HtmlEscapeLevel;
18import org.unbescape.html.HtmlEscapeType;
19
20/**
21 * This class offer some utilities based around {@link String}s.
22 *
23 * @author niki
24 */
25public class StringUtils {
26 /**
27 * This enum type will decide the alignment of a {@link String} when padding
28 * is applied or if there is enough horizontal space for it to be aligned.
29 */
30 public enum Alignment {
31 /** Aligned at left. */
32 Beginning,
33 /** Centered. */
34 Center,
35 /** Aligned at right. */
36 End
37 }
38
e8aa5bf9 39 static private Pattern marks = getMarks();
ec1f3444
NR
40
41 /**
42 * Fix the size of the given {@link String} either with space-padding or by
43 * shortening it.
44 *
45 * @param text
46 * the {@link String} to fix
47 * @param width
48 * the size of the resulting {@link String} or -1 for a noop
49 *
50 * @return the resulting {@link String} of size <i>size</i>
51 */
52 static public String padString(String text, int width) {
451f434b 53 return padString(text, width, true, null);
ec1f3444
NR
54 }
55
56 /**
57 * Fix the size of the given {@link String} either with space-padding or by
58 * optionally shortening it.
59 *
60 * @param text
61 * the {@link String} to fix
62 * @param width
63 * the size of the resulting {@link String} if the text fits or
64 * if cut is TRUE or -1 for a noop
65 * @param cut
66 * cut the {@link String} shorter if needed
67 * @param align
68 * align the {@link String} in this position if we have enough
451f434b 69 * space (default is Alignment.Beginning)
ec1f3444
NR
70 *
71 * @return the resulting {@link String} of size <i>size</i> minimum
72 */
73 static public String padString(String text, int width, boolean cut,
74 Alignment align) {
75
451f434b
NR
76 if (align == null) {
77 align = Alignment.Beginning;
78 }
79
ec1f3444
NR
80 if (width >= 0) {
81 if (text == null)
82 text = "";
83
84 int diff = width - text.length();
85
86 if (diff < 0) {
87 if (cut)
88 text = text.substring(0, width);
89 } else if (diff > 0) {
90 if (diff < 2 && align != Alignment.End)
91 align = Alignment.Beginning;
92
93 switch (align) {
94 case Beginning:
95 text = text + new String(new char[diff]).replace('\0', ' ');
96 break;
97 case End:
98 text = new String(new char[diff]).replace('\0', ' ') + text;
99 break;
100 case Center:
101 default:
102 int pad1 = (diff) / 2;
103 int pad2 = (diff + 1) / 2;
104 text = new String(new char[pad1]).replace('\0', ' ') + text
105 + new String(new char[pad2]).replace('\0', ' ');
106 break;
107 }
108 }
109 }
110
111 return text;
112 }
113
114 /**
115 * Sanitise the given input to make it more Terminal-friendly by removing
116 * combining characters.
117 *
118 * @param input
119 * the input to sanitise
120 * @param allowUnicode
121 * allow Unicode or only allow ASCII Latin characters
122 *
123 * @return the sanitised {@link String}
124 */
125 static public String sanitize(String input, boolean allowUnicode) {
126 return sanitize(input, allowUnicode, !allowUnicode);
127 }
128
129 /**
130 * Sanitise the given input to make it more Terminal-friendly by removing
131 * combining characters.
132 *
133 * @param input
134 * the input to sanitise
135 * @param allowUnicode
136 * allow Unicode or only allow ASCII Latin characters
137 * @param removeAllAccents
138 * TRUE to replace all accentuated characters by their non
139 * accentuated counter-parts
140 *
141 * @return the sanitised {@link String}
142 */
143 static public String sanitize(String input, boolean allowUnicode,
144 boolean removeAllAccents) {
145
146 if (removeAllAccents) {
147 input = Normalizer.normalize(input, Form.NFKD);
e8aa5bf9
NR
148 if (marks != null) {
149 input = marks.matcher(input).replaceAll("");
150 }
ec1f3444
NR
151 }
152
153 input = Normalizer.normalize(input, Form.NFKC);
154
155 if (!allowUnicode) {
156 StringBuilder builder = new StringBuilder();
157 for (int index = 0; index < input.length(); index++) {
158 char car = input.charAt(index);
159 // displayable chars in ASCII are in the range 32<->255,
160 // except DEL (127)
161 if (car >= 32 && car <= 255 && car != 127) {
162 builder.append(car);
163 }
164 }
165 input = builder.toString();
166 }
167
168 return input;
169 }
170
171 /**
451f434b
NR
172 * Convert between the time in milliseconds to a {@link String} in a "fixed"
173 * way (to exchange data over the wire, for instance).
174 * <p>
175 * Precise to the second.
ec1f3444
NR
176 *
177 * @param time
451f434b
NR
178 * the specified number of milliseconds since the standard base
179 * time known as "the epoch", namely January 1, 1970, 00:00:00
180 * GMT
ec1f3444
NR
181 *
182 * @return the time as a {@link String}
183 */
184 static public String fromTime(long time) {
185 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
186 return sdf.format(new Date(time));
187 }
188
189 /**
451f434b 190 * Convert between the time as a {@link String} to milliseconds in a "fixed"
ec1f3444 191 * way (to exchange data over the wire, for instance).
451f434b
NR
192 * <p>
193 * Precise to the second.
ec1f3444 194 *
db31c358 195 * @param displayTime
ec1f3444
NR
196 * the time as a {@link String}
197 *
451f434b 198 * @return the number of milliseconds since the standard base time known as
e8aa5bf9
NR
199 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
200 * of error
201 *
202 * @throws ParseException
203 * in case of parse error
ec1f3444 204 */
e8aa5bf9 205 static public long toTime(String displayTime) throws ParseException {
ec1f3444 206 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
e8aa5bf9 207 return sdf.parse(displayTime).getTime();
ec1f3444
NR
208 }
209
ec1f3444
NR
210 /**
211 * Return a hash of the given {@link String}.
212 *
213 * @param input
214 * the input data
215 *
216 * @return the hash
217 */
b771aed5 218 static public String getMd5Hash(String input) {
ec1f3444
NR
219 try {
220 MessageDigest md = MessageDigest.getInstance("MD5");
3f8349b7 221 md.update(input.getBytes("UTF-8"));
ec1f3444
NR
222 byte byteData[] = md.digest();
223
224 StringBuffer hexString = new StringBuffer();
225 for (int i = 0; i < byteData.length; i++) {
226 String hex = Integer.toHexString(0xff & byteData[i]);
227 if (hex.length() == 1)
228 hexString.append('0');
229 hexString.append(hex);
230 }
231
232 return hexString.toString();
233 } catch (NoSuchAlgorithmException e) {
234 return input;
3f8349b7
NR
235 } catch (UnsupportedEncodingException e) {
236 return input;
ec1f3444
NR
237 }
238 }
239
ec1f3444
NR
240 /**
241 * Remove the HTML content from the given input, and un-html-ize the rest.
242 *
243 * @param html
244 * the HTML-encoded content
245 *
246 * @return the HTML-free equivalent content
247 */
248 public static String unhtml(String html) {
249 StringBuilder builder = new StringBuilder();
250
251 int inTag = 0;
252 for (char car : html.toCharArray()) {
253 if (car == '<') {
254 inTag++;
255 } else if (car == '>') {
256 inTag--;
257 } else if (inTag <= 0) {
258 builder.append(car);
259 }
260 }
261
7ee9568b
NR
262 char nbsp = ' '; // non-breakable space (a special char)
263 char space = ' ';
264 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
ec1f3444
NR
265 }
266
267 /**
268 * Escape the given {@link String} so it can be used in XML, as content.
269 *
270 * @param input
271 * the input {@link String}
272 *
273 * @return the escaped {@link String}
274 */
275 public static String xmlEscape(String input) {
276 if (input == null) {
277 return "";
278 }
279
280 return HtmlEscape.escapeHtml(input,
281 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
282 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
283 }
284
285 /**
286 * Escape the given {@link String} so it can be used in XML, as text content
287 * inside double-quotes.
288 *
289 * @param input
290 * the input {@link String}
291 *
292 * @return the escaped {@link String}
293 */
294 public static String xmlEscapeQuote(String input) {
295 if (input == null) {
296 return "";
297 }
298
299 return HtmlEscape.escapeHtml(input,
300 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
301 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
302 }
db31c358 303
80500544
NR
304 /**
305 * Zip the data and then encode it into Base64.
306 *
307 * @param data
308 * the data
309 *
310 * @return the Base64 zipped version
311 */
db31c358
NR
312 public static String zip64(String data) {
313 try {
314 return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
315 } catch (IOException e) {
316 e.printStackTrace();
317 return null;
318 }
319 }
320
80500544
NR
321 /**
322 * Unconvert from Base64 then unzip the content.
323 *
324 * @param data
325 * the data in Base64 format
326 *
327 * @return the raw data
328 *
329 * @throws IOException
330 * in case of I/O error
331 */
db31c358
NR
332 public static String unzip64(String data) throws IOException {
333 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
334 Base64.GZIP));
335
336 Scanner scan = new Scanner(in);
337 scan.useDelimiter("\\A");
338 try {
339 return scan.next();
340 } finally {
341 scan.close();
342 }
343 }
e8aa5bf9
NR
344
345 /**
346 * The "remove accents" pattern.
347 *
348 * @return the pattern, or NULL if a problem happens
349 */
350 private static Pattern getMarks() {
351 try {
352 return Pattern
353 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
354 } catch (Exception e) {
355 // Can fail on Android...
356 return null;
357 }
358 }
ec1f3444 359}