Fix Cache (URL to File could fail if no parent)
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
CommitLineData
ec1f3444
NR
1package be.nikiroo.utils;
2
ec1f3444 3import java.io.ByteArrayInputStream;
ec1f3444 4import java.io.IOException;
3f8349b7 5import java.io.UnsupportedEncodingException;
ec1f3444
NR
6import java.security.MessageDigest;
7import java.security.NoSuchAlgorithmException;
8import java.text.Normalizer;
9import java.text.Normalizer.Form;
10import java.text.ParseException;
11import java.text.SimpleDateFormat;
ec1f3444 12import java.util.Date;
db31c358 13import java.util.Scanner;
ec1f3444
NR
14import java.util.regex.Pattern;
15
ec1f3444
NR
16import org.unbescape.html.HtmlEscape;
17import org.unbescape.html.HtmlEscapeLevel;
18import org.unbescape.html.HtmlEscapeType;
19
20/**
21 * This class offers some utilities based around {@link String}s.
22 *
23 * @author niki
24 */
25public class StringUtils {
/**
 * Horizontal placement of a {@link String} inside a wider area: used when
 * padding is applied, or when there is enough horizontal space for the text
 * to be positioned.
 */
public enum Alignment {
	/** The text sits at the left; padding goes after it. */
	Beginning,

	/** The text sits in the middle; padding is split on both sides. */
	Center,

	/** The text sits at the right; padding goes before it. */
	End
}
38
39 static private Pattern marks = Pattern
40 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
41
42 /**
43 * Fix the size of the given {@link String} either with space-padding or by
44 * shortening it.
45 *
46 * @param text
47 * the {@link String} to fix
48 * @param width
49 * the size of the resulting {@link String} or -1 for a noop
50 *
51 * @return the resulting {@link String} of size <i>size</i>
52 */
53 static public String padString(String text, int width) {
54 return padString(text, width, true, Alignment.Beginning);
55 }
56
57 /**
58 * Fix the size of the given {@link String} either with space-padding or by
59 * optionally shortening it.
60 *
61 * @param text
62 * the {@link String} to fix
63 * @param width
64 * the size of the resulting {@link String} if the text fits or
65 * if cut is TRUE or -1 for a noop
66 * @param cut
67 * cut the {@link String} shorter if needed
68 * @param align
69 * align the {@link String} in this position if we have enough
70 * space
71 *
72 * @return the resulting {@link String} of size <i>size</i> minimum
73 */
74 static public String padString(String text, int width, boolean cut,
75 Alignment align) {
76
77 if (width >= 0) {
78 if (text == null)
79 text = "";
80
81 int diff = width - text.length();
82
83 if (diff < 0) {
84 if (cut)
85 text = text.substring(0, width);
86 } else if (diff > 0) {
87 if (diff < 2 && align != Alignment.End)
88 align = Alignment.Beginning;
89
90 switch (align) {
91 case Beginning:
92 text = text + new String(new char[diff]).replace('\0', ' ');
93 break;
94 case End:
95 text = new String(new char[diff]).replace('\0', ' ') + text;
96 break;
97 case Center:
98 default:
99 int pad1 = (diff) / 2;
100 int pad2 = (diff + 1) / 2;
101 text = new String(new char[pad1]).replace('\0', ' ') + text
102 + new String(new char[pad2]).replace('\0', ' ');
103 break;
104 }
105 }
106 }
107
108 return text;
109 }
110
111 /**
112 * Sanitise the given input to make it more Terminal-friendly by removing
113 * combining characters.
114 *
115 * @param input
116 * the input to sanitise
117 * @param allowUnicode
118 * allow Unicode or only allow ASCII Latin characters
119 *
120 * @return the sanitised {@link String}
121 */
122 static public String sanitize(String input, boolean allowUnicode) {
123 return sanitize(input, allowUnicode, !allowUnicode);
124 }
125
126 /**
127 * Sanitise the given input to make it more Terminal-friendly by removing
128 * combining characters.
129 *
130 * @param input
131 * the input to sanitise
132 * @param allowUnicode
133 * allow Unicode or only allow ASCII Latin characters
134 * @param removeAllAccents
135 * TRUE to replace all accentuated characters by their non
136 * accentuated counter-parts
137 *
138 * @return the sanitised {@link String}
139 */
140 static public String sanitize(String input, boolean allowUnicode,
141 boolean removeAllAccents) {
142
143 if (removeAllAccents) {
144 input = Normalizer.normalize(input, Form.NFKD);
145 input = marks.matcher(input).replaceAll("");
146 }
147
148 input = Normalizer.normalize(input, Form.NFKC);
149
150 if (!allowUnicode) {
151 StringBuilder builder = new StringBuilder();
152 for (int index = 0; index < input.length(); index++) {
153 char car = input.charAt(index);
154 // displayable chars in ASCII are in the range 32<->255,
155 // except DEL (127)
156 if (car >= 32 && car <= 255 && car != 127) {
157 builder.append(car);
158 }
159 }
160 input = builder.toString();
161 }
162
163 return input;
164 }
165
166 /**
167 * Convert between time in milliseconds to {@link String} in a "static" way
168 * (to exchange data over the wire, for instance).
169 *
170 * @param time
171 * the time in milliseconds
172 *
173 * @return the time as a {@link String}
174 */
175 static public String fromTime(long time) {
176 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
177 return sdf.format(new Date(time));
178 }
179
180 /**
181 * Convert between time as a {@link String} to milliseconds in a "static"
182 * way (to exchange data over the wire, for instance).
183 *
db31c358 184 * @param displayTime
ec1f3444
NR
185 * the time as a {@link String}
186 *
187 * @return the time in milliseconds
188 */
db31c358 189 static public long toTime(String displayTime) {
ec1f3444
NR
190 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
191 try {
db31c358 192 return sdf.parse(displayTime).getTime();
ec1f3444
NR
193 } catch (ParseException e) {
194 return -1;
195 }
196 }
197
ec1f3444
NR
198 /**
199 * Return a hash of the given {@link String}.
200 *
201 * @param input
202 * the input data
203 *
204 * @return the hash
205 */
b771aed5 206 static public String getMd5Hash(String input) {
ec1f3444
NR
207 try {
208 MessageDigest md = MessageDigest.getInstance("MD5");
3f8349b7 209 md.update(input.getBytes("UTF-8"));
ec1f3444
NR
210 byte byteData[] = md.digest();
211
212 StringBuffer hexString = new StringBuffer();
213 for (int i = 0; i < byteData.length; i++) {
214 String hex = Integer.toHexString(0xff & byteData[i]);
215 if (hex.length() == 1)
216 hexString.append('0');
217 hexString.append(hex);
218 }
219
220 return hexString.toString();
221 } catch (NoSuchAlgorithmException e) {
222 return input;
3f8349b7
NR
223 } catch (UnsupportedEncodingException e) {
224 return input;
ec1f3444
NR
225 }
226 }
227
ec1f3444
NR
228 /**
229 * Remove the HTML content from the given input, and un-html-ize the rest.
230 *
231 * @param html
232 * the HTML-encoded content
233 *
234 * @return the HTML-free equivalent content
235 */
236 public static String unhtml(String html) {
237 StringBuilder builder = new StringBuilder();
238
239 int inTag = 0;
240 for (char car : html.toCharArray()) {
241 if (car == '<') {
242 inTag++;
243 } else if (car == '>') {
244 inTag--;
245 } else if (inTag <= 0) {
246 builder.append(car);
247 }
248 }
249
7ee9568b
NR
250 char nbsp = ' '; // non-breakable space (a special char)
251 char space = ' ';
252 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
ec1f3444
NR
253 }
254
255 /**
256 * Escape the given {@link String} so it can be used in XML, as content.
257 *
258 * @param input
259 * the input {@link String}
260 *
261 * @return the escaped {@link String}
262 */
263 public static String xmlEscape(String input) {
264 if (input == null) {
265 return "";
266 }
267
268 return HtmlEscape.escapeHtml(input,
269 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
270 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
271 }
272
273 /**
274 * Escape the given {@link String} so it can be used in XML, as text content
275 * inside double-quotes.
276 *
277 * @param input
278 * the input {@link String}
279 *
280 * @return the escaped {@link String}
281 */
282 public static String xmlEscapeQuote(String input) {
283 if (input == null) {
284 return "";
285 }
286
287 return HtmlEscape.escapeHtml(input,
288 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
289 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
290 }
db31c358
NR
291
292 public static String zip64(String data) {
293 try {
294 return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
295 } catch (IOException e) {
296 e.printStackTrace();
297 return null;
298 }
299 }
300
301 public static String unzip64(String data) throws IOException {
302 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
303 Base64.GZIP));
304
305 Scanner scan = new Scanner(in);
306 scan.useDelimiter("\\A");
307 try {
308 return scan.next();
309 } finally {
310 scan.close();
311 }
312 }
ec1f3444 313}