Version 2.0.0 (small API change)
[nikiroo-utils.git] / src / be / nikiroo / utils / StringUtils.java
1 package be.nikiroo.utils;
2
3 import java.io.ByteArrayInputStream;
4 import java.io.IOException;
5 import java.security.MessageDigest;
6 import java.security.NoSuchAlgorithmException;
7 import java.text.Normalizer;
8 import java.text.Normalizer.Form;
9 import java.text.ParseException;
10 import java.text.SimpleDateFormat;
11 import java.util.Date;
12 import java.util.Scanner;
13 import java.util.regex.Pattern;
14
15 import org.unbescape.html.HtmlEscape;
16 import org.unbescape.html.HtmlEscapeLevel;
17 import org.unbescape.html.HtmlEscapeType;
18
19 /**
20 * This class offers some utilities based around {@link String}s.
21 *
22 * @author niki
23 */
24 public class StringUtils {
/**
 * Horizontal placement of a {@link String} inside a wider area, used when
 * padding is applied (i.e., when there is spare horizontal room around the
 * text).
 */
public enum Alignment {
	/** The text is aligned at the left. */
	Beginning,
	/** The text is centered. */
	Center,
	/** The text is aligned at the right. */
	End
}
37
38 static private Pattern marks = Pattern
39 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
40
41 /**
42 * Fix the size of the given {@link String} either with space-padding or by
43 * shortening it.
44 *
45 * @param text
46 * the {@link String} to fix
47 * @param width
48 * the size of the resulting {@link String} or -1 for a noop
49 *
50 * @return the resulting {@link String} of size <i>size</i>
51 */
52 static public String padString(String text, int width) {
53 return padString(text, width, true, Alignment.Beginning);
54 }
55
56 /**
57 * Fix the size of the given {@link String} either with space-padding or by
58 * optionally shortening it.
59 *
60 * @param text
61 * the {@link String} to fix
62 * @param width
63 * the size of the resulting {@link String} if the text fits or
64 * if cut is TRUE or -1 for a noop
65 * @param cut
66 * cut the {@link String} shorter if needed
67 * @param align
68 * align the {@link String} in this position if we have enough
69 * space
70 *
71 * @return the resulting {@link String} of size <i>size</i> minimum
72 */
73 static public String padString(String text, int width, boolean cut,
74 Alignment align) {
75
76 if (width >= 0) {
77 if (text == null)
78 text = "";
79
80 int diff = width - text.length();
81
82 if (diff < 0) {
83 if (cut)
84 text = text.substring(0, width);
85 } else if (diff > 0) {
86 if (diff < 2 && align != Alignment.End)
87 align = Alignment.Beginning;
88
89 switch (align) {
90 case Beginning:
91 text = text + new String(new char[diff]).replace('\0', ' ');
92 break;
93 case End:
94 text = new String(new char[diff]).replace('\0', ' ') + text;
95 break;
96 case Center:
97 default:
98 int pad1 = (diff) / 2;
99 int pad2 = (diff + 1) / 2;
100 text = new String(new char[pad1]).replace('\0', ' ') + text
101 + new String(new char[pad2]).replace('\0', ' ');
102 break;
103 }
104 }
105 }
106
107 return text;
108 }
109
110 /**
111 * Sanitise the given input to make it more Terminal-friendly by removing
112 * combining characters.
113 *
114 * @param input
115 * the input to sanitise
116 * @param allowUnicode
117 * allow Unicode or only allow ASCII Latin characters
118 *
119 * @return the sanitised {@link String}
120 */
121 static public String sanitize(String input, boolean allowUnicode) {
122 return sanitize(input, allowUnicode, !allowUnicode);
123 }
124
125 /**
126 * Sanitise the given input to make it more Terminal-friendly by removing
127 * combining characters.
128 *
129 * @param input
130 * the input to sanitise
131 * @param allowUnicode
132 * allow Unicode or only allow ASCII Latin characters
133 * @param removeAllAccents
134 * TRUE to replace all accentuated characters by their non
135 * accentuated counter-parts
136 *
137 * @return the sanitised {@link String}
138 */
139 static public String sanitize(String input, boolean allowUnicode,
140 boolean removeAllAccents) {
141
142 if (removeAllAccents) {
143 input = Normalizer.normalize(input, Form.NFKD);
144 input = marks.matcher(input).replaceAll("");
145 }
146
147 input = Normalizer.normalize(input, Form.NFKC);
148
149 if (!allowUnicode) {
150 StringBuilder builder = new StringBuilder();
151 for (int index = 0; index < input.length(); index++) {
152 char car = input.charAt(index);
153 // displayable chars in ASCII are in the range 32<->255,
154 // except DEL (127)
155 if (car >= 32 && car <= 255 && car != 127) {
156 builder.append(car);
157 }
158 }
159 input = builder.toString();
160 }
161
162 return input;
163 }
164
165 /**
166 * Convert between time in milliseconds to {@link String} in a "static" way
167 * (to exchange data over the wire, for instance).
168 *
169 * @param time
170 * the time in milliseconds
171 *
172 * @return the time as a {@link String}
173 */
174 static public String fromTime(long time) {
175 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
176 return sdf.format(new Date(time));
177 }
178
179 /**
180 * Convert between time as a {@link String} to milliseconds in a "static"
181 * way (to exchange data over the wire, for instance).
182 *
183 * @param displayTime
184 * the time as a {@link String}
185 *
186 * @return the time in milliseconds
187 */
188 static public long toTime(String displayTime) {
189 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
190 try {
191 return sdf.parse(displayTime).getTime();
192 } catch (ParseException e) {
193 return -1;
194 }
195 }
196
197 /**
198 * Return a hash of the given {@link String}.
199 *
200 * @param input
201 * the input data
202 *
203 * @return the hash
204 */
205 static public String getMd5Hash(String input) {
206 try {
207 MessageDigest md = MessageDigest.getInstance("MD5");
208 md.update(input.getBytes());
209 byte byteData[] = md.digest();
210
211 StringBuffer hexString = new StringBuffer();
212 for (int i = 0; i < byteData.length; i++) {
213 String hex = Integer.toHexString(0xff & byteData[i]);
214 if (hex.length() == 1)
215 hexString.append('0');
216 hexString.append(hex);
217 }
218
219 return hexString.toString();
220 } catch (NoSuchAlgorithmException e) {
221 return input;
222 }
223 }
224
225 /**
226 * Remove the HTML content from the given input, and un-html-ize the rest.
227 *
228 * @param html
229 * the HTML-encoded content
230 *
231 * @return the HTML-free equivalent content
232 */
233 public static String unhtml(String html) {
234 StringBuilder builder = new StringBuilder();
235
236 int inTag = 0;
237 for (char car : html.toCharArray()) {
238 if (car == '<') {
239 inTag++;
240 } else if (car == '>') {
241 inTag--;
242 } else if (inTag <= 0) {
243 builder.append(car);
244 }
245 }
246
247 char nbsp = ' '; // non-breakable space (a special char)
248 char space = ' ';
249 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
250 }
251
252 /**
253 * Escape the given {@link String} so it can be used in XML, as content.
254 *
255 * @param input
256 * the input {@link String}
257 *
258 * @return the escaped {@link String}
259 */
260 public static String xmlEscape(String input) {
261 if (input == null) {
262 return "";
263 }
264
265 return HtmlEscape.escapeHtml(input,
266 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
267 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
268 }
269
270 /**
271 * Escape the given {@link String} so it can be used in XML, as text content
272 * inside double-quotes.
273 *
274 * @param input
275 * the input {@link String}
276 *
277 * @return the escaped {@link String}
278 */
279 public static String xmlEscapeQuote(String input) {
280 if (input == null) {
281 return "";
282 }
283
284 return HtmlEscape.escapeHtml(input,
285 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
286 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
287 }
288
289 public static String zip64(String data) {
290 try {
291 return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
292 } catch (IOException e) {
293 e.printStackTrace();
294 return null;
295 }
296 }
297
298 public static String unzip64(String data) throws IOException {
299 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
300 Base64.GZIP));
301
302 Scanner scan = new Scanner(in);
303 scan.useDelimiter("\\A");
304 try {
305 return scan.next();
306 } finally {
307 scan.close();
308 }
309 }
310 }