1 package be
.nikiroo
.utils
;
3 import java
.io
.ByteArrayInputStream
;
4 import java
.io
.IOException
;
5 import java
.io
.UnsupportedEncodingException
;
6 import java
.security
.MessageDigest
;
7 import java
.security
.NoSuchAlgorithmException
;
8 import java
.text
.Normalizer
;
9 import java
.text
.Normalizer
.Form
;
10 import java
.text
.ParseException
;
11 import java
.text
.SimpleDateFormat
;
12 import java
.util
.Date
;
13 import java
.util
.Scanner
;
14 import java
.util
.regex
.Pattern
;
16 import org
.unbescape
.html
.HtmlEscape
;
17 import org
.unbescape
.html
.HtmlEscapeLevel
;
18 import org
.unbescape
.html
.HtmlEscapeType
;
21 * This class offers some utilities based around {@link String}s.
25 public class StringUtils
{
27 * This enum type will decide the alignment of a {@link String} when padding
28 * is applied or if there is enough horizontal space for it to be aligned.
30 public enum Alignment
{
31 /** Aligned at left. */
35 /** Aligned at right. */
39 static private Pattern marks
= Pattern
40 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
43 * Fix the size of the given {@link String} either with space-padding or by
47 * the {@link String} to fix
49 * the size of the resulting {@link String} or -1 for a noop
51 * @return the resulting {@link String} of size <i>width</i>
53 static public String
padString(String text
, int width
) {
54 return padString(text
, width
, true, null);
58 * Fix the size of the given {@link String} either with space-padding or by
59 * optionally shortening it.
62 * the {@link String} to fix
64 * the size of the resulting {@link String} if the text fits or
65 * if cut is TRUE or -1 for a noop
67 * cut the {@link String} shorter if needed
69 * align the {@link String} in this position if we have enough
70 * space (default is Alignment.Beginning)
72 * @return the resulting {@link String} of size <i>width</i> minimum
74 static public String
padString(String text
, int width
, boolean cut
,
78 align
= Alignment
.Beginning
;
85 int diff
= width
- text
.length();
89 text
= text
.substring(0, width
);
90 } else if (diff
> 0) {
91 if (diff
< 2 && align
!= Alignment
.End
)
92 align
= Alignment
.Beginning
;
96 text
= text
+ new String(new char[diff
]).replace('\0', ' ');
99 text
= new String(new char[diff
]).replace('\0', ' ') + text
;
103 int pad1
= (diff
) / 2;
104 int pad2
= (diff
+ 1) / 2;
105 text
= new String(new char[pad1
]).replace('\0', ' ') + text
106 + new String(new char[pad2
]).replace('\0', ' ');
116 * Sanitise the given input to make it more Terminal-friendly by removing
117 * combining characters.
120 * the input to sanitise
121 * @param allowUnicode
122 * allow Unicode or only allow ASCII Latin characters
124 * @return the sanitised {@link String}
126 static public String
sanitize(String input
, boolean allowUnicode
) {
127 return sanitize(input
, allowUnicode
, !allowUnicode
);
131 * Sanitise the given input to make it more Terminal-friendly by removing
132 * combining characters.
135 * the input to sanitise
136 * @param allowUnicode
137 * allow Unicode or only allow ASCII Latin characters
138 * @param removeAllAccents
139 * TRUE to replace all accented characters with their
140 * non-accented counterparts
142 * @return the sanitised {@link String}
144 static public String
sanitize(String input
, boolean allowUnicode
,
145 boolean removeAllAccents
) {
147 if (removeAllAccents
) {
148 input
= Normalizer
.normalize(input
, Form
.NFKD
);
149 input
= marks
.matcher(input
).replaceAll("");
152 input
= Normalizer
.normalize(input
, Form
.NFKC
);
155 StringBuilder builder
= new StringBuilder();
156 for (int index
= 0; index
< input
.length(); index
++) {
157 char car
= input
.charAt(index
);
158 // displayable chars in ASCII are in the range 32<->255,
160 if (car
>= 32 && car
<= 255 && car
!= 127) {
164 input
= builder
.toString();
171 * Convert the time in milliseconds to a {@link String} in a "fixed"
172 * way (to exchange data over the wire, for instance).
174 * Precise to the second.
177 * the specified number of milliseconds since the standard base
178 * time known as "the epoch", namely January 1, 1970, 00:00:00
181 * @return the time as a {@link String}
183 static public String
fromTime(long time
) {
184 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
185 return sdf
.format(new Date(time
));
189 * Convert the time as a {@link String} to milliseconds in a "fixed"
190 * way (to exchange data over the wire, for instance).
192 * Precise to the second.
195 * the time as a {@link String}
197 * @return the number of milliseconds since the standard base time known as
198 * "the epoch", namely January 1, 1970, 00:00:00 GMT
200 static public long toTime(String displayTime
) {
201 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
203 return sdf
.parse(displayTime
).getTime();
204 } catch (ParseException e
) {
210 * Return a hash of the given {@link String}.
217 static public String
getMd5Hash(String input
) {
219 MessageDigest md
= MessageDigest
.getInstance("MD5");
220 md
.update(input
.getBytes("UTF-8"));
221 byte byteData
[] = md
.digest();
223 StringBuffer hexString
= new StringBuffer();
224 for (int i
= 0; i
< byteData
.length
; i
++) {
225 String hex
= Integer
.toHexString(0xff & byteData
[i
]);
226 if (hex
.length() == 1)
227 hexString
.append('0');
228 hexString
.append(hex
);
231 return hexString
.toString();
232 } catch (NoSuchAlgorithmException e
) {
234 } catch (UnsupportedEncodingException e
) {
240 * Remove the HTML content from the given input, and un-html-ize the rest.
243 * the HTML-encoded content
245 * @return the HTML-free equivalent content
247 public static String
unhtml(String html
) {
248 StringBuilder builder
= new StringBuilder();
251 for (char car
: html
.toCharArray()) {
254 } else if (car
== '>') {
256 } else if (inTag
<= 0) {
261 char nbsp
= ' '; // non-breakable space (a special char)
263 return HtmlEscape
.unescapeHtml(builder
.toString()).replace(nbsp
, space
);
267 * Escape the given {@link String} so it can be used in XML, as content.
270 * the input {@link String}
272 * @return the escaped {@link String}
274 public static String
xmlEscape(String input
) {
279 return HtmlEscape
.escapeHtml(input
,
280 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
281 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
285 * Escape the given {@link String} so it can be used in XML, as text content
286 * inside double-quotes.
289 * the input {@link String}
291 * @return the escaped {@link String}
293 public static String
xmlEscapeQuote(String input
) {
298 return HtmlEscape
.escapeHtml(input
,
299 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
300 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
304 * Zip the data and then encode it into Base64.
309 * @return the Base64 zipped version
311 public static String
zip64(String data
) {
313 return Base64
.encodeBytes(data
.getBytes(), Base64
.GZIP
);
314 } catch (IOException e
) {
321 * Decode from Base64, then unzip the content.
324 * the data in Base64 format
326 * @return the raw data
328 * @throws IOException
329 * in case of I/O error
331 public static String
unzip64(String data
) throws IOException
{
332 ByteArrayInputStream in
= new ByteArrayInputStream(Base64
.decode(data
,
335 Scanner scan
= new Scanner(in
);
336 scan
.useDelimiter("\\A");