1 package be
.nikiroo
.utils
;
3 import java
.io
.ByteArrayInputStream
;
4 import java
.io
.IOException
;
5 import java
.io
.UnsupportedEncodingException
;
6 import java
.security
.MessageDigest
;
7 import java
.security
.NoSuchAlgorithmException
;
8 import java
.text
.Normalizer
;
9 import java
.text
.Normalizer
.Form
;
10 import java
.text
.ParseException
;
11 import java
.text
.SimpleDateFormat
;
12 import java
.util
.Date
;
13 import java
.util
.List
;
14 import java
.util
.Scanner
;
15 import java
.util
.regex
.Pattern
;
17 import org
.unbescape
.html
.HtmlEscape
;
18 import org
.unbescape
.html
.HtmlEscapeLevel
;
19 import org
.unbescape
.html
.HtmlEscapeType
;
22 * This class offers some utilities based around {@link String}s.
26 public class StringUtils
{
28 * This enum type will decide the alignment of a {@link String} when padding
29 * or justification is applied (if there is enough horizontal space for it
32 public enum Alignment
{
33 /** Aligned at left. */
37 /** Aligned at right. */
39 /** Full justified (to both left and right). */
42 // Old Deprecated values:
44 /** DEPRECATED: please use LEFT. */
47 /** DEPRECATED: please use CENTER. */
50 /** DEPRECATED: please use RIGHT. */
55 * Return the non-deprecated version of this enum if needed (or return
58 * @return the non-deprecated value
60 Alignment
undeprecate() {
61 if (this == Beginning
)
71 static private Pattern marks
= getMarks();
74 * Fix the size of the given {@link String} either with space-padding or by
78 * the {@link String} to fix
80 * the size of the resulting {@link String} or -1 for a noop
82 * @return the resulting {@link String} of size <i>size</i>
84 static public String
padString(String text
, int width
) {
85 return padString(text
, width
, true, null);
89 * Fix the size of the given {@link String} either with space-padding or by
90 * optionally shortening it.
93 * the {@link String} to fix
95 * the size of the resulting {@link String} if the text fits or
96 * if cut is TRUE or -1 for a noop
98 * cut the {@link String} shorter if needed
100 * align the {@link String} in this position if we have enough
101 * space (default is Alignment.LEFT)
103 * @return the resulting {@link String} of size <i>size</i> minimum
105 static public String
padString(String text
, int width
, boolean cut
,
109 align
= Alignment
.LEFT
;
112 align
= align
.undeprecate();
118 int diff
= width
- text
.length();
122 text
= text
.substring(0, width
);
123 } else if (diff
> 0) {
124 if (diff
< 2 && align
!= Alignment
.RIGHT
)
125 align
= Alignment
.LEFT
;
129 text
= new String(new char[diff
]).replace('\0', ' ') + text
;
132 int pad1
= (diff
) / 2;
133 int pad2
= (diff
+ 1) / 2;
134 text
= new String(new char[pad1
]).replace('\0', ' ') + text
135 + new String(new char[pad2
]).replace('\0', ' ');
139 text
= text
+ new String(new char[diff
]).replace('\0', ' ');
149 * Justify a text into width-sized (at the maximum) lines.
152 * the {@link String} to justify
154 * the maximum size of the resulting lines
156 * @return a list of justified text lines
158 static public List
<String
> justifyText(String text
, int width
) {
159 return justifyText(text
, width
, null);
163 * Justify a text into width-sized (at the maximum) lines.
166 * the {@link String} to justify
168 * the maximum size of the resulting lines
170 * align the lines in this position (default is
171 * Alignment.LEFT)
173 * @return a list of justified text lines
175 static public List
<String
> justifyText(String text
, int width
,
178 align
= Alignment
.LEFT
;
181 align
= align
.undeprecate();
185 return StringJustifier
.center(text
, width
);
187 return StringJustifier
.right(text
, width
);
189 return StringJustifier
.full(text
, width
);
192 return StringJustifier
.left(text
, width
);
197 * Sanitise the given input to make it more Terminal-friendly by removing
198 * combining characters.
201 * the input to sanitise
202 * @param allowUnicode
203 * allow Unicode or only allow ASCII Latin characters
205 * @return the sanitised {@link String}
207 static public String
sanitize(String input
, boolean allowUnicode
) {
208 return sanitize(input
, allowUnicode
, !allowUnicode
);
212 * Sanitise the given input to make it more Terminal-friendly by removing
213 * combining characters.
216 * the input to sanitise
217 * @param allowUnicode
218 * allow Unicode or only allow ASCII Latin characters
219 * @param removeAllAccents
220 * TRUE to replace all accented characters by their
221 * unaccented counterparts
223 * @return the sanitised {@link String}
225 static public String
sanitize(String input
, boolean allowUnicode
,
226 boolean removeAllAccents
) {
228 if (removeAllAccents
) {
229 input
= Normalizer
.normalize(input
, Form
.NFKD
);
231 input
= marks
.matcher(input
).replaceAll("");
235 input
= Normalizer
.normalize(input
, Form
.NFKC
);
238 StringBuilder builder
= new StringBuilder();
239 for (int index
= 0; index
< input
.length(); index
++) {
240 char car
= input
.charAt(index
);
241 // displayable chars in ASCII are in the range 32<->255,
243 if (car
>= 32 && car
<= 255 && car
!= 127) {
247 input
= builder
.toString();
254 * Convert between the time in milliseconds to a {@link String} in a "fixed"
255 * way (to exchange data over the wire, for instance).
257 * Precise to the second.
260 * the specified number of milliseconds since the standard base
261 * time known as "the epoch", namely January 1, 1970, 00:00:00
264 * @return the time as a {@link String}
266 static public String
fromTime(long time
) {
267 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
268 return sdf
.format(new Date(time
));
272 * Convert between the time as a {@link String} to milliseconds in a "fixed"
273 * way (to exchange data over the wire, for instance).
275 * Precise to the second.
278 * the time as a {@link String}
280 * @return the number of milliseconds since the standard base time known as
281 * "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
284 * @throws ParseException
285 * in case of parse error
287 static public long toTime(String displayTime
) throws ParseException
{
288 SimpleDateFormat sdf
= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
289 return sdf
.parse(displayTime
).getTime();
293 * Return a hash of the given {@link String}.
300 static public String
getMd5Hash(String input
) {
302 MessageDigest md
= MessageDigest
.getInstance("MD5");
303 md
.update(input
.getBytes("UTF-8"));
304 byte byteData
[] = md
.digest();
306 StringBuffer hexString
= new StringBuffer();
307 for (int i
= 0; i
< byteData
.length
; i
++) {
308 String hex
= Integer
.toHexString(0xff & byteData
[i
]);
309 if (hex
.length() == 1)
310 hexString
.append('0');
311 hexString
.append(hex
);
314 return hexString
.toString();
315 } catch (NoSuchAlgorithmException e
) {
317 } catch (UnsupportedEncodingException e
) {
323 * Remove the HTML content from the given input, and un-html-ize the rest.
326 * the HTML-encoded content
328 * @return the HTML-free equivalent content
330 public static String
unhtml(String html
) {
331 StringBuilder builder
= new StringBuilder();
334 for (char car
: html
.toCharArray()) {
337 } else if (car
== '>') {
339 } else if (inTag
<= 0) {
344 char nbsp
= ' '; // non-breakable space (a special char)
346 return HtmlEscape
.unescapeHtml(builder
.toString()).replace(nbsp
, space
);
350 * Escape the given {@link String} so it can be used in XML, as content.
353 * the input {@link String}
355 * @return the escaped {@link String}
357 public static String
xmlEscape(String input
) {
362 return HtmlEscape
.escapeHtml(input
,
363 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
364 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
368 * Escape the given {@link String} so it can be used in XML, as text content
369 * inside double-quotes.
372 * the input {@link String}
374 * @return the escaped {@link String}
376 public static String
xmlEscapeQuote(String input
) {
381 return HtmlEscape
.escapeHtml(input
,
382 HtmlEscapeType
.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA
,
383 HtmlEscapeLevel
.LEVEL_1_ONLY_MARKUP_SIGNIFICANT
);
387 * Zip the data and then encode it into Base64.
392 * @return the Base64 zipped version
394 public static String
zip64(String data
) {
396 return Base64
.encodeBytes(data
.getBytes(), Base64
.GZIP
);
397 } catch (IOException e
) {
404 * Decode the data from Base64, then unzip the content.
407 * the data in Base64 format
409 * @return the raw data
411 * @throws IOException
412 * in case of I/O error
414 public static String
unzip64(String data
) throws IOException
{
415 ByteArrayInputStream in
= new ByteArrayInputStream(Base64
.decode(data
,
418 Scanner scan
= new Scanner(in
);
419 scan
.useDelimiter("\\A");
428 * The "remove accents" pattern.
430 * @return the pattern, or NULL if a problem happens
432 private static Pattern
getMarks() {
435 .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
436 } catch (Exception e
) {
437 // Can fail on Android...