src/be/nikiroo/utils/StringUtils.java

   1 package be.nikiroo.utils;
   2
   3 import java.io.ByteArrayInputStream;
   4 import java.io.IOException;
   5 import java.io.UnsupportedEncodingException;
   6 import java.security.MessageDigest;
   7 import java.security.NoSuchAlgorithmException;
   8 import java.text.Normalizer;
   9 import java.text.Normalizer.Form;
  10 import java.text.ParseException;
  11 import java.text.SimpleDateFormat;
  12 import java.util.Date;
  13 import java.util.Scanner;
  14 import java.util.regex.Pattern;
  15
  16 import org.unbescape.html.HtmlEscape;
  17 import org.unbescape.html.HtmlEscapeLevel;
  18 import org.unbescape.html.HtmlEscapeType;
  19
  20 /**
  21  * This class offer some utilities based around {@link String}s.
  22  *
  23  * @author niki
  24  */
  25 public class StringUtils {
  26         /**
  27          * This enum type will decide the alignment of a {@link String} when padding
  28          * is applied or if there is enough horizontal space for it to be aligned.
  29          */
  30         public enum Alignment {
  31                 /** Aligned at left. */
  32                 Beginning,
  33                 /** Centered. */
  34                 Center,
  35                 /** Aligned at right. */
  36                 End
  37         }
  38
  39         static private Pattern marks = Pattern
  40                         .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
  41
  42         /**
  43          * Fix the size of the given {@link String} either with space-padding or by
  44          * shortening it.
  45          *
  46          * @param text
  47          *            the {@link String} to fix
  48          * @param width
  49          *            the size of the resulting {@link String} or -1 for a noop
  50          *
  51          * @return the resulting {@link String} of size <i>size</i>
  52          */
  53         static public String padString(String text, int width) {
  54                 return padString(text, width, true, null);
  55         }
  56
  57         /**
  58          * Fix the size of the given {@link String} either with space-padding or by
  59          * optionally shortening it.
  60          *
  61          * @param text
  62          *            the {@link String} to fix
  63          * @param width
  64          *            the size of the resulting {@link String} if the text fits or
  65          *            if cut is TRUE or -1 for a noop
  66          * @param cut
  67          *            cut the {@link String} shorter if needed
  68          * @param align
  69          *            align the {@link String} in this position if we have enough
  70          *            space (default is Alignment.Beginning)
  71          *
  72          * @return the resulting {@link String} of size <i>size</i> minimum
  73          */
  74         static public String padString(String text, int width, boolean cut,
  75                         Alignment align) {
  76
  77                 if (align == null) {
  78                         align = Alignment.Beginning;
  79                 }
  80
  81                 if (width >= 0) {
  82                         if (text == null)
  83                                 text = "";
  84
  85                         int diff = width - text.length();
  86
  87                         if (diff < 0) {
  88                                 if (cut)
  89                                         text = text.substring(0, width);
  90                         } else if (diff > 0) {
  91                                 if (diff < 2 && align != Alignment.End)
  92                                         align = Alignment.Beginning;
  93
  94                                 switch (align) {
  95                                 case Beginning:
  96                                         text = text + new String(new char[diff]).replace('\0', ' ');
  97                                         break;
  98                                 case End:
  99                                         text = new String(new char[diff]).replace('\0', ' ') + text;
 100                                         break;
 101                                 case Center:
 102                                 default:
 103                                         int pad1 = (diff) / 2;
 104                                         int pad2 = (diff + 1) / 2;
 105                                         text = new String(new char[pad1]).replace('\0', ' ') + text
 106                                                         + new String(new char[pad2]).replace('\0', ' ');
 107                                         break;
 108                                 }
 109                         }
 110                 }
 111
 112                 return text;
 113         }
 114
 115         /**
 116          * Sanitise the given input to make it more Terminal-friendly by removing
 117          * combining characters.
 118          *
 119          * @param input
 120          *            the input to sanitise
 121          * @param allowUnicode
 122          *            allow Unicode or only allow ASCII Latin characters
 123          *
 124          * @return the sanitised {@link String}
 125          */
 126         static public String sanitize(String input, boolean allowUnicode) {
 127                 return sanitize(input, allowUnicode, !allowUnicode);
 128         }
 129
 130         /**
 131          * Sanitise the given input to make it more Terminal-friendly by removing
 132          * combining characters.
 133          *
 134          * @param input
 135          *            the input to sanitise
 136          * @param allowUnicode
 137          *            allow Unicode or only allow ASCII Latin characters
 138          * @param removeAllAccents
 139          *            TRUE to replace all accentuated characters by their non
 140          *            accentuated counter-parts
 141          *
 142          * @return the sanitised {@link String}
 143          */
 144         static public String sanitize(String input, boolean allowUnicode,
 145                         boolean removeAllAccents) {
 146
 147                 if (removeAllAccents) {
 148                         input = Normalizer.normalize(input, Form.NFKD);
 149                         input = marks.matcher(input).replaceAll("");
 150                 }
 151
 152                 input = Normalizer.normalize(input, Form.NFKC);
 153
 154                 if (!allowUnicode) {
 155                         StringBuilder builder = new StringBuilder();
 156                         for (int index = 0; index < input.length(); index++) {
 157                                 char car = input.charAt(index);
 158                                 // displayable chars in ASCII are in the range 32<->255,
 159                                 // except DEL (127)
 160                                 if (car >= 32 && car <= 255 && car != 127) {
 161                                         builder.append(car);
 162                                 }
 163                         }
 164                         input = builder.toString();
 165                 }
 166
 167                 return input;
 168         }
 169
 170         /**
 171          * Convert between the time in milliseconds to a {@link String} in a "fixed"
 172          * way (to exchange data over the wire, for instance).
 173          * <p>
 174          * Precise to the second.
 175          *
 176          * @param time
 177          *            the specified number of milliseconds since the standard base
 178          *            time known as "the epoch", namely January 1, 1970, 00:00:00
 179          *            GMT
 180          *
 181          * @return the time as a {@link String}
 182          */
 183         static public String fromTime(long time) {
 184                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 185                 return sdf.format(new Date(time));
 186         }
 187
 188         /**
 189          * Convert between the time as a {@link String} to milliseconds in a "fixed"
 190          * way (to exchange data over the wire, for instance).
 191          * <p>
 192          * Precise to the second.
 193          *
 194          * @param displayTime
 195          *            the time as a {@link String}
 196          *
 197          * @return the number of milliseconds since the standard base time known as
 198          *         "the epoch", namely January 1, 1970, 00:00:00 GMT
 199          */
 200         static public long toTime(String displayTime) {
 201                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 202                 try {
 203                         return sdf.parse(displayTime).getTime();
 204                 } catch (ParseException e) {
 205                         return -1;
 206                 }
 207         }
 208
 209         /**
 210          * Return a hash of the given {@link String}.
 211          *
 212          * @param input
 213          *            the input data
 214          *
 215          * @return the hash
 216          */
 217         static public String getMd5Hash(String input) {
 218                 try {
 219                         MessageDigest md = MessageDigest.getInstance("MD5");
 220                         md.update(input.getBytes("UTF-8"));
 221                         byte byteData[] = md.digest();
 222
 223                         StringBuffer hexString = new StringBuffer();
 224                         for (int i = 0; i < byteData.length; i++) {
 225                                 String hex = Integer.toHexString(0xff & byteData[i]);
 226                                 if (hex.length() == 1)
 227                                         hexString.append('0');
 228                                 hexString.append(hex);
 229                         }
 230
 231                         return hexString.toString();
 232                 } catch (NoSuchAlgorithmException e) {
 233                         return input;
 234                 } catch (UnsupportedEncodingException e) {
 235                         return input;
 236                 }
 237         }
 238
 239         /**
 240          * Remove the HTML content from the given input, and un-html-ize the rest.
 241          *
 242          * @param html
 243          *            the HTML-encoded content
 244          *
 245          * @return the HTML-free equivalent content
 246          */
 247         public static String unhtml(String html) {
 248                 StringBuilder builder = new StringBuilder();
 249
 250                 int inTag = 0;
 251                 for (char car : html.toCharArray()) {
 252                         if (car == '<') {
 253                                 inTag++;
 254                         } else if (car == '>') {
 255                                 inTag--;
 256                         } else if (inTag <= 0) {
 257                                 builder.append(car);
 258                         }
 259                 }
 260
 261                 char nbsp = ' '; // non-breakable space (a special char)
 262                 char space = ' ';
 263                 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
 264         }
 265
 266         /**
 267          * Escape the given {@link String} so it can be used in XML, as content.
 268          *
 269          * @param input
 270          *            the input {@link String}
 271          *
 272          * @return the escaped {@link String}
 273          */
 274         public static String xmlEscape(String input) {
 275                 if (input == null) {
 276                         return "";
 277                 }
 278
 279                 return HtmlEscape.escapeHtml(input,
 280                                 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
 281                                 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
 282         }
 283
 284         /**
 285          * Escape the given {@link String} so it can be used in XML, as text content
 286          * inside double-quotes.
 287          *
 288          * @param input
 289          *            the input {@link String}
 290          *
 291          * @return the escaped {@link String}
 292          */
 293         public static String xmlEscapeQuote(String input) {
 294                 if (input == null) {
 295                         return "";
 296                 }
 297
 298                 return HtmlEscape.escapeHtml(input,
 299                                 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
 300                                 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
 301         }
 302
 303         /**
 304          * Zip the data and then encode it into Base64.
 305          *
 306          * @param data
 307          *            the data
 308          *
 309          * @return the Base64 zipped version
 310          */
 311         public static String zip64(String data) {
 312                 try {
 313                         return Base64.encodeBytes(data.getBytes(), Base64.GZIP);
 314                 } catch (IOException e) {
 315                         e.printStackTrace();
 316                         return null;
 317                 }
 318         }
 319
 320         /**
 321          * Unconvert from Base64 then unzip the content.
 322          *
 323          * @param data
 324          *            the data in Base64 format
 325          *
 326          * @return the raw data
 327          *
 328          * @throws IOException
 329          *             in case of I/O error
 330          */
 331         public static String unzip64(String data) throws IOException {
 332                 ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data,
 333                                 Base64.GZIP));
 334
 335                 Scanner scan = new Scanner(in);
 336                 scan.useDelimiter("\\A");
 337                 try {
 338                         return scan.next();
 339                 } finally {
 340                         scan.close();
 341                 }
 342         }
 343 }