src/be/nikiroo/utils/StringUtils.java

   1 package be.nikiroo.utils;
   2
   3 import java.io.ByteArrayInputStream;
   4 import java.io.ByteArrayOutputStream;
   5 import java.io.IOException;
   6 import java.io.InputStream;
   7 import java.io.OutputStream;
   8 import java.io.UnsupportedEncodingException;
   9 import java.security.MessageDigest;
  10 import java.security.NoSuchAlgorithmException;
  11 import java.text.Normalizer;
  12 import java.text.Normalizer.Form;
  13 import java.text.ParseException;
  14 import java.text.SimpleDateFormat;
  15 import java.util.AbstractMap;
  16 import java.util.ArrayList;
  17 import java.util.Arrays;
  18 import java.util.Date;
  19 import java.util.List;
  20 import java.util.Map.Entry;
  21 import java.util.regex.Pattern;
  22 import java.util.zip.GZIPInputStream;
  23 import java.util.zip.GZIPOutputStream;
  24
  25 import org.unbescape.html.HtmlEscape;
  26 import org.unbescape.html.HtmlEscapeLevel;
  27 import org.unbescape.html.HtmlEscapeType;
  28
  29 import be.nikiroo.utils.streams.Base64InputStream;
  30 import be.nikiroo.utils.streams.Base64OutputStream;
  31
  32 /**
 * This class offers some utilities based around {@link String}s.
  34  *
  35  * @author niki
  36  */
  37 public class StringUtils {
  38         /**
  39          * This enum type will decide the alignment of a {@link String} when padding
  40          * or justification is applied (if there is enough horizontal space for it
  41          * to be aligned).
  42          */
  43         public enum Alignment {
  44                 /** Aligned at left. */
  45                 LEFT,
  46                 /** Centered. */
  47                 CENTER,
  48                 /** Aligned at right. */
  49                 RIGHT,
  50                 /** Full justified (to both left and right). */
  51                 JUSTIFY,
  52
  53                 // Old Deprecated values:
  54
  55                 /** DEPRECATED: please use LEFT. */
  56                 @Deprecated
  57                 Beginning,
  58                 /** DEPRECATED: please use CENTER. */
  59                 @Deprecated
  60                 Center,
  61                 /** DEPRECATED: please use RIGHT. */
  62                 @Deprecated
  63                 End;
  64
  65                 /**
  66                  * Return the non-deprecated version of this enum if needed (or return
  67                  * self if not).
  68                  *
  69                  * @return the non-deprecated value
  70                  */
  71                 Alignment undeprecate() {
  72                         if (this == Beginning)
  73                                 return LEFT;
  74                         if (this == Center)
  75                                 return CENTER;
  76                         if (this == End)
  77                                 return RIGHT;
  78                         return this;
  79                 }
  80         }
  81
  82         static private Pattern marks = getMarks();
  83
  84         /**
  85          * Fix the size of the given {@link String} either with space-padding or by
  86          * shortening it.
  87          *
  88          * @param text
  89          *            the {@link String} to fix
  90          * @param width
  91          *            the size of the resulting {@link String} or -1 for a noop
  92          *
  93          * @return the resulting {@link String} of size <i>size</i>
  94          */
  95         static public String padString(String text, int width) {
  96                 return padString(text, width, true, null);
  97         }
  98
  99         /**
 100          * Fix the size of the given {@link String} either with space-padding or by
 101          * optionally shortening it.
 102          *
 103          * @param text
 104          *            the {@link String} to fix
 105          * @param width
 106          *            the size of the resulting {@link String} if the text fits or
 107          *            if cut is TRUE or -1 for a noop
 108          * @param cut
 109          *            cut the {@link String} shorter if needed
 110          * @param align
 111          *            align the {@link String} in this position if we have enough
 112          *            space (default is Alignment.Beginning)
 113          *
 114          * @return the resulting {@link String} of size <i>size</i> minimum
 115          */
 116         static public String padString(String text, int width, boolean cut,
 117                         Alignment align) {
 118
 119                 if (align == null) {
 120                         align = Alignment.LEFT;
 121                 }
 122
 123                 align = align.undeprecate();
 124
 125                 if (width >= 0) {
 126                         if (text == null)
 127                                 text = "";
 128
 129                         int diff = width - text.length();
 130
 131                         if (diff < 0) {
 132                                 if (cut)
 133                                         text = text.substring(0, width);
 134                         } else if (diff > 0) {
 135                                 if (diff < 2 && align != Alignment.RIGHT)
 136                                         align = Alignment.LEFT;
 137
 138                                 switch (align) {
 139                                 case RIGHT:
 140                                         text = new String(new char[diff]).replace('\0', ' ') + text;
 141                                         break;
 142                                 case CENTER:
 143                                         int pad1 = (diff) / 2;
 144                                         int pad2 = (diff + 1) / 2;
 145                                         text = new String(new char[pad1]).replace('\0', ' ') + text
 146                                                         + new String(new char[pad2]).replace('\0', ' ');
 147                                         break;
 148                                 case LEFT:
 149                                 default:
 150                                         text = text + new String(new char[diff]).replace('\0', ' ');
 151                                         break;
 152                                 }
 153                         }
 154                 }
 155
 156                 return text;
 157         }
 158
 159         /**
 160          * Justify a text into width-sized (at the maximum) lines.
 161          *
 162          * @param text
 163          *            the {@link String} to justify
 164          * @param width
 165          *            the maximum size of the resulting lines
 166          *
 167          * @return a list of justified text lines
 168          */
 169         static public List<String> justifyText(String text, int width) {
 170                 return justifyText(text, width, null);
 171         }
 172
 173         /**
 174          * Justify a text into width-sized (at the maximum) lines.
 175          *
 176          * @param text
 177          *            the {@link String} to justify
 178          * @param width
 179          *            the maximum size of the resulting lines
 180          * @param align
 181          *            align the lines in this position (default is
 182          *            Alignment.Beginning)
 183          *
 184          * @return a list of justified text lines
 185          */
 186         static public List<String> justifyText(String text, int width,
 187                         Alignment align) {
 188                 if (align == null) {
 189                         align = Alignment.LEFT;
 190                 }
 191
 192                 align = align.undeprecate();
 193
 194                 switch (align) {
 195                 case CENTER:
 196                         return StringJustifier.center(text, width);
 197                 case RIGHT:
 198                         return StringJustifier.right(text, width);
 199                 case JUSTIFY:
 200                         return StringJustifier.full(text, width);
 201                 case LEFT:
 202                 default:
 203                         return StringJustifier.left(text, width);
 204                 }
 205         }
 206
 207         /**
 208          * Justify a text into width-sized (at the maximum) lines.
 209          *
 210          * @param text
 211          *            the {@link String} to justify
 212          * @param width
 213          *            the maximum size of the resulting lines
 214          *
 215          * @return a list of justified text lines
 216          */
 217         static public List<String> justifyText(List<String> text, int width) {
 218                 return justifyText(text, width, null);
 219         }
 220
 221         /**
 222          * Justify a text into width-sized (at the maximum) lines.
 223          *
 224          * @param text
 225          *            the {@link String} to justify
 226          * @param width
 227          *            the maximum size of the resulting lines
 228          * @param align
 229          *            align the lines in this position (default is
 230          *            Alignment.Beginning)
 231          *
 232          * @return a list of justified text lines
 233          */
 234         static public List<String> justifyText(List<String> text, int width,
 235                         Alignment align) {
 236                 List<String> result = new ArrayList<String>();
 237
 238                 // Content <-> Bullet spacing (null = no spacing)
 239                 List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
 240                 StringBuilder previous = null;
 241                 StringBuilder tmp = new StringBuilder();
 242                 String previousItemBulletSpacing = null;
 243                 String itemBulletSpacing = null;
 244                 for (String inputLine : text) {
 245                         boolean previousLineComplete = true;
 246
 247                         String current = inputLine.replace("\t", "    ");
 248                         itemBulletSpacing = getItemSpacing(current);
 249                         boolean bullet = isItemLine(current);
 250                         if ((previousItemBulletSpacing == null || itemBulletSpacing
 251                                         .length() <= previousItemBulletSpacing.length()) && !bullet) {
 252                                 itemBulletSpacing = null;
 253                         }
 254
 255                         if (itemBulletSpacing != null) {
 256                                 current = current.trim();
 257                                 if (!current.isEmpty() && bullet) {
 258                                         current = current.substring(1);
 259                                 }
 260                                 current = current.trim();
 261                                 previousLineComplete = bullet;
 262                         } else {
 263                                 tmp.setLength(0);
 264                                 for (String word : current.split(" ")) {
 265                                         if (word.isEmpty()) {
 266                                                 continue;
 267                                         }
 268
 269                                         if (tmp.length() > 0) {
 270                                                 tmp.append(' ');
 271                                         }
 272                                         tmp.append(word.trim());
 273                                 }
 274                                 current = tmp.toString();
 275
 276                                 previousLineComplete = current.isEmpty()
 277                                                 || previousItemBulletSpacing != null
 278                                                 || (previous != null && isFullLine(previous))
 279                                                 || isHrLine(current) || isHrLine(previous);
 280                         }
 281
 282                         if (previous == null) {
 283                                 previous = new StringBuilder();
 284                         } else {
 285                                 if (previousLineComplete) {
 286                                         lines.add(new AbstractMap.SimpleEntry<String, String>(
 287                                                         previous.toString(), previousItemBulletSpacing));
 288                                         previous.setLength(0);
 289                                         previousItemBulletSpacing = itemBulletSpacing;
 290                                 } else {
 291                                         previous.append(' ');
 292                                 }
 293                         }
 294
 295                         previous.append(current);
 296
 297                 }
 298
 299                 if (previous != null) {
 300                         lines.add(new AbstractMap.SimpleEntry<String, String>(previous
 301                                         .toString(), previousItemBulletSpacing));
 302                 }
 303
 304                 for (Entry<String, String> line : lines) {
 305                         String content = line.getKey();
 306                         String spacing = line.getValue();
 307
 308                         String bullet = "- ";
 309                         if (spacing == null) {
 310                                 bullet = "";
 311                                 spacing = "";
 312                         }
 313
 314                         if (spacing.length() > width + 3) {
 315                                 spacing = "";
 316                         }
 317
 318                         for (String subline : StringUtils.justifyText(content, width
 319                                         - (spacing.length() + bullet.length()), align)) {
 320                                 result.add(spacing + bullet + subline);
 321                                 if (!bullet.isEmpty()) {
 322                                         bullet = "  ";
 323                                 }
 324                         }
 325                 }
 326
 327                 return result;
 328         }
 329
 330         /**
 331          * Sanitise the given input to make it more Terminal-friendly by removing
 332          * combining characters.
 333          *
 334          * @param input
 335          *            the input to sanitise
 336          * @param allowUnicode
 337          *            allow Unicode or only allow ASCII Latin characters
 338          *
 339          * @return the sanitised {@link String}
 340          */
 341         static public String sanitize(String input, boolean allowUnicode) {
 342                 return sanitize(input, allowUnicode, !allowUnicode);
 343         }
 344
 345         /**
 346          * Sanitise the given input to make it more Terminal-friendly by removing
 347          * combining characters.
 348          *
 349          * @param input
 350          *            the input to sanitise
 351          * @param allowUnicode
 352          *            allow Unicode or only allow ASCII Latin characters
 353          * @param removeAllAccents
 354          *            TRUE to replace all accentuated characters by their non
 355          *            accentuated counter-parts
 356          *
 357          * @return the sanitised {@link String}
 358          */
 359         static public String sanitize(String input, boolean allowUnicode,
 360                         boolean removeAllAccents) {
 361
 362                 if (removeAllAccents) {
 363                         input = Normalizer.normalize(input, Form.NFKD);
 364                         if (marks != null) {
 365                                 input = marks.matcher(input).replaceAll("");
 366                         }
 367                 }
 368
 369                 input = Normalizer.normalize(input, Form.NFKC);
 370
 371                 if (!allowUnicode) {
 372                         StringBuilder builder = new StringBuilder();
 373                         for (int index = 0; index < input.length(); index++) {
 374                                 char car = input.charAt(index);
 375                                 // displayable chars in ASCII are in the range 32<->255,
 376                                 // except DEL (127)
 377                                 if (car >= 32 && car <= 255 && car != 127) {
 378                                         builder.append(car);
 379                                 }
 380                         }
 381                         input = builder.toString();
 382                 }
 383
 384                 return input;
 385         }
 386
 387         /**
 388          * Convert between the time in milliseconds to a {@link String} in a "fixed"
 389          * way (to exchange data over the wire, for instance).
 390          * <p>
 391          * Precise to the second.
 392          *
 393          * @param time
 394          *            the specified number of milliseconds since the standard base
 395          *            time known as "the epoch", namely January 1, 1970, 00:00:00
 396          *            GMT
 397          *
 398          * @return the time as a {@link String}
 399          */
 400         static public String fromTime(long time) {
 401                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 402                 return sdf.format(new Date(time));
 403         }
 404
 405         /**
 406          * Convert between the time as a {@link String} to milliseconds in a "fixed"
 407          * way (to exchange data over the wire, for instance).
 408          * <p>
 409          * Precise to the second.
 410          *
 411          * @param displayTime
 412          *            the time as a {@link String}
 413          *
 414          * @return the number of milliseconds since the standard base time known as
 415          *         "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
 416          *         of error
 417          *
 418          * @throws ParseException
 419          *             in case of parse error
 420          */
 421         static public long toTime(String displayTime) throws ParseException {
 422                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 423                 return sdf.parse(displayTime).getTime();
 424         }
 425
 426         /**
 427          * Return a hash of the given {@link String}.
 428          *
 429          * @param input
 430          *            the input data
 431          *
 432          * @return the hash
 433          */
 434         static public String getMd5Hash(String input) {
 435                 try {
 436                         MessageDigest md = MessageDigest.getInstance("MD5");
 437                         md.update(getBytes(input));
 438                         byte byteData[] = md.digest();
 439
 440                         StringBuffer hexString = new StringBuffer();
 441                         for (int i = 0; i < byteData.length; i++) {
 442                                 String hex = Integer.toHexString(0xff & byteData[i]);
 443                                 if (hex.length() == 1)
 444                                         hexString.append('0');
 445                                 hexString.append(hex);
 446                         }
 447
 448                         return hexString.toString();
 449                 } catch (NoSuchAlgorithmException e) {
 450                         return input;
 451                 }
 452         }
 453
 454         /**
 455          * Remove the HTML content from the given input, and un-html-ize the rest.
 456          *
 457          * @param html
 458          *            the HTML-encoded content
 459          *
 460          * @return the HTML-free equivalent content
 461          */
 462         public static String unhtml(String html) {
 463                 StringBuilder builder = new StringBuilder();
 464
 465                 int inTag = 0;
 466                 for (char car : html.toCharArray()) {
 467                         if (car == '<') {
 468                                 inTag++;
 469                         } else if (car == '>') {
 470                                 inTag--;
 471                         } else if (inTag <= 0) {
 472                                 builder.append(car);
 473                         }
 474                 }
 475
 476                 char nbsp = ' '; // non-breakable space (a special char)
 477                 char space = ' ';
 478                 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
 479         }
 480
 481         /**
 482          * Escape the given {@link String} so it can be used in XML, as content.
 483          *
 484          * @param input
 485          *            the input {@link String}
 486          *
 487          * @return the escaped {@link String}
 488          */
 489         public static String xmlEscape(String input) {
 490                 if (input == null) {
 491                         return "";
 492                 }
 493
 494                 return HtmlEscape.escapeHtml(input,
 495                                 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
 496                                 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
 497         }
 498
 499         /**
 500          * Escape the given {@link String} so it can be used in XML, as text content
 501          * inside double-quotes.
 502          *
 503          * @param input
 504          *            the input {@link String}
 505          *
 506          * @return the escaped {@link String}
 507          */
 508         public static String xmlEscapeQuote(String input) {
 509                 if (input == null) {
 510                         return "";
 511                 }
 512
 513                 return HtmlEscape.escapeHtml(input,
 514                                 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
 515                                 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
 516         }
 517
 518         /**
 519          * Zip the data and then encode it into Base64.
 520          *
 521          * @param data
 522          *            the data
 523          *
 524          * @return the Base64 zipped version
 525          *
 526          * @throws IOException
 527          *             in case of I/O error
 528          */
 529         public static String zip64(String data) throws IOException {
 530                 try {
 531                         return zip64(getBytes(data));
 532                 } catch (UnsupportedEncodingException e) {
 533                         // All conforming JVM are required to support UTF-8
 534                         e.printStackTrace();
 535                         return null;
 536                 }
 537         }
 538
 539         /**
 540          * Zip the data and then encode it into Base64.
 541          *
 542          * @param data
 543          *            the data
 544          *
 545          * @return the Base64 zipped version
 546          *
 547          * @throws IOException
 548          *             in case of I/O error
 549          */
 550         public static String zip64(byte[] data) throws IOException {
 551                 // 1. compress
 552                 ByteArrayOutputStream bout = new ByteArrayOutputStream();
 553                 try {
 554                         OutputStream out = new GZIPOutputStream(bout);
 555                         try {
 556                                 out.write(data);
 557                         } finally {
 558                                 out.close();
 559                         }
 560                 } finally {
 561                         data = bout.toByteArray();
 562                         bout.close();
 563                 }
 564
 565                 // 2. base64
 566                 InputStream in = new ByteArrayInputStream(data);
 567                 try {
 568                         in = new Base64InputStream(in, true);
 569                         return new String(IOUtils.toByteArray(in), "UTF-8");
 570                 } finally {
 571                         in.close();
 572                 }
 573         }
 574
 575         /**
 576          * Unconvert from Base64 then unzip the content, which is assumed to be a
 577          * String.
 578          *
 579          * @param data
 580          *            the data in Base64 format
 581          *
 582          * @return the raw data
 583          *
 584          * @throws IOException
 585          *             in case of I/O error
 586          */
 587         public static String unzip64s(String data) throws IOException {
 588                 return new String(unzip64(data), "UTF-8");
 589         }
 590
 591         /**
 592          * Unconvert from Base64 then unzip the content.
 593          *
 594          * @param data
 595          *            the data in Base64 format
 596          *
 597          * @return the raw data
 598          *
 599          * @throws IOException
 600          *             in case of I/O error
 601          */
 602         public static byte[] unzip64(String data) throws IOException {
 603                 InputStream in = new Base64InputStream(new ByteArrayInputStream(
 604                                 getBytes(data)), false);
 605                 try {
 606                         in = new GZIPInputStream(in);
 607                         return IOUtils.toByteArray(in);
 608                 } finally {
 609                         in.close();
 610                 }
 611         }
 612
 613         /**
 614          * Convert the given data to Base64 format.
 615          *
 616          * @param data
 617          *            the data to convert
 618          *
 619          * @return the Base64 {@link String} representation of the data
 620          *
 621          * @throws IOException
 622          *             in case of I/O errors
 623          */
 624         public static String base64(String data) throws IOException {
 625                 return base64(getBytes(data));
 626         }
 627
 628         /**
 629          * Convert the given data to Base64 format.
 630          *
 631          * @param data
 632          *            the data to convert
 633          *
 634          * @return the Base64 {@link String} representation of the data
 635          *
 636          * @throws IOException
 637          *             in case of I/O errors
 638          */
 639         public static String base64(byte[] data) throws IOException {
 640                 Base64InputStream in = new Base64InputStream(new ByteArrayInputStream(
 641                                 data), true);
 642                 try {
 643                         return new String(IOUtils.toByteArray(in), "UTF-8");
 644                 } finally {
 645                         in.close();
 646                 }
 647         }
 648
 649         /**
 650          * Unconvert the given data from Base64 format back to a raw array of bytes.
 651          *
 652          * @param data
 653          *            the data to unconvert
 654          *
 655          * @return the raw data represented by the given Base64 {@link String},
 656          *
 657          * @throws IOException
 658          *             in case of I/O errors
 659          */
 660         public static byte[] unbase64(String data) throws IOException {
 661                 Base64InputStream in = new Base64InputStream(new ByteArrayInputStream(
 662                                 getBytes(data)), false);
 663                 try {
 664                         return IOUtils.toByteArray(in);
 665                 } finally {
 666                         in.close();
 667                 }
 668         }
 669
 670         /**
 671          * Unonvert the given data from Base64 format back to a {@link String}.
 672          *
 673          * @param data
 674          *            the data to unconvert
 675          *
 676          * @return the {@link String} represented by the given Base64 {@link String}
 677          *
 678          * @throws IOException
 679          *             in case of I/O errors
 680          */
 681         public static String unbase64s(String data) throws IOException {
 682                 return new String(unbase64(data), "UTF-8");
 683         }
 684
 685         /**
 686          * Return a display {@link String} for the given value, which can be
 687          * suffixed with "k" or "M" depending upon the number, if it is big enough.
 688          * <p>
 689          * <p>
 690          * Examples:
 691          * <ul>
 692          * <li><tt>8 765</tt> becomes "8k"</li>
 693          * <li><tt>998 765</tt> becomes "998k"</li>
 694          * <li><tt>12 987 364</tt> becomes "12M"</li>
 695          * <li><tt>5 534 333 221</tt> becomes "5G"</li>
 696          * </ul>
 697          *
 698          * @param value
 699          *            the value to convert
 700          *
 701          * @return the display value
 702          */
 703         public static String formatNumber(long value) {
 704                 return formatNumber(value, 0);
 705         }
 706
 707         /**
 708          * Return a display {@link String} for the given value, which can be
 709          * suffixed with "k" or "M" depending upon the number, if it is big enough.
 710          * <p>
 711          * Examples (assuming decimalPositions = 1):
 712          * <ul>
 713          * <li><tt>8 765</tt> becomes "8.7k"</li>
 714          * <li><tt>998 765</tt> becomes "998.7k"</li>
 715          * <li><tt>12 987 364</tt> becomes "12.9M"</li>
 716          * <li><tt>5 534 333 221</tt> becomes "5.5G"</li>
 717          * </ul>
 718          *
 719          * @param value
 720          *            the value to convert
 721          * @param decimalPositions
 722          *            the number of decimal positions to keep
 723          *
 724          * @return the display value
 725          */
 726         public static String formatNumber(long value, int decimalPositions) {
 727                 long userValue = value;
 728                 String suffix = "";
 729                 long mult = 1;
 730
 731                 if (value >= 1000000000l) {
 732                         mult = 1000000000l;
 733                         userValue = value / 1000000000l;
 734                         suffix = " G";
 735                 } else if (value >= 1000000l) {
 736                         mult = 1000000l;
 737                         userValue = value / 1000000l;
 738                         suffix = " M";
 739                 } else if (value >= 1000l) {
 740                         mult = 1000l;
 741                         userValue = value / 1000l;
 742                         suffix = " k";
 743                 }
 744
 745                 String deci = "";
 746                 if (decimalPositions > 0) {
 747                         deci = Long.toString(value % mult);
 748                         int size = Long.toString(mult).length() - 1;
 749                         while (deci.length() < size) {
 750                                 deci = "0" + deci;
 751                         }
 752
 753                         deci = deci.substring(0, Math.min(decimalPositions, deci.length()));
 754                         while (deci.length() < decimalPositions) {
 755                                 deci += "0";
 756                         }
 757
 758                         deci = "." + deci;
 759                 }
 760
 761                 return Long.toString(userValue) + deci + suffix;
 762         }
 763
 764         /**
 765          * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
 766          * read a "display" number that can contain a "M" or "k" suffix and return
 767          * the full value.
 768          * <p>
 769          * Of course, the conversion to and from display form is lossy (example:
 770          * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
 771          *
 772          * @param value
 773          *            the value in display form with possible "M" and "k" suffixes,
 774          *            can be NULL
 775          *
 776          * @return the value as a number, or 0 if not possible to convert
 777          */
 778         public static long toNumber(String value) {
 779                 return toNumber(value, 0l);
 780         }
 781
 782         /**
 783          * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
 784          * read a "display" number that can contain a "M" or "k" suffix and return
 785          * the full value.
 786          * <p>
 787          * Of course, the conversion to and from display form is lossy (example:
 788          * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
 789          *
 790          * @param value
 791          *            the value in display form with possible "M" and "k" suffixes,
 792          *            can be NULL
 793          * @param def
 794          *            the default value if it is not possible to convert the given
 795          *            value to a number
 796          *
 797          * @return the value as a number, or 0 if not possible to convert
 798          */
 799         public static long toNumber(String value, long def) {
 800                 long count = def;
 801                 if (value != null) {
 802                         value = value.trim().toLowerCase();
 803                         try {
 804                                 long mult = 1;
 805                                 if (value.endsWith("g")) {
 806                                         value = value.substring(0, value.length() - 1).trim();
 807                                         mult = 1000000000;
 808                                 } else if (value.endsWith("m")) {
 809                                         value = value.substring(0, value.length() - 1).trim();
 810                                         mult = 1000000;
 811                                 } else if (value.endsWith("k")) {
 812                                         value = value.substring(0, value.length() - 1).trim();
 813                                         mult = 1000;
 814                                 }
 815
 816                                 long deci = 0;
 817                                 if (value.contains(".")) {
 818                                         String[] tab = value.split("\\.");
 819                                         if (tab.length != 2) {
 820                                                 throw new NumberFormatException(value);
 821                                         }
 822                                         double decimal = Double.parseDouble("0."
 823                                                         + tab[tab.length - 1]);
 824                                         deci = ((long) (mult * decimal));
 825                                         value = tab[0];
 826                                 }
 827                                 count = mult * Long.parseLong(value) + deci;
 828                         } catch (Exception e) {
 829                         }
 830                 }
 831
 832                 return count;
 833         }
 834
 835         /**
 836          * Return the bytes array representation of the given {@link String} in
 837          * UTF-8.
 838          *
 839          * @param str
 840          *            the {@link String} to transform into bytes
 841          * @return the content in bytes
 842          */
 843         static public byte[] getBytes(String str) {
 844                 try {
 845                         return str.getBytes("UTF-8");
 846                 } catch (UnsupportedEncodingException e) {
 847                         // All conforming JVM must support UTF-8
 848                         e.printStackTrace();
 849                         return null;
 850                 }
 851         }
 852
 853         /**
 854          * The "remove accents" pattern.
 855          *
 856          * @return the pattern, or NULL if a problem happens
 857          */
 858         private static Pattern getMarks() {
 859                 try {
 860                         return Pattern
 861                                         .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
 862                 } catch (Exception e) {
 863                         // Can fail on Android...
 864                         return null;
 865                 }
 866         }
 867
 868         //
 869         // justify List<String> related:
 870         //
 871
 872         /**
 873          * Check if this line ends as a complete line (ends with a "." or similar).
 874          * <p>
 875          * Note that we consider an empty line as full, and a line ending with
 876          * spaces as not complete.
 877          *
 878          * @param line
 879          *            the line to check
 880          *
 881          * @return TRUE if it does
 882          */
 883         static private boolean isFullLine(StringBuilder line) {
 884                 if (line.length() == 0) {
 885                         return true;
 886                 }
 887
 888                 char lastCar = line.charAt(line.length() - 1);
 889                 switch (lastCar) {
 890                 case '.': // points
 891                 case '?':
 892                 case '!':
 893
 894                 case '\'': // quotes
 895                 case '‘':
 896                 case '’':
 897
 898                 case '"': // double quotes
 899                 case '”':
 900                 case '“':
 901                 case '»':
 902                 case '«':
 903                         return true;
 904                 default:
 905                         return false;
 906                 }
 907         }
 908
 909         /**
 910          * Check if this line represent an item in a list or description (i.e.,
 911          * check that the first non-space char is "-").
 912          *
 913          * @param line
 914          *            the line to check
 915          *
 916          * @return TRUE if it is
 917          */
 918         static private boolean isItemLine(String line) {
 919                 String spacing = getItemSpacing(line);
 920                 return spacing != null && !spacing.isEmpty()
 921                                 && line.charAt(spacing.length()) == '-';
 922         }
 923
 924         /**
 925          * Return all the spaces that start this line (or Empty if none).
 926          *
 927          * @param line
 928          *            the line to get the starting spaces from
 929          *
 930          * @return the left spacing
 931          */
 932         static private String getItemSpacing(String line) {
 933                 int i;
 934                 for (i = 0; i < line.length(); i++) {
 935                         if (line.charAt(i) != ' ') {
 936                                 return line.substring(0, i);
 937                         }
 938                 }
 939
 940                 return "";
 941         }
 942
 943         /**
 944          * This line is an horizontal spacer line.
 945          *
 946          * @param line
 947          *            the line to test
 948          *
 949          * @return TRUE if it is
 950          */
 951         static private boolean isHrLine(CharSequence line) {
 952                 int count = 0;
 953                 if (line != null) {
 954                         for (int i = 0; i < line.length(); i++) {
 955                                 char car = line.charAt(i);
 956                                 if (car == ' ' || car == '\t' || car == '*' || car == '-'
 957                                                 || car == '_' || car == '~' || car == '=' || car == '/'
 958                                                 || car == '\\') {
 959                                         count++;
 960                                 } else {
 961                                         return false;
 962                                 }
 963                         }
 964                 }
 965
 966                 return count > 2;
 967         }
 968
 969         // Deprecated functions, please do not use //
 970
 971         /**
 972          * @deprecated please use {@link StringUtils#zip64(byte[])} or
 973          *             {@link StringUtils#base64(byte[])} instead.
 974          *
 975          * @param data
 976          *            the data to encode
 977          * @param zip
 978          *            TRUE to zip it before Base64 encoding it, FALSE for Base64
 979          *            encoding only
 980          *
 981          * @return the encoded data
 982          *
 983          * @throws IOException
 984          *             in case of I/O error
 985          */
 986         @Deprecated
 987         public static String base64(String data, boolean zip) throws IOException {
 988                 return base64(getBytes(data), zip);
 989         }
 990
 991         /**
 992          * @deprecated please use {@link StringUtils#zip64(String)} or
 993          *             {@link StringUtils#base64(String)} instead.
 994          *
 995          * @param data
 996          *            the data to encode
 997          * @param zip
 998          *            TRUE to zip it before Base64 encoding it, FALSE for Base64
 999          *            encoding only
1000          *
1001          * @return the encoded data
1002          *
1003          * @throws IOException
1004          *             in case of I/O error
1005          */
1006         @Deprecated
1007         public static String base64(byte[] data, boolean zip) throws IOException {
1008                 if (zip) {
1009                         return zip64(data);
1010                 }
1011
1012                 Base64InputStream b64 = new Base64InputStream(new ByteArrayInputStream(
1013                                 data), true);
1014                 try {
1015                         return IOUtils.readSmallStream(b64);
1016                 } finally {
1017                         b64.close();
1018                 }
1019         }
1020
1021         /**
1022          * @deprecated please use {@link Base64OutputStream} and
1023          *             {@link GZIPOutputStream} instead.
1024          *
1025          * @param breakLines
1026          *            NOT USED ANYMORE, it is always considered FALSE now
1027          */
1028         @Deprecated
1029         public static OutputStream base64(OutputStream data, boolean zip,
1030                         boolean breakLines) throws IOException {
1031                 OutputStream out = new Base64OutputStream(data);
1032                 if (zip) {
1033                         out = new java.util.zip.GZIPOutputStream(out);
1034                 }
1035
1036                 return out;
1037         }
1038
1039         /**
1040          * Unconvert the given data from Base64 format back to a raw array of bytes.
1041          * <p>
1042          * Will automatically detect zipped data and also uncompress it before
1043          * returning, unless ZIP is false.
1044          *
1045          * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1046          *
1047          * @param data
1048          *            the data to unconvert
1049          * @param zip
1050          *            TRUE to also uncompress the data from a GZIP format
1051          *            automatically; if set to FALSE, zipped data can be returned
1052          *
1053          * @return the raw data represented by the given Base64 {@link String},
1054          *         optionally compressed with GZIP
1055          *
1056          * @throws IOException
1057          *             in case of I/O errors
1058          */
1059         @Deprecated
1060         public static byte[] unbase64(String data, boolean zip) throws IOException {
1061                 byte[] buffer = unbase64(data);
1062                 if (!zip) {
1063                         return buffer;
1064                 }
1065
1066                 try {
1067                         GZIPInputStream zipped = new GZIPInputStream(
1068                                         new ByteArrayInputStream(buffer));
1069                         try {
1070                                 ByteArrayOutputStream out = new ByteArrayOutputStream();
1071                                 try {
1072                                         IOUtils.write(zipped, out);
1073                                         return out.toByteArray();
1074                                 } finally {
1075                                         out.close();
1076                                 }
1077                         } finally {
1078                                 zipped.close();
1079                         }
1080                 } catch (Exception e) {
1081                         return buffer;
1082                 }
1083         }
1084
1085         /**
1086          * Unconvert the given data from Base64 format back to a raw array of bytes.
1087          * <p>
1088          * Will automatically detect zipped data and also uncompress it before
1089          * returning, unless ZIP is false.
1090          *
1091          * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1092          *
1093          * @param data
1094          *            the data to unconvert
1095          * @param zip
1096          *            TRUE to also uncompress the data from a GZIP format
1097          *            automatically; if set to FALSE, zipped data can be returned
1098          *
1099          * @return the raw data represented by the given Base64 {@link String},
1100          *         optionally compressed with GZIP
1101          *
1102          * @throws IOException
1103          *             in case of I/O errors
1104          */
1105         @Deprecated
1106         public static InputStream unbase64(InputStream data, boolean zip)
1107                         throws IOException {
1108                 return new ByteArrayInputStream(unbase64(IOUtils.readSmallStream(data),
1109                                 zip));
1110         }
1111
1112         /**
1113          * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1114          */
1115         @Deprecated
1116         public static byte[] unbase64(byte[] data, int offset, int count,
1117                         boolean zip) throws IOException {
1118                 byte[] dataPart = Arrays.copyOfRange(data, offset, offset + count);
1119                 return unbase64(new String(dataPart, "UTF-8"), zip);
1120         }
1121
1122         /**
1123          * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1124          */
1125         @Deprecated
1126         public static String unbase64s(String data, boolean zip) throws IOException {
1127                 return new String(unbase64(data, zip), "UTF-8");
1128         }
1129
1130         /**
1131          * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1132          */
1133         @Deprecated
1134         public static String unbase64s(byte[] data, int offset, int count,
1135                         boolean zip) throws IOException {
1136                 return new String(unbase64(data, offset, count, zip), "UTF-8");
1137         }
1138 }