src/be/nikiroo/utils/StringUtils.java

   1 package be.nikiroo.utils;
   2
   3 import java.io.ByteArrayInputStream;
   4 import java.io.ByteArrayOutputStream;
   5 import java.io.IOException;
   6 import java.io.InputStream;
   7 import java.io.OutputStream;
   8 import java.io.UnsupportedEncodingException;
   9 import java.security.MessageDigest;
  10 import java.security.NoSuchAlgorithmException;
  11 import java.text.Normalizer;
  12 import java.text.Normalizer.Form;
  13 import java.text.ParseException;
  14 import java.text.SimpleDateFormat;
  15 import java.util.AbstractMap;
  16 import java.util.ArrayList;
  17 import java.util.Arrays;
  18 import java.util.Date;
  19 import java.util.List;
  20 import java.util.Map.Entry;
  21 import java.util.regex.Pattern;
  22 import java.util.zip.GZIPInputStream;
  23 import java.util.zip.GZIPOutputStream;
  24
  25 import org.unbescape.html.HtmlEscape;
  26 import org.unbescape.html.HtmlEscapeLevel;
  27 import org.unbescape.html.HtmlEscapeType;
  28
  29 import be.nikiroo.utils.streams.Base64InputStream;
  30 import be.nikiroo.utils.streams.Base64OutputStream;
  31
  32 /**
  33  * This class offers some utilities based around {@link String}s.
  34  *
  35  * @author niki
  36  */
  37 public class StringUtils {
  38         /**
  39          * This enum type will decide the alignment of a {@link String} when padding
  40          * or justification is applied (if there is enough horizontal space for it
  41          * to be aligned).
  42          */
  43         public enum Alignment {
  44                 /** Aligned at left. */
  45                 LEFT,
  46                 /** Centered. */
  47                 CENTER,
  48                 /** Aligned at right. */
  49                 RIGHT,
  50                 /** Full justified (to both left and right). */
  51                 JUSTIFY,
  52
  53                 // Old Deprecated values:
  54
  55                 /** DEPRECATED: please use LEFT. */
  56                 @Deprecated
  57                 Beginning,
  58                 /** DEPRECATED: please use CENTER. */
  59                 @Deprecated
  60                 Center,
  61                 /** DEPRECATED: please use RIGHT. */
  62                 @Deprecated
  63                 End;
  64
  65                 /**
  66                  * Return the non-deprecated version of this enum if needed (or return
  67                  * self if not).
  68                  *
  69                  * @return the non-deprecated value
  70                  */
  71                 Alignment undeprecate() {
  72                         if (this == Beginning)
  73                                 return LEFT;
  74                         if (this == Center)
  75                                 return CENTER;
  76                         if (this == End)
  77                                 return RIGHT;
  78                         return this;
  79                 }
  80         }
  81
        /**
         * Pattern whose matches are removed from the NFKD-normalised input by
         * {@link StringUtils#sanitize(String, boolean, boolean)} when accents
         * must be removed; that method treats a NULL value as "nothing to
         * remove".
         */
        static private Pattern marks = getMarks();
  83
  84         /**
  85          * Fix the size of the given {@link String} either with space-padding or by
  86          * shortening it.
  87          *
  88          * @param text
  89          *            the {@link String} to fix
  90          * @param width
  91          *            the size of the resulting {@link String} or -1 for a noop
  92          *
  93          * @return the resulting {@link String} of size <i>size</i>
  94          */
  95         static public String padString(String text, int width) {
  96                 return padString(text, width, true, null);
  97         }
  98
  99         /**
 100          * Fix the size of the given {@link String} either with space-padding or by
 101          * optionally shortening it.
 102          *
 103          * @param text
 104          *            the {@link String} to fix
 105          * @param width
 106          *            the size of the resulting {@link String} if the text fits or
 107          *            if cut is TRUE or -1 for a noop
 108          * @param cut
 109          *            cut the {@link String} shorter if needed
 110          * @param align
 111          *            align the {@link String} in this position if we have enough
 112          *            space (default is Alignment.Beginning)
 113          *
 114          * @return the resulting {@link String} of size <i>size</i> minimum
 115          */
 116         static public String padString(String text, int width, boolean cut,
 117                         Alignment align) {
 118
 119                 if (align == null) {
 120                         align = Alignment.LEFT;
 121                 }
 122
 123                 align = align.undeprecate();
 124
 125                 if (width >= 0) {
 126                         if (text == null)
 127                                 text = "";
 128
 129                         int diff = width - text.length();
 130
 131                         if (diff < 0) {
 132                                 if (cut)
 133                                         text = text.substring(0, width);
 134                         } else if (diff > 0) {
 135                                 if (diff < 2 && align != Alignment.RIGHT)
 136                                         align = Alignment.LEFT;
 137
 138                                 switch (align) {
 139                                 case RIGHT:
 140                                         text = new String(new char[diff]).replace('\0', ' ') + text;
 141                                         break;
 142                                 case CENTER:
 143                                         int pad1 = (diff) / 2;
 144                                         int pad2 = (diff + 1) / 2;
 145                                         text = new String(new char[pad1]).replace('\0', ' ') + text
 146                                                         + new String(new char[pad2]).replace('\0', ' ');
 147                                         break;
 148                                 case LEFT:
 149                                 default:
 150                                         text = text + new String(new char[diff]).replace('\0', ' ');
 151                                         break;
 152                                 }
 153                         }
 154                 }
 155
 156                 return text;
 157         }
 158
 159         /**
 160          * Justify a text into width-sized (at the maximum) lines and return all the
 161          * lines concatenated into a single '\\n'-separated line of text.
 162          *
 163          * @param text
 164          *            the {@link String} to justify
 165          * @param width
 166          *            the maximum size of the resulting lines
 167          *
 168          * @return a list of justified text lines concatenated into a single
 169          *         '\\n'-separated line of text
 170          */
 171         static public String justifyTexts(String text, int width) {
 172                 StringBuilder builder = new StringBuilder();
 173                 for (String line : justifyText(text, width, null)) {
 174                         if (builder.length() > 0) {
 175                                 builder.append('\n');
 176                         }
 177                         builder.append(line);
 178                 }
 179
 180                 return builder.toString();
 181         }
 182
 183         /**
 184          * Justify a text into width-sized (at the maximum) lines.
 185          *
 186          * @param text
 187          *            the {@link String} to justify
 188          * @param width
 189          *            the maximum size of the resulting lines
 190          *
 191          * @return a list of justified text lines
 192          */
 193         static public List<String> justifyText(String text, int width) {
 194                 return justifyText(text, width, null);
 195         }
 196
 197         /**
 198          * Justify a text into width-sized (at the maximum) lines.
 199          *
 200          * @param text
 201          *            the {@link String} to justify
 202          * @param width
 203          *            the maximum size of the resulting lines
 204          * @param align
 205          *            align the lines in this position (default is
 206          *            Alignment.Beginning)
 207          *
 208          * @return a list of justified text lines
 209          */
 210         static public List<String> justifyText(String text, int width,
 211                         Alignment align) {
 212                 if (align == null) {
 213                         align = Alignment.LEFT;
 214                 }
 215
 216                 align = align.undeprecate();
 217
 218                 switch (align) {
 219                 case CENTER:
 220                         return StringJustifier.center(text, width);
 221                 case RIGHT:
 222                         return StringJustifier.right(text, width);
 223                 case JUSTIFY:
 224                         return StringJustifier.full(text, width);
 225                 case LEFT:
 226                 default:
 227                         return StringJustifier.left(text, width);
 228                 }
 229         }
 230
 231         /**
 232          * Justify a text into width-sized (at the maximum) lines.
 233          *
 234          * @param text
 235          *            the {@link String} to justify
 236          * @param width
 237          *            the maximum size of the resulting lines
 238          *
 239          * @return a list of justified text lines
 240          */
 241         static public List<String> justifyText(List<String> text, int width) {
 242                 return justifyText(text, width, null);
 243         }
 244
        /**
         * Justify a text, given as a list of input lines, into width-sized (at
         * the maximum) lines.
         * <p>
         * The input lines are first merged into logical lines (plain paragraphs
         * or bullet items), then each logical line is justified on its own;
         * bullet items are written back with a "- " bullet on their first line
         * and a matching 2-spaces indent on the following ones.
         *
         * @param text
         *            the lines of text to justify (a TAB is handled as 4 spaces)
         * @param width
         *            the maximum size of the resulting lines
         * @param align
         *            align the lines in this position (passed as-is to
         *            {@link StringUtils#justifyText(String, int, Alignment)})
         *
         * @return a list of justified text lines
         */
        static public List<String> justifyText(List<String> text, int width,
                        Alignment align) {
                List<String> result = new ArrayList<String>();

                // Logical lines: content <-> bullet spacing (null = not a bullet
                // item, so no bullet and no spacing on output)
                List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
                // The logical line being accumulated (null until the first input
                // line has been seen)
                StringBuilder previous = null;
                // Scratch buffer used to collapse the spaces of plain lines
                StringBuilder tmp = new StringBuilder();
                // Spacing stored with the logical line being accumulated
                String previousItemBulletSpacing = null;
                String itemBulletSpacing = null;
                for (String inputLine : text) {
                        boolean previousLineComplete = true;

                        String current = inputLine.replace("\t", "    ");
                        // presumably the leading blank of the line and whether it starts
                        // with a bullet -- helpers not visible here, TODO confirm
                        itemBulletSpacing = getItemSpacing(current);
                        boolean bullet = isItemLine(current);
                        // A line without bullet only keeps its spacing when it is
                        // indented strictly deeper than the item being accumulated
                        if ((previousItemBulletSpacing == null || itemBulletSpacing
                                        .length() <= previousItemBulletSpacing.length()) && !bullet) {
                                itemBulletSpacing = null;
                        }

                        if (itemBulletSpacing != null) {
                                // Bullet item (or deeper-indented line following one): drop
                                // the surrounding blanks and, for a bullet line, its first
                                // remaining character (presumably the bullet itself)
                                current = current.trim();
                                if (!current.isEmpty() && bullet) {
                                        current = current.substring(1);
                                }
                                current = current.trim();
                                // Only a new bullet closes what was accumulated so far
                                previousLineComplete = bullet;
                        } else {
                                // Plain line: rebuild it with single spaces between words
                                tmp.setLength(0);
                                for (String word : current.split(" ")) {
                                        if (word.isEmpty()) {
                                                continue;
                                        }

                                        if (tmp.length() > 0) {
                                                tmp.append(' ');
                                        }
                                        tmp.append(word.trim());
                                }
                                current = tmp.toString();

                                // NOTE(review): previous is still NULL on the very first
                                // input line, so isHrLine(previous) can receive NULL --
                                // confirm that helper copes with it
                                previousLineComplete = current.isEmpty()
                                                || previousItemBulletSpacing != null
                                                || (previous != null && isFullLine(previous))
                                                || isHrLine(current) || isHrLine(previous);
                        }

                        if (previous == null) {
                                // NOTE(review): previousItemBulletSpacing is not updated on
                                // this path, so the first logical line looks like it is
                                // always stored without bullet spacing, even when the first
                                // input line is a bullet -- confirm this is intended
                                previous = new StringBuilder();
                        } else {
                                if (previousLineComplete) {
                                        // Store the finished logical line and start a new one
                                        lines.add(new AbstractMap.SimpleEntry<String, String>(
                                                        previous.toString(), previousItemBulletSpacing));
                                        previous.setLength(0);
                                        previousItemBulletSpacing = itemBulletSpacing;
                                } else {
                                        // Same logical line: join with a single space
                                        previous.append(' ');
                                }
                        }

                        previous.append(current);

                }

                // Store the last logical line (nothing to store on empty input)
                if (previous != null) {
                        lines.add(new AbstractMap.SimpleEntry<String, String>(previous
                                        .toString(), previousItemBulletSpacing));
                }

                for (Entry<String, String> line : lines) {
                        String content = line.getKey();
                        String spacing = line.getValue();

                        String bullet = "- ";
                        if (spacing == null) {
                                bullet = "";
                                spacing = "";
                        }

                        // A spacing longer than width + 3 is dropped altogether
                        if (spacing.length() > width + 3) {
                                spacing = "";
                        }

                        // The content only gets the room left by the spacing and bullet
                        for (String subline : StringUtils.justifyText(content, width
                                        - (spacing.length() + bullet.length()), align)) {
                                result.add(spacing + bullet + subline);
                                // After the first line of an item, the bullet is replaced by
                                // 2 spaces so the text stays aligned
                                if (!bullet.isEmpty()) {
                                        bullet = "  ";
                                }
                        }
                }

                return result;
        }
 353
 354         /**
 355          * Sanitise the given input to make it more Terminal-friendly by removing
 356          * combining characters.
 357          *
 358          * @param input
 359          *            the input to sanitise
 360          * @param allowUnicode
 361          *            allow Unicode or only allow ASCII Latin characters
 362          *
 363          * @return the sanitised {@link String}
 364          */
 365         static public String sanitize(String input, boolean allowUnicode) {
 366                 return sanitize(input, allowUnicode, !allowUnicode);
 367         }
 368
 369         /**
 370          * Sanitise the given input to make it more Terminal-friendly by removing
 371          * combining characters.
 372          *
 373          * @param input
 374          *            the input to sanitise
 375          * @param allowUnicode
 376          *            allow Unicode or only allow ASCII Latin characters
 377          * @param removeAllAccents
 378          *            TRUE to replace all accentuated characters by their non
 379          *            accentuated counter-parts
 380          *
 381          * @return the sanitised {@link String}
 382          */
 383         static public String sanitize(String input, boolean allowUnicode,
 384                         boolean removeAllAccents) {
 385
 386                 if (removeAllAccents) {
 387                         input = Normalizer.normalize(input, Form.NFKD);
 388                         if (marks != null) {
 389                                 input = marks.matcher(input).replaceAll("");
 390                         }
 391                 }
 392
 393                 input = Normalizer.normalize(input, Form.NFKC);
 394
 395                 if (!allowUnicode) {
 396                         StringBuilder builder = new StringBuilder();
 397                         for (int index = 0; index < input.length(); index++) {
 398                                 char car = input.charAt(index);
 399                                 // displayable chars in ASCII are in the range 32<->255,
 400                                 // except DEL (127)
 401                                 if (car >= 32 && car <= 255 && car != 127) {
 402                                         builder.append(car);
 403                                 }
 404                         }
 405                         input = builder.toString();
 406                 }
 407
 408                 return input;
 409         }
 410
 411         /**
 412          * Convert between the time in milliseconds to a {@link String} in a "fixed"
 413          * way (to exchange data over the wire, for instance).
 414          * <p>
 415          * Precise to the second.
 416          *
 417          * @param time
 418          *            the specified number of milliseconds since the standard base
 419          *            time known as "the epoch", namely January 1, 1970, 00:00:00
 420          *            GMT
 421          *
 422          * @return the time as a {@link String}
 423          */
 424         static public String fromTime(long time) {
 425                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 426                 return sdf.format(new Date(time));
 427         }
 428
 429         /**
 430          * Convert between the time as a {@link String} to milliseconds in a "fixed"
 431          * way (to exchange data over the wire, for instance).
 432          * <p>
 433          * Precise to the second.
 434          *
 435          * @param displayTime
 436          *            the time as a {@link String}
 437          *
 438          * @return the number of milliseconds since the standard base time known as
 439          *         "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
 440          *         of error
 441          *
 442          * @throws ParseException
 443          *             in case of parse error
 444          */
 445         static public long toTime(String displayTime) throws ParseException {
 446                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 447                 return sdf.parse(displayTime).getTime();
 448         }
 449
 450         /**
 451          * Return a hash of the given {@link String}.
 452          *
 453          * @param input
 454          *            the input data
 455          *
 456          * @return the hash
 457          */
 458         static public String getMd5Hash(String input) {
 459                 try {
 460                         MessageDigest md = MessageDigest.getInstance("MD5");
 461                         md.update(getBytes(input));
 462                         byte byteData[] = md.digest();
 463
 464                         StringBuffer hexString = new StringBuffer();
 465                         for (int i = 0; i < byteData.length; i++) {
 466                                 String hex = Integer.toHexString(0xff & byteData[i]);
 467                                 if (hex.length() == 1)
 468                                         hexString.append('0');
 469                                 hexString.append(hex);
 470                         }
 471
 472                         return hexString.toString();
 473                 } catch (NoSuchAlgorithmException e) {
 474                         return input;
 475                 }
 476         }
 477
 478         /**
 479          * Remove the HTML content from the given input, and un-html-ize the rest.
 480          *
 481          * @param html
 482          *            the HTML-encoded content
 483          *
 484          * @return the HTML-free equivalent content
 485          */
 486         public static String unhtml(String html) {
 487                 StringBuilder builder = new StringBuilder();
 488
 489                 int inTag = 0;
 490                 for (char car : html.toCharArray()) {
 491                         if (car == '<') {
 492                                 inTag++;
 493                         } else if (car == '>') {
 494                                 inTag--;
 495                         } else if (inTag <= 0) {
 496                                 builder.append(car);
 497                         }
 498                 }
 499
 500                 char nbsp = ' '; // non-breakable space (a special char)
 501                 char space = ' ';
 502                 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
 503         }
 504
 505         /**
 506          * Escape the given {@link String} so it can be used in XML, as content.
 507          *
 508          * @param input
 509          *            the input {@link String}
 510          *
 511          * @return the escaped {@link String}
 512          */
 513         public static String xmlEscape(String input) {
 514                 if (input == null) {
 515                         return "";
 516                 }
 517
 518                 return HtmlEscape.escapeHtml(input,
 519                                 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
 520                                 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
 521         }
 522
 523         /**
 524          * Escape the given {@link String} so it can be used in XML, as text content
 525          * inside double-quotes.
 526          *
 527          * @param input
 528          *            the input {@link String}
 529          *
 530          * @return the escaped {@link String}
 531          */
 532         public static String xmlEscapeQuote(String input) {
 533                 if (input == null) {
 534                         return "";
 535                 }
 536
 537                 return HtmlEscape.escapeHtml(input,
 538                                 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
 539                                 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
 540         }
 541
 542         /**
 543          * Zip the data and then encode it into Base64.
 544          *
 545          * @param data
 546          *            the data
 547          *
 548          * @return the Base64 zipped version
 549          *
 550          * @throws IOException
 551          *             in case of I/O error
 552          */
 553         public static String zip64(String data) throws IOException {
 554                 try {
 555                         return zip64(getBytes(data));
 556                 } catch (UnsupportedEncodingException e) {
 557                         // All conforming JVM are required to support UTF-8
 558                         e.printStackTrace();
 559                         return null;
 560                 }
 561         }
 562
 563         /**
 564          * Zip the data and then encode it into Base64.
 565          *
 566          * @param data
 567          *            the data
 568          *
 569          * @return the Base64 zipped version
 570          *
 571          * @throws IOException
 572          *             in case of I/O error
 573          */
 574         public static String zip64(byte[] data) throws IOException {
 575                 // 1. compress
 576                 ByteArrayOutputStream bout = new ByteArrayOutputStream();
 577                 try {
 578                         OutputStream out = new GZIPOutputStream(bout);
 579                         try {
 580                                 out.write(data);
 581                         } finally {
 582                                 out.close();
 583                         }
 584                 } finally {
 585                         data = bout.toByteArray();
 586                         bout.close();
 587                 }
 588
 589                 // 2. base64
 590                 InputStream in = new ByteArrayInputStream(data);
 591                 try {
 592                         in = new Base64InputStream(in, true);
 593                         return new String(IOUtils.toByteArray(in), "UTF-8");
 594                 } finally {
 595                         in.close();
 596                 }
 597         }
 598
 599         /**
 600          * Unconvert from Base64 then unzip the content, which is assumed to be a
 601          * String.
 602          *
 603          * @param data
 604          *            the data in Base64 format
 605          *
 606          * @return the raw data
 607          *
 608          * @throws IOException
 609          *             in case of I/O error
 610          */
 611         public static String unzip64s(String data) throws IOException {
 612                 return new String(unzip64(data), "UTF-8");
 613         }
 614
 615         /**
 616          * Unconvert from Base64 then unzip the content.
 617          *
 618          * @param data
 619          *            the data in Base64 format
 620          *
 621          * @return the raw data
 622          *
 623          * @throws IOException
 624          *             in case of I/O error
 625          */
 626         public static byte[] unzip64(String data) throws IOException {
 627                 InputStream in = new Base64InputStream(new ByteArrayInputStream(
 628                                 getBytes(data)), false);
 629                 try {
 630                         in = new GZIPInputStream(in);
 631                         return IOUtils.toByteArray(in);
 632                 } finally {
 633                         in.close();
 634                 }
 635         }
 636
 637         /**
 638          * Convert the given data to Base64 format.
 639          *
 640          * @param data
 641          *            the data to convert
 642          *
 643          * @return the Base64 {@link String} representation of the data
 644          *
 645          * @throws IOException
 646          *             in case of I/O errors
 647          */
 648         public static String base64(String data) throws IOException {
 649                 return base64(getBytes(data));
 650         }
 651
 652         /**
 653          * Convert the given data to Base64 format.
 654          *
 655          * @param data
 656          *            the data to convert
 657          *
 658          * @return the Base64 {@link String} representation of the data
 659          *
 660          * @throws IOException
 661          *             in case of I/O errors
 662          */
 663         public static String base64(byte[] data) throws IOException {
 664                 Base64InputStream in = new Base64InputStream(new ByteArrayInputStream(
 665                                 data), true);
 666                 try {
 667                         return new String(IOUtils.toByteArray(in), "UTF-8");
 668                 } finally {
 669                         in.close();
 670                 }
 671         }
 672
 673         /**
 674          * Unconvert the given data from Base64 format back to a raw array of bytes.
 675          *
 676          * @param data
 677          *            the data to unconvert
 678          *
 679          * @return the raw data represented by the given Base64 {@link String},
 680          *
 681          * @throws IOException
 682          *             in case of I/O errors
 683          */
 684         public static byte[] unbase64(String data) throws IOException {
 685                 Base64InputStream in = new Base64InputStream(new ByteArrayInputStream(
 686                                 getBytes(data)), false);
 687                 try {
 688                         return IOUtils.toByteArray(in);
 689                 } finally {
 690                         in.close();
 691                 }
 692         }
 693
 694         /**
 695          * Unonvert the given data from Base64 format back to a {@link String}.
 696          *
 697          * @param data
 698          *            the data to unconvert
 699          *
 700          * @return the {@link String} represented by the given Base64 {@link String}
 701          *
 702          * @throws IOException
 703          *             in case of I/O errors
 704          */
 705         public static String unbase64s(String data) throws IOException {
 706                 return new String(unbase64(data), "UTF-8");
 707         }
 708
 709         /**
 710          * Return a display {@link String} for the given value, which can be
 711          * suffixed with "k" or "M" depending upon the number, if it is big enough.
 712          * <p>
 713          * <p>
 714          * Examples:
 715          * <ul>
 716          * <li><tt>8 765</tt> becomes "8 k"</li>
 717          * <li><tt>998 765</tt> becomes "998 k"</li>
 718          * <li><tt>12 987 364</tt> becomes "12 M"</li>
 719          * <li><tt>5 534 333 221</tt> becomes "5 G"</li>
 720          * </ul>
 721          *
 722          * @param value
 723          *            the value to convert
 724          *
 725          * @return the display value
 726          */
 727         public static String formatNumber(long value) {
 728                 return formatNumber(value, 0);
 729         }
 730
 731         /**
 732          * Return a display {@link String} for the given value, which can be
 733          * suffixed with "k" or "M" depending upon the number, if it is big enough.
 734          * <p>
 735          * Examples (assuming decimalPositions = 1):
 736          * <ul>
 737          * <li><tt>8 765</tt> becomes "8.7 k"</li>
 738          * <li><tt>998 765</tt> becomes "998.7 k"</li>
 739          * <li><tt>12 987 364</tt> becomes "12.9 M"</li>
 740          * <li><tt>5 534 333 221</tt> becomes "5.5 G"</li>
 741          * </ul>
 742          *
 743          * @param value
 744          *            the value to convert
 745          * @param decimalPositions
 746          *            the number of decimal positions to keep
 747          *
 748          * @return the display value
 749          */
 750         public static String formatNumber(long value, int decimalPositions) {
 751                 long userValue = value;
 752                 String suffix = " ";
 753                 long mult = 1;
 754
 755                 if (value >= 1000000000l) {
 756                         mult = 1000000000l;
 757                         userValue = value / 1000000000l;
 758                         suffix = " G";
 759                 } else if (value >= 1000000l) {
 760                         mult = 1000000l;
 761                         userValue = value / 1000000l;
 762                         suffix = " M";
 763                 } else if (value >= 1000l) {
 764                         mult = 1000l;
 765                         userValue = value / 1000l;
 766                         suffix = " k";
 767                 }
 768
 769                 String deci = "";
 770                 if (decimalPositions > 0) {
 771                         deci = Long.toString(value % mult);
 772                         int size = Long.toString(mult).length() - 1;
 773                         while (deci.length() < size) {
 774                                 deci = "0" + deci;
 775                         }
 776
 777                         deci = deci.substring(0, Math.min(decimalPositions, deci.length()));
 778                         while (deci.length() < decimalPositions) {
 779                                 deci += "0";
 780                         }
 781
 782                         deci = "." + deci;
 783                 }
 784
 785                 return Long.toString(userValue) + deci + suffix;
 786         }
 787
 788         /**
 789          * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
 790          * read a "display" number that can contain a "M" or "k" suffix and return
 791          * the full value.
 792          * <p>
 793          * Of course, the conversion to and from display form is lossy (example:
 794          * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
 795          *
 796          * @param value
 797          *            the value in display form with possible "M" and "k" suffixes,
 798          *            can be NULL
 799          *
 800          * @return the value as a number, or 0 if not possible to convert
 801          */
 802         public static long toNumber(String value) {
 803                 return toNumber(value, 0l);
 804         }
 805
 806         /**
 807          * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
 808          * read a "display" number that can contain a "M" or "k" suffix and return
 809          * the full value.
 810          * <p>
 811          * Of course, the conversion to and from display form is lossy (example:
 812          * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
 813          *
 814          * @param value
 815          *            the value in display form with possible "M" and "k" suffixes,
 816          *            can be NULL
 817          * @param def
 818          *            the default value if it is not possible to convert the given
 819          *            value to a number
 820          *
 821          * @return the value as a number, or 0 if not possible to convert
 822          */
 823         public static long toNumber(String value, long def) {
 824                 long count = def;
 825                 if (value != null) {
 826                         value = value.trim().toLowerCase();
 827                         try {
 828                                 long mult = 1;
 829                                 if (value.endsWith("g")) {
 830                                         value = value.substring(0, value.length() - 1).trim();
 831                                         mult = 1000000000;
 832                                 } else if (value.endsWith("m")) {
 833                                         value = value.substring(0, value.length() - 1).trim();
 834                                         mult = 1000000;
 835                                 } else if (value.endsWith("k")) {
 836                                         value = value.substring(0, value.length() - 1).trim();
 837                                         mult = 1000;
 838                                 }
 839
 840                                 long deci = 0;
 841                                 if (value.contains(".")) {
 842                                         String[] tab = value.split("\\.");
 843                                         if (tab.length != 2) {
 844                                                 throw new NumberFormatException(value);
 845                                         }
 846                                         double decimal = Double.parseDouble("0."
 847                                                         + tab[tab.length - 1]);
 848                                         deci = ((long) (mult * decimal));
 849                                         value = tab[0];
 850                                 }
 851                                 count = mult * Long.parseLong(value) + deci;
 852                         } catch (Exception e) {
 853                         }
 854                 }
 855
 856                 return count;
 857         }
 858
 859         /**
 860          * Return the bytes array representation of the given {@link String} in
 861          * UTF-8.
 862          *
 863          * @param str
 864          *            the {@link String} to transform into bytes
 865          * @return the content in bytes
 866          */
 867         static public byte[] getBytes(String str) {
 868                 try {
 869                         return str.getBytes("UTF-8");
 870                 } catch (UnsupportedEncodingException e) {
 871                         // All conforming JVM must support UTF-8
 872                         e.printStackTrace();
 873                         return null;
 874                 }
 875         }
 876
 877         /**
 878          * The "remove accents" pattern.
 879          *
 880          * @return the pattern, or NULL if a problem happens
 881          */
 882         private static Pattern getMarks() {
 883                 try {
 884                         return Pattern
 885                                         .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
 886                 } catch (Exception e) {
 887                         // Can fail on Android...
 888                         return null;
 889                 }
 890         }
 891
 892         //
 893         // justify List<String> related:
 894         //
 895
 896         /**
 897          * Check if this line ends as a complete line (ends with a "." or similar).
 898          * <p>
 899          * Note that we consider an empty line as full, and a line ending with
 900          * spaces as not complete.
 901          *
 902          * @param line
 903          *            the line to check
 904          *
 905          * @return TRUE if it does
 906          */
 907         static private boolean isFullLine(StringBuilder line) {
 908                 if (line.length() == 0) {
 909                         return true;
 910                 }
 911
 912                 char lastCar = line.charAt(line.length() - 1);
 913                 switch (lastCar) {
 914                 case '.': // points
 915                 case '?':
 916                 case '!':
 917
 918                 case '\'': // quotes
 919                 case '‘':
 920                 case '’':
 921
 922                 case '"': // double quotes
 923                 case '”':
 924                 case '“':
 925                 case '»':
 926                 case '«':
 927                         return true;
 928                 default:
 929                         return false;
 930                 }
 931         }
 932
 933         /**
 934          * Check if this line represent an item in a list or description (i.e.,
 935          * check that the first non-space char is "-").
 936          *
 937          * @param line
 938          *            the line to check
 939          *
 940          * @return TRUE if it is
 941          */
 942         static private boolean isItemLine(String line) {
 943                 String spacing = getItemSpacing(line);
 944                 return spacing != null && !spacing.isEmpty()
 945                                 && line.charAt(spacing.length()) == '-';
 946         }
 947
 948         /**
 949          * Return all the spaces that start this line (or Empty if none).
 950          *
 951          * @param line
 952          *            the line to get the starting spaces from
 953          *
 954          * @return the left spacing
 955          */
 956         static private String getItemSpacing(String line) {
 957                 int i;
 958                 for (i = 0; i < line.length(); i++) {
 959                         if (line.charAt(i) != ' ') {
 960                                 return line.substring(0, i);
 961                         }
 962                 }
 963
 964                 return "";
 965         }
 966
 967         /**
 968          * This line is an horizontal spacer line.
 969          *
 970          * @param line
 971          *            the line to test
 972          *
 973          * @return TRUE if it is
 974          */
 975         static private boolean isHrLine(CharSequence line) {
 976                 int count = 0;
 977                 if (line != null) {
 978                         for (int i = 0; i < line.length(); i++) {
 979                                 char car = line.charAt(i);
 980                                 if (car == ' ' || car == '\t' || car == '*' || car == '-'
 981                                                 || car == '_' || car == '~' || car == '=' || car == '/'
 982                                                 || car == '\\') {
 983                                         count++;
 984                                 } else {
 985                                         return false;
 986                                 }
 987                         }
 988                 }
 989
 990                 return count > 2;
 991         }
 992
 993         // Deprecated functions, please do not use //
 994
 995         /**
 996          * @deprecated please use {@link StringUtils#zip64(byte[])} or
 997          *             {@link StringUtils#base64(byte[])} instead.
 998          *
 999          * @param data
1000          *            the data to encode
1001          * @param zip
1002          *            TRUE to zip it before Base64 encoding it, FALSE for Base64
1003          *            encoding only
1004          *
1005          * @return the encoded data
1006          *
1007          * @throws IOException
1008          *             in case of I/O error
1009          */
1010         @Deprecated
1011         public static String base64(String data, boolean zip) throws IOException {
1012                 return base64(getBytes(data), zip);
1013         }
1014
1015         /**
1016          * @deprecated please use {@link StringUtils#zip64(String)} or
1017          *             {@link StringUtils#base64(String)} instead.
1018          *
1019          * @param data
1020          *            the data to encode
1021          * @param zip
1022          *            TRUE to zip it before Base64 encoding it, FALSE for Base64
1023          *            encoding only
1024          *
1025          * @return the encoded data
1026          *
1027          * @throws IOException
1028          *             in case of I/O error
1029          */
1030         @Deprecated
1031         public static String base64(byte[] data, boolean zip) throws IOException {
1032                 if (zip) {
1033                         return zip64(data);
1034                 }
1035
1036                 Base64InputStream b64 = new Base64InputStream(new ByteArrayInputStream(
1037                                 data), true);
1038                 try {
1039                         return IOUtils.readSmallStream(b64);
1040                 } finally {
1041                         b64.close();
1042                 }
1043         }
1044
1045         /**
1046          * @deprecated please use {@link Base64OutputStream} and
1047          *             {@link GZIPOutputStream} instead.
1048          *
1049          * @param breakLines
1050          *            NOT USED ANYMORE, it is always considered FALSE now
1051          */
1052         @Deprecated
1053         public static OutputStream base64(OutputStream data, boolean zip,
1054                         boolean breakLines) throws IOException {
1055                 OutputStream out = new Base64OutputStream(data);
1056                 if (zip) {
1057                         out = new java.util.zip.GZIPOutputStream(out);
1058                 }
1059
1060                 return out;
1061         }
1062
1063         /**
1064          * Unconvert the given data from Base64 format back to a raw array of bytes.
1065          * <p>
1066          * Will automatically detect zipped data and also uncompress it before
1067          * returning, unless ZIP is false.
1068          *
1069          * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1070          *
1071          * @param data
1072          *            the data to unconvert
1073          * @param zip
1074          *            TRUE to also uncompress the data from a GZIP format
1075          *            automatically; if set to FALSE, zipped data can be returned
1076          *
1077          * @return the raw data represented by the given Base64 {@link String},
1078          *         optionally compressed with GZIP
1079          *
1080          * @throws IOException
1081          *             in case of I/O errors
1082          */
1083         @Deprecated
1084         public static byte[] unbase64(String data, boolean zip) throws IOException {
1085                 byte[] buffer = unbase64(data);
1086                 if (!zip) {
1087                         return buffer;
1088                 }
1089
1090                 try {
1091                         GZIPInputStream zipped = new GZIPInputStream(
1092                                         new ByteArrayInputStream(buffer));
1093                         try {
1094                                 ByteArrayOutputStream out = new ByteArrayOutputStream();
1095                                 try {
1096                                         IOUtils.write(zipped, out);
1097                                         return out.toByteArray();
1098                                 } finally {
1099                                         out.close();
1100                                 }
1101                         } finally {
1102                                 zipped.close();
1103                         }
1104                 } catch (Exception e) {
1105                         return buffer;
1106                 }
1107         }
1108
1109         /**
1110          * Unconvert the given data from Base64 format back to a raw array of bytes.
1111          * <p>
1112          * Will automatically detect zipped data and also uncompress it before
1113          * returning, unless ZIP is false.
1114          *
1115          * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1116          *
1117          * @param data
1118          *            the data to unconvert
1119          * @param zip
1120          *            TRUE to also uncompress the data from a GZIP format
1121          *            automatically; if set to FALSE, zipped data can be returned
1122          *
1123          * @return the raw data represented by the given Base64 {@link String},
1124          *         optionally compressed with GZIP
1125          *
1126          * @throws IOException
1127          *             in case of I/O errors
1128          */
1129         @Deprecated
1130         public static InputStream unbase64(InputStream data, boolean zip)
1131                         throws IOException {
1132                 return new ByteArrayInputStream(unbase64(IOUtils.readSmallStream(data),
1133                                 zip));
1134         }
1135
1136         /**
1137          * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1138          */
1139         @Deprecated
1140         public static byte[] unbase64(byte[] data, int offset, int count,
1141                         boolean zip) throws IOException {
1142                 byte[] dataPart = Arrays.copyOfRange(data, offset, offset + count);
1143                 return unbase64(new String(dataPart, "UTF-8"), zip);
1144         }
1145
1146         /**
1147          * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1148          */
1149         @Deprecated
1150         public static String unbase64s(String data, boolean zip) throws IOException {
1151                 return new String(unbase64(data, zip), "UTF-8");
1152         }
1153
1154         /**
1155          * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
1156          */
1157         @Deprecated
1158         public static String unbase64s(byte[] data, int offset, int count,
1159                         boolean zip) throws IOException {
1160                 return new String(unbase64(data, offset, count, zip), "UTF-8");
1161         }
1162 }