src/be/nikiroo/utils/StringUtils.java

   1 package be.nikiroo.utils;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.io.OutputStream;
   6 import java.io.UnsupportedEncodingException;
   7 import java.security.MessageDigest;
   8 import java.security.NoSuchAlgorithmException;
   9 import java.text.Normalizer;
  10 import java.text.Normalizer.Form;
  11 import java.text.ParseException;
  12 import java.text.SimpleDateFormat;
  13 import java.util.AbstractMap;
  14 import java.util.ArrayList;
  15 import java.util.Date;
  16 import java.util.List;
  17 import java.util.Map.Entry;
  18 import java.util.regex.Pattern;
  19
  20 import org.unbescape.html.HtmlEscape;
  21 import org.unbescape.html.HtmlEscapeLevel;
  22 import org.unbescape.html.HtmlEscapeType;
  23
  24 import be.nikiroo.utils.streams.Base64InputStream;
  25 import be.nikiroo.utils.streams.Base64OutputStream;
  26
  27 /**
  28  * This class offer some utilities based around {@link String}s.
  29  *
  30  * @author niki
  31  */
  32 public class StringUtils {
  33         /**
  34          * This enum type will decide the alignment of a {@link String} when padding
  35          * or justification is applied (if there is enough horizontal space for it
  36          * to be aligned).
  37          */
  38         public enum Alignment {
  39                 /** Aligned at left. */
  40                 LEFT,
  41                 /** Centered. */
  42                 CENTER,
  43                 /** Aligned at right. */
  44                 RIGHT,
  45                 /** Full justified (to both left and right). */
  46                 JUSTIFY,
  47
  48                 // Old Deprecated values:
  49
  50                 /** DEPRECATED: please use LEFT. */
  51                 @Deprecated
  52                 Beginning,
  53                 /** DEPRECATED: please use CENTER. */
  54                 @Deprecated
  55                 Center,
  56                 /** DEPRECATED: please use RIGHT. */
  57                 @Deprecated
  58                 End;
  59
  60                 /**
  61                  * Return the non-deprecated version of this enum if needed (or return
  62                  * self if not).
  63                  *
  64                  * @return the non-deprecated value
  65                  */
  66                 Alignment undeprecate() {
  67                         if (this == Beginning)
  68                                 return LEFT;
  69                         if (this == Center)
  70                                 return CENTER;
  71                         if (this == End)
  72                                 return RIGHT;
  73                         return this;
  74                 }
  75         }
  76
  77         static private Pattern marks = getMarks();
  78
  79         /**
  80          * Fix the size of the given {@link String} either with space-padding or by
  81          * shortening it.
  82          *
  83          * @param text
  84          *            the {@link String} to fix
  85          * @param width
  86          *            the size of the resulting {@link String} or -1 for a noop
  87          *
  88          * @return the resulting {@link String} of size <i>size</i>
  89          */
  90         static public String padString(String text, int width) {
  91                 return padString(text, width, true, null);
  92         }
  93
  94         /**
  95          * Fix the size of the given {@link String} either with space-padding or by
  96          * optionally shortening it.
  97          *
  98          * @param text
  99          *            the {@link String} to fix
 100          * @param width
 101          *            the size of the resulting {@link String} if the text fits or
 102          *            if cut is TRUE or -1 for a noop
 103          * @param cut
 104          *            cut the {@link String} shorter if needed
 105          * @param align
 106          *            align the {@link String} in this position if we have enough
 107          *            space (default is Alignment.Beginning)
 108          *
 109          * @return the resulting {@link String} of size <i>size</i> minimum
 110          */
 111         static public String padString(String text, int width, boolean cut,
 112                         Alignment align) {
 113
 114                 if (align == null) {
 115                         align = Alignment.LEFT;
 116                 }
 117
 118                 align = align.undeprecate();
 119
 120                 if (width >= 0) {
 121                         if (text == null)
 122                                 text = "";
 123
 124                         int diff = width - text.length();
 125
 126                         if (diff < 0) {
 127                                 if (cut)
 128                                         text = text.substring(0, width);
 129                         } else if (diff > 0) {
 130                                 if (diff < 2 && align != Alignment.RIGHT)
 131                                         align = Alignment.LEFT;
 132
 133                                 switch (align) {
 134                                 case RIGHT:
 135                                         text = new String(new char[diff]).replace('\0', ' ') + text;
 136                                         break;
 137                                 case CENTER:
 138                                         int pad1 = (diff) / 2;
 139                                         int pad2 = (diff + 1) / 2;
 140                                         text = new String(new char[pad1]).replace('\0', ' ') + text
 141                                                         + new String(new char[pad2]).replace('\0', ' ');
 142                                         break;
 143                                 case LEFT:
 144                                 default:
 145                                         text = text + new String(new char[diff]).replace('\0', ' ');
 146                                         break;
 147                                 }
 148                         }
 149                 }
 150
 151                 return text;
 152         }
 153
 154         /**
 155          * Justify a text into width-sized (at the maximum) lines.
 156          *
 157          * @param text
 158          *            the {@link String} to justify
 159          * @param width
 160          *            the maximum size of the resulting lines
 161          *
 162          * @return a list of justified text lines
 163          */
 164         static public List<String> justifyText(String text, int width) {
 165                 return justifyText(text, width, null);
 166         }
 167
 168         /**
 169          * Justify a text into width-sized (at the maximum) lines.
 170          *
 171          * @param text
 172          *            the {@link String} to justify
 173          * @param width
 174          *            the maximum size of the resulting lines
 175          * @param align
 176          *            align the lines in this position (default is
 177          *            Alignment.Beginning)
 178          *
 179          * @return a list of justified text lines
 180          */
 181         static public List<String> justifyText(String text, int width,
 182                         Alignment align) {
 183                 if (align == null) {
 184                         align = Alignment.LEFT;
 185                 }
 186
 187                 align = align.undeprecate();
 188
 189                 switch (align) {
 190                 case CENTER:
 191                         return StringJustifier.center(text, width);
 192                 case RIGHT:
 193                         return StringJustifier.right(text, width);
 194                 case JUSTIFY:
 195                         return StringJustifier.full(text, width);
 196                 case LEFT:
 197                 default:
 198                         return StringJustifier.left(text, width);
 199                 }
 200         }
 201
 202         /**
 203          * Justify a text into width-sized (at the maximum) lines.
 204          *
 205          * @param text
 206          *            the {@link String} to justify
 207          * @param width
 208          *            the maximum size of the resulting lines
 209          *
 210          * @return a list of justified text lines
 211          */
 212         static public List<String> justifyText(List<String> text, int width) {
 213                 return justifyText(text, width, null);
 214         }
 215
 216         /**
 217          * Justify a text into width-sized (at the maximum) lines.
 218          *
 219          * @param text
 220          *            the {@link String} to justify
 221          * @param width
 222          *            the maximum size of the resulting lines
 223          * @param align
 224          *            align the lines in this position (default is
 225          *            Alignment.Beginning)
 226          *
 227          * @return a list of justified text lines
 228          */
 229         static public List<String> justifyText(List<String> text, int width,
 230                         Alignment align) {
 231                 List<String> result = new ArrayList<String>();
 232
 233                 // Content <-> Bullet spacing (null = no spacing)
 234                 List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
 235                 StringBuilder previous = null;
 236                 StringBuilder tmp = new StringBuilder();
 237                 String previousItemBulletSpacing = null;
 238                 String itemBulletSpacing = null;
 239                 for (String inputLine : text) {
 240                         boolean previousLineComplete = true;
 241
 242                         String current = inputLine.replace("\t", "    ");
 243                         itemBulletSpacing = getItemSpacing(current);
 244                         boolean bullet = isItemLine(current);
 245                         if ((previousItemBulletSpacing == null || itemBulletSpacing
 246                                         .length() <= previousItemBulletSpacing.length()) && !bullet) {
 247                                 itemBulletSpacing = null;
 248                         }
 249
 250                         if (itemBulletSpacing != null) {
 251                                 current = current.trim();
 252                                 if (!current.isEmpty() && bullet) {
 253                                         current = current.substring(1);
 254                                 }
 255                                 current = current.trim();
 256                                 previousLineComplete = bullet;
 257                         } else {
 258                                 tmp.setLength(0);
 259                                 for (String word : current.split(" ")) {
 260                                         if (word.isEmpty()) {
 261                                                 continue;
 262                                         }
 263
 264                                         if (tmp.length() > 0) {
 265                                                 tmp.append(' ');
 266                                         }
 267                                         tmp.append(word.trim());
 268                                 }
 269                                 current = tmp.toString();
 270
 271                                 previousLineComplete = current.isEmpty()
 272                                                 || previousItemBulletSpacing != null
 273                                                 || (previous != null && isFullLine(previous))
 274                                                 || isHrLine(current) || isHrLine(previous);
 275                         }
 276
 277                         if (previous == null) {
 278                                 previous = new StringBuilder();
 279                         } else {
 280                                 if (previousLineComplete) {
 281                                         lines.add(new AbstractMap.SimpleEntry<String, String>(
 282                                                         previous.toString(), previousItemBulletSpacing));
 283                                         previous.setLength(0);
 284                                         previousItemBulletSpacing = itemBulletSpacing;
 285                                 } else {
 286                                         previous.append(' ');
 287                                 }
 288                         }
 289
 290                         previous.append(current);
 291
 292                 }
 293
 294                 if (previous != null) {
 295                         lines.add(new AbstractMap.SimpleEntry<String, String>(previous
 296                                         .toString(), previousItemBulletSpacing));
 297                 }
 298
 299                 for (Entry<String, String> line : lines) {
 300                         String content = line.getKey();
 301                         String spacing = line.getValue();
 302
 303                         String bullet = "- ";
 304                         if (spacing == null) {
 305                                 bullet = "";
 306                                 spacing = "";
 307                         }
 308
 309                         if (spacing.length() > width + 3) {
 310                                 spacing = "";
 311                         }
 312
 313                         for (String subline : StringUtils.justifyText(content, width
 314                                         - (spacing.length() + bullet.length()), align)) {
 315                                 result.add(spacing + bullet + subline);
 316                                 if (!bullet.isEmpty()) {
 317                                         bullet = "  ";
 318                                 }
 319                         }
 320                 }
 321
 322                 return result;
 323         }
 324
 325         /**
 326          * Sanitise the given input to make it more Terminal-friendly by removing
 327          * combining characters.
 328          *
 329          * @param input
 330          *            the input to sanitise
 331          * @param allowUnicode
 332          *            allow Unicode or only allow ASCII Latin characters
 333          *
 334          * @return the sanitised {@link String}
 335          */
 336         static public String sanitize(String input, boolean allowUnicode) {
 337                 return sanitize(input, allowUnicode, !allowUnicode);
 338         }
 339
 340         /**
 341          * Sanitise the given input to make it more Terminal-friendly by removing
 342          * combining characters.
 343          *
 344          * @param input
 345          *            the input to sanitise
 346          * @param allowUnicode
 347          *            allow Unicode or only allow ASCII Latin characters
 348          * @param removeAllAccents
 349          *            TRUE to replace all accentuated characters by their non
 350          *            accentuated counter-parts
 351          *
 352          * @return the sanitised {@link String}
 353          */
 354         static public String sanitize(String input, boolean allowUnicode,
 355                         boolean removeAllAccents) {
 356
 357                 if (removeAllAccents) {
 358                         input = Normalizer.normalize(input, Form.NFKD);
 359                         if (marks != null) {
 360                                 input = marks.matcher(input).replaceAll("");
 361                         }
 362                 }
 363
 364                 input = Normalizer.normalize(input, Form.NFKC);
 365
 366                 if (!allowUnicode) {
 367                         StringBuilder builder = new StringBuilder();
 368                         for (int index = 0; index < input.length(); index++) {
 369                                 char car = input.charAt(index);
 370                                 // displayable chars in ASCII are in the range 32<->255,
 371                                 // except DEL (127)
 372                                 if (car >= 32 && car <= 255 && car != 127) {
 373                                         builder.append(car);
 374                                 }
 375                         }
 376                         input = builder.toString();
 377                 }
 378
 379                 return input;
 380         }
 381
 382         /**
 383          * Convert between the time in milliseconds to a {@link String} in a "fixed"
 384          * way (to exchange data over the wire, for instance).
 385          * <p>
 386          * Precise to the second.
 387          *
 388          * @param time
 389          *            the specified number of milliseconds since the standard base
 390          *            time known as "the epoch", namely January 1, 1970, 00:00:00
 391          *            GMT
 392          *
 393          * @return the time as a {@link String}
 394          */
 395         static public String fromTime(long time) {
 396                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 397                 return sdf.format(new Date(time));
 398         }
 399
 400         /**
 401          * Convert between the time as a {@link String} to milliseconds in a "fixed"
 402          * way (to exchange data over the wire, for instance).
 403          * <p>
 404          * Precise to the second.
 405          *
 406          * @param displayTime
 407          *            the time as a {@link String}
 408          *
 409          * @return the number of milliseconds since the standard base time known as
 410          *         "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
 411          *         of error
 412          *
 413          * @throws ParseException
 414          *             in case of parse error
 415          */
 416         static public long toTime(String displayTime) throws ParseException {
 417                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 418                 return sdf.parse(displayTime).getTime();
 419         }
 420
 421         /**
 422          * Return a hash of the given {@link String}.
 423          *
 424          * @param input
 425          *            the input data
 426          *
 427          * @return the hash
 428          */
 429         static public String getMd5Hash(String input) {
 430                 try {
 431                         MessageDigest md = MessageDigest.getInstance("MD5");
 432                         md.update(input.getBytes("UTF-8"));
 433                         byte byteData[] = md.digest();
 434
 435                         StringBuffer hexString = new StringBuffer();
 436                         for (int i = 0; i < byteData.length; i++) {
 437                                 String hex = Integer.toHexString(0xff & byteData[i]);
 438                                 if (hex.length() == 1)
 439                                         hexString.append('0');
 440                                 hexString.append(hex);
 441                         }
 442
 443                         return hexString.toString();
 444                 } catch (NoSuchAlgorithmException e) {
 445                         return input;
 446                 } catch (UnsupportedEncodingException e) {
 447                         return input;
 448                 }
 449         }
 450
 451         /**
 452          * Remove the HTML content from the given input, and un-html-ize the rest.
 453          *
 454          * @param html
 455          *            the HTML-encoded content
 456          *
 457          * @return the HTML-free equivalent content
 458          */
 459         public static String unhtml(String html) {
 460                 StringBuilder builder = new StringBuilder();
 461
 462                 int inTag = 0;
 463                 for (char car : html.toCharArray()) {
 464                         if (car == '<') {
 465                                 inTag++;
 466                         } else if (car == '>') {
 467                                 inTag--;
 468                         } else if (inTag <= 0) {
 469                                 builder.append(car);
 470                         }
 471                 }
 472
 473                 char nbsp = ' '; // non-breakable space (a special char)
 474                 char space = ' ';
 475                 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
 476         }
 477
 478         /**
 479          * Escape the given {@link String} so it can be used in XML, as content.
 480          *
 481          * @param input
 482          *            the input {@link String}
 483          *
 484          * @return the escaped {@link String}
 485          */
 486         public static String xmlEscape(String input) {
 487                 if (input == null) {
 488                         return "";
 489                 }
 490
 491                 return HtmlEscape.escapeHtml(input,
 492                                 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
 493                                 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
 494         }
 495
 496         /**
 497          * Escape the given {@link String} so it can be used in XML, as text content
 498          * inside double-quotes.
 499          *
 500          * @param input
 501          *            the input {@link String}
 502          *
 503          * @return the escaped {@link String}
 504          */
 505         public static String xmlEscapeQuote(String input) {
 506                 if (input == null) {
 507                         return "";
 508                 }
 509
 510                 return HtmlEscape.escapeHtml(input,
 511                                 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
 512                                 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
 513         }
 514
 515         /**
 516          * Zip the data and then encode it into Base64.
 517          *
 518          * @deprecated use {@link StringUtils#base64(byte[], boolean)} with the
 519          *             correct parameter instead
 520          *
 521          * @param data
 522          *            the data
 523          *
 524          * @return the Base64 zipped version
 525          */
 526         @Deprecated
 527         public static String zip64(String data) {
 528                 try {
 529                         return Base64.encodeBytes(data.getBytes("UTF-8"), Base64.GZIP);
 530                 } catch (IOException e) {
 531                         e.printStackTrace();
 532                         return null;
 533                 }
 534         }
 535
 536         /**
 537          * Unconvert from Base64 then unzip the content.
 538          *
 539          * @deprecated use {@link StringUtils#unbase64s(String, boolean)} with the
 540          *             correct parameter instead
 541          *
 542          * @param data
 543          *            the data in Base64 format
 544          *
 545          * @return the raw data
 546          *
 547          * @throws IOException
 548          *             in case of I/O error
 549          */
 550         @Deprecated
 551         public static String unzip64(String data) throws IOException {
 552                 return new String(Base64.decode(data, Base64.GZIP), "UTF-8");
 553         }
 554
 555         /**
 556          * Convert the given data to Base64 format.
 557          *
 558          * @param data
 559          *            the data to convert
 560          * @param zip
 561          *            TRUE to also compress the data in GZIP format; remember that
 562          *            compressed and not-compressed content are different; you need
 563          *            to know which is which when decoding
 564          *
 565          * @return the Base64 {@link String} representation of the data
 566          *
 567          * @throws IOException
 568          *             in case of I/O errors
 569          */
 570         public static String base64(String data, boolean zip) throws IOException {
 571                 return base64(data.getBytes("UTF-8"), zip);
 572         }
 573
 574         /**
 575          * Convert the given data to Base64 format.
 576          *
 577          * @param data
 578          *            the data to convert
 579          * @param zip
 580          *            TRUE to also compress the data in GZIP format; remember that
 581          *            compressed and not-compressed content are different; you need
 582          *            to know which is which when decoding
 583          *
 584          * @return the Base64 {@link String} representation of the data
 585          *
 586          * @throws IOException
 587          *             in case of I/O errors
 588          */
 589         public static String base64(byte[] data, boolean zip) throws IOException {
 590                 return Base64.encodeBytes(data, zip ? Base64.GZIP : Base64.NO_OPTIONS);
 591         }
 592
 593         /**
 594          * Convert the given data to Base64 format.
 595          *
 596          * @param data
 597          *            the data to convert
 598          * @param zip
 599          *            TRUE to also uncompress the data from a GZIP format; take care
 600          *            about this flag, as it could easily cause errors in the
 601          *            returned content or an {@link IOException}
 602          * @param breakLines
 603          *            TRUE to break lines on every 76th character
 604          *
 605          * @return the Base64 {@link String} representation of the data
 606          *
 607          * @throws IOException
 608          *             in case of I/O errors
 609          */
 610         @Deprecated
 611         public static OutputStream base64(OutputStream data, boolean zip,
 612                         boolean breakLines) throws IOException {
 613                 OutputStream out = new Base64OutputStream(data, true);
 614
 615                 if (zip) {
 616                         out = new java.util.zip.GZIPOutputStream(out);
 617                 }
 618
 619                 return out;
 620         }
 621
 622         /**
 623          * Convert the given data to Base64 format.
 624          *
 625          * @param data
 626          *            the data to convert
 627          * @param zip
 628          *            TRUE to also uncompress the data from a GZIP format; take care
 629          *            about this flag, as it could easily cause errors in the
 630          *            returned content or an {@link IOException}
 631          * @param breakLines
 632          *            TRUE to break lines on every 76th character
 633          *
 634          * @return the Base64 {@link String} representation of the data
 635          *
 636          * @throws IOException
 637          *             in case of I/O errors
 638          */
 639         @Deprecated
 640         public static InputStream base64(InputStream data, boolean zip,
 641                         boolean breakLines) throws IOException {
 642                 if (zip) {
 643                         data = new java.util.zip.GZIPInputStream(data);
 644                 }
 645
 646                 return new Base64InputStream(data, true);
 647         }
 648
 649         /**
 650          * Unconvert the given data from Base64 format back to a raw array of bytes.
 651          * <p>
 652          * Will automatically detect zipped data and also uncompress it before
 653          * returning, unless ZIP is false.
 654          *
 655          * @param data
 656          *            the data to unconvert
 657          * @param zip
 658          *            TRUE to also uncompress the data from a GZIP format
 659          *            automatically; if set to FALSE, zipped data can be returned
 660          *
 661          * @return the raw data represented by the given Base64 {@link String},
 662          *         optionally compressed with GZIP
 663          *
 664          * @throws IOException
 665          *             in case of I/O errors
 666          */
 667         public static byte[] unbase64(String data, boolean zip) throws IOException {
 668                 return Base64
 669                                 .decode(data, zip ? Base64.NO_OPTIONS : Base64.DONT_GUNZIP);
 670         }
 671
 672         /**
 673          * Unconvert the given data from Base64 format back to a raw array of bytes.
 674          *
 675          * @param data
 676          *            the data to unconvert
 677          * @param zip
 678          *            TRUE to also uncompress the data from a GZIP format; take care
 679          *            about this flag, as it could easily cause errors in the
 680          *            returned content or an {@link IOException}
 681          *
 682          * @return the raw data represented by the given Base64 {@link String}
 683          *
 684          * @throws IOException
 685          *             in case of I/O errors
 686          */
 687         @Deprecated
 688         public static OutputStream unbase64(OutputStream data, boolean zip)
 689                         throws IOException {
 690                 OutputStream out = new Base64OutputStream(data, false);
 691
 692
 693                 if (zip) {
 694                         out = new java.util.zip.GZIPOutputStream(out);
 695                 }
 696
 697                 return out;
 698         }
 699
 700         /**
 701          * Unconvert the given data from Base64 format back to a raw array of bytes.
 702          *
 703          * @param data
 704          *            the data to unconvert
 705          * @param zip
 706          *            TRUE to also uncompress the data from a GZIP format; take care
 707          *            about this flag, as it could easily cause errors in the
 708          *            returned content or an {@link IOException}
 709          *
 710          * @return the raw data represented by the given Base64 {@link String}
 711          *
 712          * @throws IOException
 713          *             in case of I/O errors
 714          */
 715         @Deprecated
 716         public static InputStream unbase64(InputStream data, boolean zip)
 717                         throws IOException {
 718                 if (zip) {
 719                         data = new java.util.zip.GZIPInputStream(data);
 720                 }
 721
 722                 return new Base64InputStream(data, false);
 723         }
 724
 725         /**
 726          * Unconvert the given data from Base64 format back to a raw array of bytes.
 727          * <p>
 728          * Will automatically detect zipped data and also uncompress it before
 729          * returning, unless ZIP is false.
 730          *
 731          * @param data
 732          *            the data to unconvert
 733          * @param offset
 734          *            the offset at which to start taking the data (do not take the
 735          *            data before it into account)
 736          * @param count
 737          *            the number of bytes to take into account (do not process after
 738          *            this number of bytes has been processed)
 739          * @param zip
 740          *            TRUE to also uncompress the data from a GZIP format
 741          *            automatically; if set to FALSE, zipped data can be returned
 742          *
 743          * @return the raw data represented by the given Base64 {@link String}
 744          *
 745          * @throws IOException
 746          *             in case of I/O errors
 747          */
 748         public static byte[] unbase64(byte[] data, int offset, int count,
 749                         boolean zip) throws IOException {
 750                 return Base64.niki_decode(data, offset, count, zip ? Base64.NO_OPTIONS
 751                                 : Base64.DONT_GUNZIP);
 752         }
 753
 754         /**
 755          * Unonvert the given data from Base64 format back to a {@link String}.
 756          * <p>
 757          * Will automatically detect zipped data and also uncompress it before
 758          * returning, unless ZIP is false.
 759          *
 760          * @param data
 761          *            the data to unconvert
 762          * @param zip
 763          *            TRUE to also uncompress the data from a GZIP format
 764          *            automatically; if set to FALSE, zipped data can be returned
 765          *
 766          * @return the {@link String} represented by the given Base64 {@link String}
 767          *         , optionally compressed with GZIP
 768          *
 769          * @throws IOException
 770          *             in case of I/O errors
 771          */
 772         public static String unbase64s(String data, boolean zip) throws IOException {
 773                 return new String(unbase64(data, zip), "UTF-8");
 774         }
 775
 776         /**
 777          * Unconvert the given data from Base64 format back into a {@link String}.
 778          *
 779          * @param data
 780          *            the data to unconvert
 781          * @param offset
 782          *            the offset at which to start taking the data (do not take the
 783          *            data before it into account)
 784          * @param count
 785          *            the number of bytes to take into account (do not process after
 786          *            this number of bytes has been processed)
 787          * @param zip
 788          *            TRUE to also uncompress the data from a GZIP format; take care
 789          *            about this flag, as it could easily cause errors in the
 790          *            returned content or an {@link IOException}
 791          *
 792          * @return the {@link String} represented by the given Base64 {@link String}
 793          *         , optionally compressed with GZIP
 794          *
 795          * @throws IOException
 796          *             in case of I/O errors
 797          */
 798         public static String unbase64s(byte[] data, int offset, int count,
 799                         boolean zip) throws IOException {
 800                 return new String(unbase64(data, offset, count, zip), "UTF-8");
 801         }
 802
 803         /**
 804          * Return a display {@link String} for the given value, which can be
 805          * suffixed with "k" or "M" depending upon the number, if it is big enough.
 806          * <p>
 807          * <p>
 808          * Examples:
 809          * <ul>
 810          * <li><tt>8 765</tt> becomes "8k"</li>
 811          * <li><tt>998 765</tt> becomes "998k"</li>
 812          * <li><tt>12 987 364</tt> becomes "12M"</li>
 813          * <li><tt>5 534 333 221</tt> becomes "5G"</li>
 814          * </ul>
 815          *
 816          * @param value
 817          *            the value to convert
 818          *
 819          * @return the display value
 820          */
 821         public static String formatNumber(long value) {
 822                 return formatNumber(value, 0);
 823         }
 824
 825         /**
 826          * Return a display {@link String} for the given value, which can be
 827          * suffixed with "k" or "M" depending upon the number, if it is big enough.
 828          * <p>
 829          * Examples (assuming decimalPositions = 1):
 830          * <ul>
 831          * <li><tt>8 765</tt> becomes "8.7k"</li>
 832          * <li><tt>998 765</tt> becomes "998.7k"</li>
 833          * <li><tt>12 987 364</tt> becomes "12.9M"</li>
 834          * <li><tt>5 534 333 221</tt> becomes "5.5G"</li>
 835          * </ul>
 836          *
 837          * @param value
 838          *            the value to convert
 839          * @param decimalPositions
 840          *            the number of decimal positions to keep
 841          *
 842          * @return the display value
 843          */
 844         public static String formatNumber(long value, int decimalPositions) {
 845                 long userValue = value;
 846                 String suffix = "";
 847                 long mult = 1;
 848
 849                 if (value >= 1000000000l) {
 850                         mult = 1000000000l;
 851                         userValue = value / 1000000000l;
 852                         suffix = " G";
 853                 } else if (value >= 1000000l) {
 854                         mult = 1000000l;
 855                         userValue = value / 1000000l;
 856                         suffix = " M";
 857                 } else if (value >= 1000l) {
 858                         mult = 1000l;
 859                         userValue = value / 1000l;
 860                         suffix = " k";
 861                 }
 862
 863                 String deci = "";
 864                 if (decimalPositions > 0) {
 865                         deci = Long.toString(value % mult);
 866                         int size = Long.toString(mult).length() - 1;
 867                         while (deci.length() < size) {
 868                                 deci = "0" + deci;
 869                         }
 870
 871                         deci = deci.substring(0, Math.min(decimalPositions, deci.length()));
 872                         while (deci.length() < decimalPositions) {
 873                                 deci += "0";
 874                         }
 875
 876                         deci = "." + deci;
 877                 }
 878
 879                 return Long.toString(userValue) + deci + suffix;
 880         }
 881
 882         /**
 883          * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
 884          * read a "display" number that can contain a "M" or "k" suffix and return
 885          * the full value.
 886          * <p>
 887          * Of course, the conversion to and from display form is lossy (example:
 888          * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
 889          *
 890          * @param value
 891          *            the value in display form with possible "M" and "k" suffixes,
 892          *            can be NULL
 893          *
 894          * @return the value as a number, or 0 if not possible to convert
 895          */
 896         public static long toNumber(String value) {
 897                 return toNumber(value, 0l);
 898         }
 899
 900         /**
 901          * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
 902          * read a "display" number that can contain a "M" or "k" suffix and return
 903          * the full value.
 904          * <p>
 905          * Of course, the conversion to and from display form is lossy (example:
 906          * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
 907          *
 908          * @param value
 909          *            the value in display form with possible "M" and "k" suffixes,
 910          *            can be NULL
 911          * @param def
 912          *            the default value if it is not possible to convert the given
 913          *            value to a number
 914          *
 915          * @return the value as a number, or 0 if not possible to convert
 916          */
 917         public static long toNumber(String value, long def) {
 918                 long count = def;
 919                 if (value != null) {
 920                         value = value.trim().toLowerCase();
 921                         try {
 922                                 long mult = 1;
 923                                 if (value.endsWith("g")) {
 924                                         value = value.substring(0, value.length() - 1).trim();
 925                                         mult = 1000000000;
 926                                 } else if (value.endsWith("m")) {
 927                                         value = value.substring(0, value.length() - 1).trim();
 928                                         mult = 1000000;
 929                                 } else if (value.endsWith("k")) {
 930                                         value = value.substring(0, value.length() - 1).trim();
 931                                         mult = 1000;
 932                                 }
 933
 934                                 long deci = 0;
 935                                 if (value.contains(".")) {
 936                                         String[] tab = value.split("\\.");
 937                                         if (tab.length != 2) {
 938                                                 throw new NumberFormatException(value);
 939                                         }
 940                                         double decimal = Double.parseDouble("0."
 941                                                         + tab[tab.length - 1]);
 942                                         deci = ((long) (mult * decimal));
 943                                         value = tab[0];
 944                                 }
 945                                 count = mult * Long.parseLong(value) + deci;
 946                         } catch (Exception e) {
 947                         }
 948                 }
 949
 950                 return count;
 951         }
 952
 953         /**
 954          * The "remove accents" pattern.
 955          *
 956          * @return the pattern, or NULL if a problem happens
 957          */
 958         private static Pattern getMarks() {
 959                 try {
 960                         return Pattern
 961                                         .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
 962                 } catch (Exception e) {
 963                         // Can fail on Android...
 964                         return null;
 965                 }
 966         }
 967
 968         //
 969         // justify List<String> related:
 970         //
 971
 972         /**
 973          * Check if this line ends as a complete line (ends with a "." or similar).
 974          * <p>
 975          * Note that we consider an empty line as full, and a line ending with
 976          * spaces as not complete.
 977          *
 978          * @param line
 979          *            the line to check
 980          *
 981          * @return TRUE if it does
 982          */
 983         static private boolean isFullLine(StringBuilder line) {
 984                 if (line.length() == 0) {
 985                         return true;
 986                 }
 987
 988                 char lastCar = line.charAt(line.length() - 1);
 989                 switch (lastCar) {
 990                 case '.': // points
 991                 case '?':
 992                 case '!':
 993
 994                 case '\'': // quotes
 995                 case '‘':
 996                 case '’':
 997
 998                 case '"': // double quotes
 999                 case '”':
1000                 case '“':
1001                 case '»':
1002                 case '«':
1003                         return true;
1004                 default:
1005                         return false;
1006                 }
1007         }
1008
1009         /**
1010          * Check if this line represent an item in a list or description (i.e.,
1011          * check that the first non-space char is "-").
1012          *
1013          * @param line
1014          *            the line to check
1015          *
1016          * @return TRUE if it is
1017          */
1018         static private boolean isItemLine(String line) {
1019                 String spacing = getItemSpacing(line);
1020                 return spacing != null && !spacing.isEmpty()
1021                                 && line.charAt(spacing.length()) == '-';
1022         }
1023
1024         /**
1025          * Return all the spaces that start this line (or Empty if none).
1026          *
1027          * @param line
1028          *            the line to get the starting spaces from
1029          *
1030          * @return the left spacing
1031          */
1032         static private String getItemSpacing(String line) {
1033                 int i;
1034                 for (i = 0; i < line.length(); i++) {
1035                         if (line.charAt(i) != ' ') {
1036                                 return line.substring(0, i);
1037                         }
1038                 }
1039
1040                 return "";
1041         }
1042
1043         /**
1044          * This line is an horizontal spacer line.
1045          *
1046          * @param line
1047          *            the line to test
1048          *
1049          * @return TRUE if it is
1050          */
1051         static private boolean isHrLine(CharSequence line) {
1052                 int count = 0;
1053                 if (line != null) {
1054                         for (int i = 0; i < line.length(); i++) {
1055                                 char car = line.charAt(i);
1056                                 if (car == ' ' || car == '\t' || car == '*' || car == '-'
1057                                                 || car == '_' || car == '~' || car == '=' || car == '/'
1058                                                 || car == '\\') {
1059                                         count++;
1060                                 } else {
1061                                         return false;
1062                                 }
1063                         }
1064                 }
1065
1066                 return count > 2;
1067         }
1068 }