src/be/nikiroo/utils/StringUtils.java

   1 package be.nikiroo.utils;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.io.OutputStream;
   6 import java.io.UnsupportedEncodingException;
   7 import java.security.MessageDigest;
   8 import java.security.NoSuchAlgorithmException;
   9 import java.text.Normalizer;
  10 import java.text.Normalizer.Form;
  11 import java.text.ParseException;
  12 import java.text.SimpleDateFormat;
  13 import java.util.AbstractMap;
  14 import java.util.ArrayList;
  15 import java.util.Date;
  16 import java.util.List;
  17 import java.util.Map.Entry;
  18 import java.util.regex.Pattern;
  19
  20 import org.unbescape.html.HtmlEscape;
  21 import org.unbescape.html.HtmlEscapeLevel;
  22 import org.unbescape.html.HtmlEscapeType;
  23
  24 /**
 * This class offers some utilities based around {@link String}s.
  26  *
  27  * @author niki
  28  */
  29 public class StringUtils {
  30         /**
  31          * This enum type will decide the alignment of a {@link String} when padding
  32          * or justification is applied (if there is enough horizontal space for it
  33          * to be aligned).
  34          */
  35         public enum Alignment {
  36                 /** Aligned at left. */
  37                 LEFT,
  38                 /** Centered. */
  39                 CENTER,
  40                 /** Aligned at right. */
  41                 RIGHT,
  42                 /** Full justified (to both left and right). */
  43                 JUSTIFY,
  44
  45                 // Old Deprecated values:
  46
  47                 /** DEPRECATED: please use LEFT. */
  48                 @Deprecated
  49                 Beginning,
  50                 /** DEPRECATED: please use CENTER. */
  51                 @Deprecated
  52                 Center,
  53                 /** DEPRECATED: please use RIGHT. */
  54                 @Deprecated
  55                 End;
  56
  57                 /**
  58                  * Return the non-deprecated version of this enum if needed (or return
  59                  * self if not).
  60                  *
  61                  * @return the non-deprecated value
  62                  */
  63                 Alignment undeprecate() {
  64                         if (this == Beginning)
  65                                 return LEFT;
  66                         if (this == Center)
  67                                 return CENTER;
  68                         if (this == End)
  69                                 return RIGHT;
  70                         return this;
  71                 }
  72         }
  73
  74         static private Pattern marks = getMarks();
  75
  76         /**
  77          * Fix the size of the given {@link String} either with space-padding or by
  78          * shortening it.
  79          *
  80          * @param text
  81          *            the {@link String} to fix
  82          * @param width
  83          *            the size of the resulting {@link String} or -1 for a noop
  84          *
  85          * @return the resulting {@link String} of size <i>size</i>
  86          */
  87         static public String padString(String text, int width) {
  88                 return padString(text, width, true, null);
  89         }
  90
  91         /**
  92          * Fix the size of the given {@link String} either with space-padding or by
  93          * optionally shortening it.
  94          *
  95          * @param text
  96          *            the {@link String} to fix
  97          * @param width
  98          *            the size of the resulting {@link String} if the text fits or
  99          *            if cut is TRUE or -1 for a noop
 100          * @param cut
 101          *            cut the {@link String} shorter if needed
 102          * @param align
 103          *            align the {@link String} in this position if we have enough
 104          *            space (default is Alignment.Beginning)
 105          *
 106          * @return the resulting {@link String} of size <i>size</i> minimum
 107          */
 108         static public String padString(String text, int width, boolean cut,
 109                         Alignment align) {
 110
 111                 if (align == null) {
 112                         align = Alignment.LEFT;
 113                 }
 114
 115                 align = align.undeprecate();
 116
 117                 if (width >= 0) {
 118                         if (text == null)
 119                                 text = "";
 120
 121                         int diff = width - text.length();
 122
 123                         if (diff < 0) {
 124                                 if (cut)
 125                                         text = text.substring(0, width);
 126                         } else if (diff > 0) {
 127                                 if (diff < 2 && align != Alignment.RIGHT)
 128                                         align = Alignment.LEFT;
 129
 130                                 switch (align) {
 131                                 case RIGHT:
 132                                         text = new String(new char[diff]).replace('\0', ' ') + text;
 133                                         break;
 134                                 case CENTER:
 135                                         int pad1 = (diff) / 2;
 136                                         int pad2 = (diff + 1) / 2;
 137                                         text = new String(new char[pad1]).replace('\0', ' ') + text
 138                                                         + new String(new char[pad2]).replace('\0', ' ');
 139                                         break;
 140                                 case LEFT:
 141                                 default:
 142                                         text = text + new String(new char[diff]).replace('\0', ' ');
 143                                         break;
 144                                 }
 145                         }
 146                 }
 147
 148                 return text;
 149         }
 150
 151         /**
 152          * Justify a text into width-sized (at the maximum) lines.
 153          *
 154          * @param text
 155          *            the {@link String} to justify
 156          * @param width
 157          *            the maximum size of the resulting lines
 158          *
 159          * @return a list of justified text lines
 160          */
 161         static public List<String> justifyText(String text, int width) {
 162                 return justifyText(text, width, null);
 163         }
 164
 165         /**
 166          * Justify a text into width-sized (at the maximum) lines.
 167          *
 168          * @param text
 169          *            the {@link String} to justify
 170          * @param width
 171          *            the maximum size of the resulting lines
 172          * @param align
 173          *            align the lines in this position (default is
 174          *            Alignment.Beginning)
 175          *
 176          * @return a list of justified text lines
 177          */
 178         static public List<String> justifyText(String text, int width,
 179                         Alignment align) {
 180                 if (align == null) {
 181                         align = Alignment.LEFT;
 182                 }
 183
 184                 align = align.undeprecate();
 185
 186                 switch (align) {
 187                 case CENTER:
 188                         return StringJustifier.center(text, width);
 189                 case RIGHT:
 190                         return StringJustifier.right(text, width);
 191                 case JUSTIFY:
 192                         return StringJustifier.full(text, width);
 193                 case LEFT:
 194                 default:
 195                         return StringJustifier.left(text, width);
 196                 }
 197         }
 198
 199         /**
 200          * Justify a text into width-sized (at the maximum) lines.
 201          *
 202          * @param text
 203          *            the {@link String} to justify
 204          * @param width
 205          *            the maximum size of the resulting lines
 206          *
 207          * @return a list of justified text lines
 208          */
 209         static public List<String> justifyText(List<String> text, int width) {
 210                 return justifyText(text, width, null);
 211         }
 212
 213         /**
 214          * Justify a text into width-sized (at the maximum) lines.
 215          *
 216          * @param text
 217          *            the {@link String} to justify
 218          * @param width
 219          *            the maximum size of the resulting lines
 220          * @param align
 221          *            align the lines in this position (default is
 222          *            Alignment.Beginning)
 223          *
 224          * @return a list of justified text lines
 225          */
 226         static public List<String> justifyText(List<String> text, int width,
 227                         Alignment align) {
 228                 List<String> result = new ArrayList<String>();
 229
 230                 // Content <-> Bullet spacing (null = no spacing)
 231                 List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
 232                 StringBuilder previous = null;
 233                 StringBuilder tmp = new StringBuilder();
 234                 String previousItemBulletSpacing = null;
 235                 String itemBulletSpacing = null;
 236                 for (String inputLine : text) {
 237                         boolean previousLineComplete = true;
 238
 239                         String current = inputLine.replace("\t", "    ");
 240                         itemBulletSpacing = getItemSpacing(current);
 241                         boolean bullet = isItemLine(current);
 242                         if ((previousItemBulletSpacing == null || itemBulletSpacing
 243                                         .length() <= previousItemBulletSpacing.length()) && !bullet) {
 244                                 itemBulletSpacing = null;
 245                         }
 246
 247                         if (itemBulletSpacing != null) {
 248                                 current = current.trim();
 249                                 if (!current.isEmpty() && bullet) {
 250                                         current = current.substring(1);
 251                                 }
 252                                 current = current.trim();
 253                                 previousLineComplete = bullet;
 254                         } else {
 255                                 tmp.setLength(0);
 256                                 for (String word : current.split(" ")) {
 257                                         if (word.isEmpty()) {
 258                                                 continue;
 259                                         }
 260
 261                                         if (tmp.length() > 0) {
 262                                                 tmp.append(' ');
 263                                         }
 264                                         tmp.append(word.trim());
 265                                 }
 266                                 current = tmp.toString();
 267
 268                                 previousLineComplete = current.isEmpty()
 269                                                 || previousItemBulletSpacing != null
 270                                                 || (previous != null && isFullLine(previous))
 271                                                 || isHrLine(current) || isHrLine(previous);
 272                         }
 273
 274                         if (previous == null) {
 275                                 previous = new StringBuilder();
 276                         } else {
 277                                 if (previousLineComplete) {
 278                                         lines.add(new AbstractMap.SimpleEntry<String, String>(
 279                                                         previous.toString(), previousItemBulletSpacing));
 280                                         previous.setLength(0);
 281                                         previousItemBulletSpacing = itemBulletSpacing;
 282                                 } else {
 283                                         previous.append(' ');
 284                                 }
 285                         }
 286
 287                         previous.append(current);
 288
 289                 }
 290
 291                 if (previous != null) {
 292                         lines.add(new AbstractMap.SimpleEntry<String, String>(previous
 293                                         .toString(), previousItemBulletSpacing));
 294                 }
 295
 296                 for (Entry<String, String> line : lines) {
 297                         String content = line.getKey();
 298                         String spacing = line.getValue();
 299
 300                         String bullet = "- ";
 301                         if (spacing == null) {
 302                                 bullet = "";
 303                                 spacing = "";
 304                         }
 305
 306                         if (spacing.length() > width + 3) {
 307                                 spacing = "";
 308                         }
 309
 310                         for (String subline : StringUtils.justifyText(content, width
 311                                         - (spacing.length() + bullet.length()), align)) {
 312                                 result.add(spacing + bullet + subline);
 313                                 if (!bullet.isEmpty()) {
 314                                         bullet = "  ";
 315                                 }
 316                         }
 317                 }
 318
 319                 return result;
 320         }
 321
 322         /**
 323          * Sanitise the given input to make it more Terminal-friendly by removing
 324          * combining characters.
 325          *
 326          * @param input
 327          *            the input to sanitise
 328          * @param allowUnicode
 329          *            allow Unicode or only allow ASCII Latin characters
 330          *
 331          * @return the sanitised {@link String}
 332          */
 333         static public String sanitize(String input, boolean allowUnicode) {
 334                 return sanitize(input, allowUnicode, !allowUnicode);
 335         }
 336
 337         /**
 338          * Sanitise the given input to make it more Terminal-friendly by removing
 339          * combining characters.
 340          *
 341          * @param input
 342          *            the input to sanitise
 343          * @param allowUnicode
 344          *            allow Unicode or only allow ASCII Latin characters
 345          * @param removeAllAccents
 346          *            TRUE to replace all accentuated characters by their non
 347          *            accentuated counter-parts
 348          *
 349          * @return the sanitised {@link String}
 350          */
 351         static public String sanitize(String input, boolean allowUnicode,
 352                         boolean removeAllAccents) {
 353
 354                 if (removeAllAccents) {
 355                         input = Normalizer.normalize(input, Form.NFKD);
 356                         if (marks != null) {
 357                                 input = marks.matcher(input).replaceAll("");
 358                         }
 359                 }
 360
 361                 input = Normalizer.normalize(input, Form.NFKC);
 362
 363                 if (!allowUnicode) {
 364                         StringBuilder builder = new StringBuilder();
 365                         for (int index = 0; index < input.length(); index++) {
 366                                 char car = input.charAt(index);
 367                                 // displayable chars in ASCII are in the range 32<->255,
 368                                 // except DEL (127)
 369                                 if (car >= 32 && car <= 255 && car != 127) {
 370                                         builder.append(car);
 371                                 }
 372                         }
 373                         input = builder.toString();
 374                 }
 375
 376                 return input;
 377         }
 378
 379         /**
 380          * Convert between the time in milliseconds to a {@link String} in a "fixed"
 381          * way (to exchange data over the wire, for instance).
 382          * <p>
 383          * Precise to the second.
 384          *
 385          * @param time
 386          *            the specified number of milliseconds since the standard base
 387          *            time known as "the epoch", namely January 1, 1970, 00:00:00
 388          *            GMT
 389          *
 390          * @return the time as a {@link String}
 391          */
 392         static public String fromTime(long time) {
 393                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 394                 return sdf.format(new Date(time));
 395         }
 396
 397         /**
 398          * Convert between the time as a {@link String} to milliseconds in a "fixed"
 399          * way (to exchange data over the wire, for instance).
 400          * <p>
 401          * Precise to the second.
 402          *
 403          * @param displayTime
 404          *            the time as a {@link String}
 405          *
 406          * @return the number of milliseconds since the standard base time known as
 407          *         "the epoch", namely January 1, 1970, 00:00:00 GMT, or -1 in case
 408          *         of error
 409          *
 410          * @throws ParseException
 411          *             in case of parse error
 412          */
 413         static public long toTime(String displayTime) throws ParseException {
 414                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 415                 return sdf.parse(displayTime).getTime();
 416         }
 417
 418         /**
 419          * Return a hash of the given {@link String}.
 420          *
 421          * @param input
 422          *            the input data
 423          *
 424          * @return the hash
 425          */
 426         static public String getMd5Hash(String input) {
 427                 try {
 428                         MessageDigest md = MessageDigest.getInstance("MD5");
 429                         md.update(input.getBytes("UTF-8"));
 430                         byte byteData[] = md.digest();
 431
 432                         StringBuffer hexString = new StringBuffer();
 433                         for (int i = 0; i < byteData.length; i++) {
 434                                 String hex = Integer.toHexString(0xff & byteData[i]);
 435                                 if (hex.length() == 1)
 436                                         hexString.append('0');
 437                                 hexString.append(hex);
 438                         }
 439
 440                         return hexString.toString();
 441                 } catch (NoSuchAlgorithmException e) {
 442                         return input;
 443                 } catch (UnsupportedEncodingException e) {
 444                         return input;
 445                 }
 446         }
 447
 448         /**
 449          * Remove the HTML content from the given input, and un-html-ize the rest.
 450          *
 451          * @param html
 452          *            the HTML-encoded content
 453          *
 454          * @return the HTML-free equivalent content
 455          */
 456         public static String unhtml(String html) {
 457                 StringBuilder builder = new StringBuilder();
 458
 459                 int inTag = 0;
 460                 for (char car : html.toCharArray()) {
 461                         if (car == '<') {
 462                                 inTag++;
 463                         } else if (car == '>') {
 464                                 inTag--;
 465                         } else if (inTag <= 0) {
 466                                 builder.append(car);
 467                         }
 468                 }
 469
 470                 char nbsp = ' '; // non-breakable space (a special char)
 471                 char space = ' ';
 472                 return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
 473         }
 474
 475         /**
 476          * Escape the given {@link String} so it can be used in XML, as content.
 477          *
 478          * @param input
 479          *            the input {@link String}
 480          *
 481          * @return the escaped {@link String}
 482          */
 483         public static String xmlEscape(String input) {
 484                 if (input == null) {
 485                         return "";
 486                 }
 487
 488                 return HtmlEscape.escapeHtml(input,
 489                                 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
 490                                 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
 491         }
 492
 493         /**
 494          * Escape the given {@link String} so it can be used in XML, as text content
 495          * inside double-quotes.
 496          *
 497          * @param input
 498          *            the input {@link String}
 499          *
 500          * @return the escaped {@link String}
 501          */
 502         public static String xmlEscapeQuote(String input) {
 503                 if (input == null) {
 504                         return "";
 505                 }
 506
 507                 return HtmlEscape.escapeHtml(input,
 508                                 HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
 509                                 HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
 510         }
 511
 512         /**
 513          * Zip the data and then encode it into Base64.
 514          *
 515          * @deprecated use {@link StringUtils#base64(byte[], boolean)} with the
 516          *             correct parameter instead
 517          *
 518          * @param data
 519          *            the data
 520          *
 521          * @return the Base64 zipped version
 522          */
 523         @Deprecated
 524         public static String zip64(String data) {
 525                 try {
 526                         return Base64.encodeBytes(data.getBytes("UTF-8"), Base64.GZIP);
 527                 } catch (IOException e) {
 528                         e.printStackTrace();
 529                         return null;
 530                 }
 531         }
 532
 533         /**
 534          * Unconvert from Base64 then unzip the content.
 535          *
 536          * @deprecated use {@link StringUtils#unbase64s(String, boolean)} with the
 537          *             correct parameter instead
 538          *
 539          * @param data
 540          *            the data in Base64 format
 541          *
 542          * @return the raw data
 543          *
 544          * @throws IOException
 545          *             in case of I/O error
 546          */
 547         @Deprecated
 548         public static String unzip64(String data) throws IOException {
 549                 return new String(Base64.decode(data, Base64.GZIP), "UTF-8");
 550         }
 551
 552         /**
 553          * Convert the given data to Base64 format.
 554          *
 555          * @param data
 556          *            the data to convert
 557          * @param zip
 558          *            TRUE to also compress the data in GZIP format; remember that
 559          *            compressed and not-compressed content are different; you need
 560          *            to know which is which when decoding
 561          *
 562          * @return the Base64 {@link String} representation of the data
 563          *
 564          * @throws IOException
 565          *             in case of I/O errors
 566          */
 567         public static String base64(String data, boolean zip) throws IOException {
 568                 return base64(data.getBytes("UTF-8"), zip);
 569         }
 570
 571         /**
 572          * Convert the given data to Base64 format.
 573          *
 574          * @param data
 575          *            the data to convert
 576          * @param zip
 577          *            TRUE to also compress the data in GZIP format; remember that
 578          *            compressed and not-compressed content are different; you need
 579          *            to know which is which when decoding
 580          *
 581          * @return the Base64 {@link String} representation of the data
 582          *
 583          * @throws IOException
 584          *             in case of I/O errors
 585          */
 586         public static String base64(byte[] data, boolean zip) throws IOException {
 587                 return Base64.encodeBytes(data, zip ? Base64.GZIP : Base64.NO_OPTIONS);
 588         }
 589
 590         /**
 591          * Convert the given data to Base64 format.
 592          *
 593          * @param data
 594          *            the data to convert
 595          * @param zip
 596          *            TRUE to also uncompress the data from a GZIP format; take care
 597          *            about this flag, as it could easily cause errors in the
 598          *            returned content or an {@link IOException}
 599          * @param breakLines
 600          *            TRUE to break lines on every 76th character
 601          *
 602          * @return the Base64 {@link String} representation of the data
 603          *
 604          * @throws IOException
 605          *             in case of I/O errors
 606          */
 607         public static OutputStream base64(OutputStream data, boolean zip,
 608                         boolean breakLines) throws IOException {
 609                 OutputStream out = new Base64.OutputStream(data,
 610                                 breakLines ? Base64.DO_BREAK_LINES & Base64.ENCODE
 611                                                 : Base64.ENCODE);
 612
 613                 if (zip) {
 614                         out = new java.util.zip.GZIPOutputStream(out);
 615                 }
 616
 617                 return out;
 618         }
 619
 620         /**
 621          * Convert the given data to Base64 format.
 622          *
 623          * @param data
 624          *            the data to convert
 625          * @param zip
 626          *            TRUE to also uncompress the data from a GZIP format; take care
 627          *            about this flag, as it could easily cause errors in the
 628          *            returned content or an {@link IOException}
 629          * @param breakLines
 630          *            TRUE to break lines on every 76th character
 631          *
 632          * @return the Base64 {@link String} representation of the data
 633          *
 634          * @throws IOException
 635          *             in case of I/O errors
 636          */
 637         public static InputStream base64(InputStream data, boolean zip,
 638                         boolean breakLines) throws IOException {
 639                 if (zip) {
 640                         data = new java.util.zip.GZIPInputStream(data);
 641                 }
 642
 643                 return new Base64.InputStream(data, breakLines ? Base64.DO_BREAK_LINES
 644                                 & Base64.ENCODE : Base64.ENCODE);
 645         }
 646
 647         /**
 648          * Unconvert the given data from Base64 format back to a raw array of bytes.
 649          * <p>
 650          * Will automatically detect zipped data and also uncompress it before
 651          * returning, unless ZIP is false.
 652          *
 653          * @param data
 654          *            the data to unconvert
 655          * @param zip
 656          *            TRUE to also uncompress the data from a GZIP format
 657          *            automatically; if set to FALSE, zipped data can be returned
 658          *
 659          * @return the raw data represented by the given Base64 {@link String},
 660          *         optionally compressed with GZIP
 661          *
 662          * @throws IOException
 663          *             in case of I/O errors
 664          */
 665         public static byte[] unbase64(String data, boolean zip) throws IOException {
 666                 return Base64
 667                                 .decode(data, zip ? Base64.NO_OPTIONS : Base64.DONT_GUNZIP);
 668         }
 669
 670         /**
 671          * Unconvert the given data from Base64 format back to a raw array of bytes.
 672          *
 673          * @param data
 674          *            the data to unconvert
 675          * @param zip
 676          *            TRUE to also uncompress the data from a GZIP format; take care
 677          *            about this flag, as it could easily cause errors in the
 678          *            returned content or an {@link IOException}
 679          *
 680          * @return the raw data represented by the given Base64 {@link String}
 681          *
 682          * @throws IOException
 683          *             in case of I/O errors
 684          */
 685         public static OutputStream unbase64(OutputStream data, boolean zip)
 686                         throws IOException {
 687                 OutputStream out = new Base64.OutputStream(data, Base64.DECODE);
 688
 689                 if (zip) {
 690                         out = new java.util.zip.GZIPOutputStream(out);
 691                 }
 692
 693                 return out;
 694         }
 695
 696         /**
 697          * Unconvert the given data from Base64 format back to a raw array of bytes.
 698          *
 699          * @param data
 700          *            the data to unconvert
 701          * @param zip
 702          *            TRUE to also uncompress the data from a GZIP format; take care
 703          *            about this flag, as it could easily cause errors in the
 704          *            returned content or an {@link IOException}
 705          *
 706          * @return the raw data represented by the given Base64 {@link String}
 707          *
 708          * @throws IOException
 709          *             in case of I/O errors
 710          */
 711         public static InputStream unbase64(InputStream data, boolean zip)
 712                         throws IOException {
 713                 if (zip) {
 714                         data = new java.util.zip.GZIPInputStream(data);
 715                 }
 716
 717                 return new Base64.InputStream(data, Base64.DECODE);
 718         }
 719
 720         /**
 721          * Unconvert the given data from Base64 format back to a raw array of bytes.
 722          * <p>
 723          * Will automatically detect zipped data and also uncompress it before
 724          * returning, unless ZIP is false.
 725          *
 726          * @param data
 727          *            the data to unconvert
 728          * @param offset
 729          *            the offset at which to start taking the data (do not take the
 730          *            data before it into account)
 731          * @param count
 732          *            the number of bytes to take into account (do not process after
 733          *            this number of bytes has been processed)
 734          * @param zip
 735          *            TRUE to also uncompress the data from a GZIP format
 736          *            automatically; if set to FALSE, zipped data can be returned
 737          *
 738          * @return the raw data represented by the given Base64 {@link String}
 739          *
 740          * @throws IOException
 741          *             in case of I/O errors
 742          */
 743         public static byte[] unbase64(byte[] data, int offset, int count,
 744                         boolean zip) throws IOException {
 745                 return Base64.niki_decode(data, offset, count, zip ? Base64.NO_OPTIONS
 746                                 : Base64.DONT_GUNZIP);
 747         }
 748
 749         /**
 750          * Unonvert the given data from Base64 format back to a {@link String}.
 751          * <p>
 752          * Will automatically detect zipped data and also uncompress it before
 753          * returning, unless ZIP is false.
 754          *
 755          * @param data
 756          *            the data to unconvert
 757          * @param zip
 758          *            TRUE to also uncompress the data from a GZIP format
 759          *            automatically; if set to FALSE, zipped data can be returned
 760          *
 761          * @return the {@link String} represented by the given Base64 {@link String}
 762          *         , optionally compressed with GZIP
 763          *
 764          * @throws IOException
 765          *             in case of I/O errors
 766          */
 767         public static String unbase64s(String data, boolean zip) throws IOException {
 768                 return new String(unbase64(data, zip), "UTF-8");
 769         }
 770
 771         /**
 772          * Unconvert the given data from Base64 format back into a {@link String}.
 773          *
 774          * @param data
 775          *            the data to unconvert
 776          * @param offset
 777          *            the offset at which to start taking the data (do not take the
 778          *            data before it into account)
 779          * @param count
 780          *            the number of bytes to take into account (do not process after
 781          *            this number of bytes has been processed)
 782          * @param zip
 783          *            TRUE to also uncompress the data from a GZIP format; take care
 784          *            about this flag, as it could easily cause errors in the
 785          *            returned content or an {@link IOException}
 786          *
 787          * @return the {@link String} represented by the given Base64 {@link String}
 788          *         , optionally compressed with GZIP
 789          *
 790          * @throws IOException
 791          *             in case of I/O errors
 792          */
 793         public static String unbase64s(byte[] data, int offset, int count,
 794                         boolean zip) throws IOException {
 795                 return new String(unbase64(data, offset, count, zip), "UTF-8");
 796         }
 797
 798         /**
 799          * Return a display {@link String} for the given value, which can be
 800          * suffixed with "k" or "M" depending upon the number, if it is big enough.
 801          * <p>
 802          * <p>
 803          * Examples:
 804          * <ul>
 805          * <li><tt>8 765</tt> becomes "8k"</li>
 806          * <li><tt>998 765</tt> becomes "998k"</li>
 807          * <li><tt>12 987 364</tt> becomes "12M"</li>
 808          * <li><tt>5 534 333 221</tt> becomes "5G"</li>
 809          * </ul>
 810          *
 811          * @param value
 812          *            the value to convert
 813          *
 814          * @return the display value
 815          */
 816         public static String formatNumber(long value) {
 817                 return formatNumber(value, 0);
 818         }
 819
 820         /**
 821          * Return a display {@link String} for the given value, which can be
 822          * suffixed with "k" or "M" depending upon the number, if it is big enough.
 823          * <p>
 824          * Examples (assuming decimalPositions = 1):
 825          * <ul>
 826          * <li><tt>8 765</tt> becomes "8.7k"</li>
 827          * <li><tt>998 765</tt> becomes "998.7k"</li>
 828          * <li><tt>12 987 364</tt> becomes "12.9M"</li>
 829          * <li><tt>5 534 333 221</tt> becomes "5.5G"</li>
 830          * </ul>
 831          *
 832          * @param value
 833          *            the value to convert
 834          * @param decimalPositions
 835          *            the number of decimal positions to keep
 836          *
 837          * @return the display value
 838          */
 839         public static String formatNumber(long value, int decimalPositions) {
 840                 long userValue = value;
 841                 String suffix = "";
 842                 long mult = 1;
 843
 844                 if (value >= 1000000000l) {
 845                         mult = 1000000000l;
 846                         userValue = value / 1000000000l;
 847                         suffix = " G";
 848                 } else if (value >= 1000000l) {
 849                         mult = 1000000l;
 850                         userValue = value / 1000000l;
 851                         suffix = " M";
 852                 } else if (value >= 1000l) {
 853                         mult = 1000l;
 854                         userValue = value / 1000l;
 855                         suffix = " k";
 856                 }
 857
 858                 String deci = "";
 859                 if (decimalPositions > 0) {
 860                         deci = Long.toString(value % mult);
 861                         int size = Long.toString(mult).length() - 1;
 862                         while (deci.length() < size) {
 863                                 deci = "0" + deci;
 864                         }
 865
 866                         deci = deci.substring(0, Math.min(decimalPositions, deci.length()));
 867                         while (deci.length() < decimalPositions) {
 868                                 deci += "0";
 869                         }
 870
 871                         deci = "." + deci;
 872                 }
 873
 874                 return Long.toString(userValue) + deci + suffix;
 875         }
 876
 877         /**
 878          * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
 879          * read a "display" number that can contain a "M" or "k" suffix and return
 880          * the full value.
 881          * <p>
 882          * Of course, the conversion to and from display form is lossy (example:
 883          * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
 884          *
 885          * @param value
 886          *            the value in display form with possible "M" and "k" suffixes,
 887          *            can be NULL
 888          *
 889          * @return the value as a number, or 0 if not possible to convert
 890          */
 891         public static long toNumber(String value) {
 892                 return toNumber(value, 0l);
 893         }
 894
 895         /**
 896          * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
 897          * read a "display" number that can contain a "M" or "k" suffix and return
 898          * the full value.
 899          * <p>
 900          * Of course, the conversion to and from display form is lossy (example:
 901          * <tt>6870</tt> to "6.5k" to <tt>6500</tt>).
 902          *
 903          * @param value
 904          *            the value in display form with possible "M" and "k" suffixes,
 905          *            can be NULL
 906          * @param def
 907          *            the default value if it is not possible to convert the given
 908          *            value to a number
 909          *
 910          * @return the value as a number, or 0 if not possible to convert
 911          */
 912         public static long toNumber(String value, long def) {
 913                 long count = def;
 914                 if (value != null) {
 915                         value = value.trim().toLowerCase();
 916                         try {
 917                                 long mult = 1;
 918                                 if (value.endsWith("g")) {
 919                                         value = value.substring(0, value.length() - 1).trim();
 920                                         mult = 1000000000;
 921                                 } else if (value.endsWith("m")) {
 922                                         value = value.substring(0, value.length() - 1).trim();
 923                                         mult = 1000000;
 924                                 } else if (value.endsWith("k")) {
 925                                         value = value.substring(0, value.length() - 1).trim();
 926                                         mult = 1000;
 927                                 }
 928
 929                                 long deci = 0;
 930                                 if (value.contains(".")) {
 931                                         String[] tab = value.split("\\.");
 932                                         if (tab.length != 2) {
 933                                                 throw new NumberFormatException(value);
 934                                         }
 935                                         double decimal = Double.parseDouble("0."
 936                                                         + tab[tab.length - 1]);
 937                                         deci = ((long) (mult * decimal));
 938                                         value = tab[0];
 939                                 }
 940                                 count = mult * Long.parseLong(value) + deci;
 941                         } catch (Exception e) {
 942                         }
 943                 }
 944
 945                 return count;
 946         }
 947
 948         /**
 949          * The "remove accents" pattern.
 950          *
 951          * @return the pattern, or NULL if a problem happens
 952          */
 953         private static Pattern getMarks() {
 954                 try {
 955                         return Pattern
 956                                         .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
 957                 } catch (Exception e) {
 958                         // Can fail on Android...
 959                         return null;
 960                 }
 961         }
 962
 963         //
 964         // justify List<String> related:
 965         //
 966
 967         /**
 968          * Check if this line ends as a complete line (ends with a "." or similar).
 969          * <p>
 970          * Note that we consider an empty line as full, and a line ending with
 971          * spaces as not complete.
 972          *
 973          * @param line
 974          *            the line to check
 975          *
 976          * @return TRUE if it does
 977          */
 978         static private boolean isFullLine(StringBuilder line) {
 979                 if (line.length() == 0) {
 980                         return true;
 981                 }
 982
 983                 char lastCar = line.charAt(line.length() - 1);
 984                 switch (lastCar) {
 985                 case '.': // points
 986                 case '?':
 987                 case '!':
 988
 989                 case '\'': // quotes
 990                 case '‘':
 991                 case '’':
 992
 993                 case '"': // double quotes
 994                 case '”':
 995                 case '“':
 996                 case '»':
 997                 case '«':
 998                         return true;
 999                 default:
1000                         return false;
1001                 }
1002         }
1003
1004         /**
1005          * Check if this line represent an item in a list or description (i.e.,
1006          * check that the first non-space char is "-").
1007          *
1008          * @param line
1009          *            the line to check
1010          *
1011          * @return TRUE if it is
1012          */
1013         static private boolean isItemLine(String line) {
1014                 String spacing = getItemSpacing(line);
1015                 return spacing != null && !spacing.isEmpty()
1016                                 && line.charAt(spacing.length()) == '-';
1017         }
1018
1019         /**
1020          * Return all the spaces that start this line (or Empty if none).
1021          *
1022          * @param line
1023          *            the line to get the starting spaces from
1024          *
1025          * @return the left spacing
1026          */
1027         static private String getItemSpacing(String line) {
1028                 int i;
1029                 for (i = 0; i < line.length(); i++) {
1030                         if (line.charAt(i) != ' ') {
1031                                 return line.substring(0, i);
1032                         }
1033                 }
1034
1035                 return "";
1036         }
1037
1038         /**
1039          * This line is an horizontal spacer line.
1040          *
1041          * @param line
1042          *            the line to test
1043          *
1044          * @return TRUE if it is
1045          */
1046         static private boolean isHrLine(CharSequence line) {
1047                 int count = 0;
1048                 if (line != null) {
1049                         for (int i = 0; i < line.length(); i++) {
1050                                 char car = line.charAt(i);
1051                                 if (car == ' ' || car == '\t' || car == '*' || car == '-'
1052                                                 || car == '_' || car == '~' || car == '=' || car == '/'
1053                                                 || car == '\\') {
1054                                         count++;
1055                                 } else {
1056                                         return false;
1057                                 }
1058                         }
1059                 }
1060
1061                 return count > 2;
1062         }
1063 }