serial: fix Importer, more tests

[fanfix.git] / src / be / nikiroo / utils / StringUtils.java
diff --git a/src/be/nikiroo/utils/StringUtils.java b/src/be/nikiroo/utils/StringUtils.java

index 993f62b386f2c32da01e4987ed0aa47d5b28aae0..f7548aecca717c19aff3b0c60db338f5ad0fe1df 100644 (file)
--- a/src/be/nikiroo/utils/StringUtils.java
+++ b/src/be/nikiroo/utils/StringUtils.java
@@ -1,30 +1,34 @@
  package be.nikiroo.utils;
  
-import java.awt.Image;
-import java.awt.geom.AffineTransform;
-import java.awt.image.AffineTransformOp;
-import java.awt.image.BufferedImage;
  import java.io.ByteArrayInputStream;
  import java.io.ByteArrayOutputStream;
-import java.io.File;
  import java.io.IOException;
  import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
  import java.security.MessageDigest;
  import java.security.NoSuchAlgorithmException;
  import java.text.Normalizer;
  import java.text.Normalizer.Form;
  import java.text.ParseException;
  import java.text.SimpleDateFormat;
-import java.util.Base64;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Arrays;
  import java.util.Date;
+import java.util.List;
+import java.util.Map.Entry;
  import java.util.regex.Pattern;
-
-import javax.imageio.ImageIO;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
  
  import org.unbescape.html.HtmlEscape;
  import org.unbescape.html.HtmlEscapeLevel;
  import org.unbescape.html.HtmlEscapeType;
  
+import be.nikiroo.utils.streams.Base64InputStream;
+import be.nikiroo.utils.streams.Base64OutputStream;
+
  /**
   * This class offer some utilities based around {@link String}s.
   * 
@@ -33,19 +37,49 @@ import org.unbescape.html.HtmlEscapeType;
  public class StringUtils {
         /**
          * This enum type will decide the alignment of a {@link String} when padding
-        * is applied or if there is enough horizontal space for it to be aligned.
+        * or justification is applied (if there is enough horizontal space for it
+        * to be aligned).
          */
         public enum Alignment {
                 /** Aligned at left. */
-               Beginning,
+               LEFT,
                 /** Centered. */
-               Center,
+               CENTER,
                 /** Aligned at right. */
-               End
+               RIGHT,
+               /** Full justified (to both left and right). */
+               JUSTIFY,
+
+               // Old Deprecated values:
+
+               /** DEPRECATED: please use LEFT. */
+               @Deprecated
+               Beginning,
+               /** DEPRECATED: please use CENTER. */
+               @Deprecated
+               Center,
+               /** DEPRECATED: please use RIGHT. */
+               @Deprecated
+               End;
+
+               /**
+                * Return the non-deprecated version of this enum if needed (or return
+                * self if not).
+                * 
+                * @return the non-deprecated value
+                */
+               Alignment undeprecate() {
+                       if (this == Beginning)
+                               return LEFT;
+                       if (this == Center)
+                               return CENTER;
+                       if (this == End)
+                               return RIGHT;
+                       return this;
+               }
         }
  
-       static private Pattern marks = Pattern
-                       .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
+       static private Pattern marks = getMarks();
  
         /**
          * Fix the size of the given {@link String} either with space-padding or by
@@ -59,7 +93,7 @@ public class StringUtils {
          * @return the resulting {@link String} of size <i>size</i>
          */
         static public String padString(String text, int width) {
-               return padString(text, width, true, Alignment.Beginning);
+               return padString(text, width, true, null);
         }
  
         /**
@@ -75,13 +109,19 @@ public class StringUtils {
          *            cut the {@link String} shorter if needed
          * @param align
          *            align the {@link String} in this position if we have enough
-        *            space
+        *            space (default is Alignment.LEFT)
          * 
          * @return the resulting {@link String} of size <i>size</i> minimum
          */
         static public String padString(String text, int width, boolean cut,
                         Alignment align) {
  
+               if (align == null) {
+                       align = Alignment.LEFT;
+               }
+
+               align = align.undeprecate();
+
                 if (width >= 0) {
                         if (text == null)
                                 text = "";
@@ -92,23 +132,23 @@ public class StringUtils {
                                 if (cut)
                                         text = text.substring(0, width);
                         } else if (diff > 0) {
-                               if (diff < 2 && align != Alignment.End)
-                                       align = Alignment.Beginning;
+                               if (diff < 2 && align != Alignment.RIGHT)
+                                       align = Alignment.LEFT;
  
                                 switch (align) {
-                               case Beginning:
-                                       text = text + new String(new char[diff]).replace('\0', ' ');
-                                       break;
-                               case End:
+                               case RIGHT:
                                         text = new String(new char[diff]).replace('\0', ' ') + text;
                                         break;
-                               case Center:
-                               default:
+                               case CENTER:
                                         int pad1 = (diff) / 2;
                                         int pad2 = (diff + 1) / 2;
                                         text = new String(new char[pad1]).replace('\0', ' ') + text
                                                         + new String(new char[pad2]).replace('\0', ' ');
                                         break;
+                               case LEFT:
+                               default:
+                                       text = text + new String(new char[diff]).replace('\0', ' ');
+                                       break;
                                 }
                         }
                 }
@@ -116,6 +156,177 @@ public class StringUtils {
                 return text;
         }
  
+       /**
+        * Justify a text into width-sized (at the maximum) lines.
+        * 
+        * @param text
+        *            the {@link String} to justify
+        * @param width
+        *            the maximum size of the resulting lines
+        * 
+        * @return a list of justified text lines
+        */
+       static public List<String> justifyText(String text, int width) {
+               return justifyText(text, width, null);
+       }
+
+       /**
+        * Justify a text into width-sized (at the maximum) lines.
+        * 
+        * @param text
+        *            the {@link String} to justify
+        * @param width
+        *            the maximum size of the resulting lines
+        * @param align
+        *            align the lines in this position (default is
+        *            Alignment.LEFT)
+        * 
+        * @return a list of justified text lines
+        */
+       static public List<String> justifyText(String text, int width,
+                       Alignment align) {
+               if (align == null) {
+                       align = Alignment.LEFT;
+               }
+
+               align = align.undeprecate();
+
+               switch (align) {
+               case CENTER:
+                       return StringJustifier.center(text, width);
+               case RIGHT:
+                       return StringJustifier.right(text, width);
+               case JUSTIFY:
+                       return StringJustifier.full(text, width);
+               case LEFT:
+               default:
+                       return StringJustifier.left(text, width);
+               }
+       }
+
+       /**
+        * Justify a text into width-sized (at the maximum) lines.
+        * 
+        * @param text
+        *            the lines of text to justify
+        * @param width
+        *            the maximum size of the resulting lines
+        * 
+        * @return a list of justified text lines
+        */
+       static public List<String> justifyText(List<String> text, int width) {
+               return justifyText(text, width, null);
+       }
+
+       /**
+        * Justify a text into width-sized (at the maximum) lines.
+        * 
+        * @param text
+        *            the lines of text to justify
+        * @param width
+        *            the maximum size of the resulting lines
+        * @param align
+        *            align the lines in this position (default is
+        *            Alignment.LEFT)
+        * 
+        * @return a list of justified text lines
+        */
+       static public List<String> justifyText(List<String> text, int width,
+                       Alignment align) {
+               List<String> result = new ArrayList<String>();
+
+               // Content <-> Bullet spacing (null = no spacing)
+               List<Entry<String, String>> lines = new ArrayList<Entry<String, String>>();
+               StringBuilder previous = null;
+               StringBuilder tmp = new StringBuilder();
+               String previousItemBulletSpacing = null;
+               String itemBulletSpacing = null;
+               for (String inputLine : text) {
+                       boolean previousLineComplete = true;
+
+                       String current = inputLine.replace("\t", "    ");
+                       itemBulletSpacing = getItemSpacing(current);
+                       boolean bullet = isItemLine(current);
+                       if ((previousItemBulletSpacing == null || itemBulletSpacing
+                                       .length() <= previousItemBulletSpacing.length()) && !bullet) {
+                               itemBulletSpacing = null;
+                       }
+
+                       if (itemBulletSpacing != null) {
+                               current = current.trim();
+                               if (!current.isEmpty() && bullet) {
+                                       current = current.substring(1);
+                               }
+                               current = current.trim();
+                               previousLineComplete = bullet;
+                       } else {
+                               tmp.setLength(0);
+                               for (String word : current.split(" ")) {
+                                       if (word.isEmpty()) {
+                                               continue;
+                                       }
+
+                                       if (tmp.length() > 0) {
+                                               tmp.append(' ');
+                                       }
+                                       tmp.append(word.trim());
+                               }
+                               current = tmp.toString();
+
+                               previousLineComplete = current.isEmpty()
+                                               || previousItemBulletSpacing != null
+                                               || (previous != null && isFullLine(previous))
+                                               || isHrLine(current) || isHrLine(previous);
+                       }
+
+                       if (previous == null) {
+                               previous = new StringBuilder();
+                       } else {
+                               if (previousLineComplete) {
+                                       lines.add(new AbstractMap.SimpleEntry<String, String>(
+                                                       previous.toString(), previousItemBulletSpacing));
+                                       previous.setLength(0);
+                                       previousItemBulletSpacing = itemBulletSpacing;
+                               } else {
+                                       previous.append(' ');
+                               }
+                       }
+
+                       previous.append(current);
+
+               }
+
+               if (previous != null) {
+                       lines.add(new AbstractMap.SimpleEntry<String, String>(previous
+                                       .toString(), previousItemBulletSpacing));
+               }
+
+               for (Entry<String, String> line : lines) {
+                       String content = line.getKey();
+                       String spacing = line.getValue();
+
+                       String bullet = "- ";
+                       if (spacing == null) {
+                               bullet = "";
+                               spacing = "";
+                       }
+
+                       if (spacing.length() > width + 3) {
+                               spacing = "";
+                       }
+
+                       for (String subline : StringUtils.justifyText(content, width
+                                       - (spacing.length() + bullet.length()), align)) {
+                               result.add(spacing + bullet + subline);
+                               if (!bullet.isEmpty()) {
+                                       bullet = "  ";
+                               }
+                       }
+               }
+
+               return result;
+       }
+
         /**
          * Sanitise the given input to make it more Terminal-friendly by removing
          * combining characters.
@@ -150,7 +361,9 @@ public class StringUtils {
  
                 if (removeAllAccents) {
                         input = Normalizer.normalize(input, Form.NFKD);
-                       input = marks.matcher(input).replaceAll("");
+                       if (marks != null) {
+                               input = marks.matcher(input).replaceAll("");
+                       }
                 }
  
                 input = Normalizer.normalize(input, Form.NFKC);
@@ -172,11 +385,15 @@ public class StringUtils {
         }
  
         /**
-        * Convert between time in milliseconds to {@link String} in a "static" way
-        * (to exchange data over the wire, for instance).
+        * Convert between the time in milliseconds to a {@link String} in a "fixed"
+        * way (to exchange data over the wire, for instance).
+        * <p>
+        * Precise to the second.
          * 
          * @param time
-        *            the time in milliseconds
+        *            the specified number of milliseconds since the standard base
+        *            time known as "the epoch", namely January 1, 1970, 00:00:00
+        *            GMT
          * 
          * @return the time as a {@link String}
          */
@@ -186,422 +403,736 @@ public class StringUtils {
         }
  
         /**
-        * Convert between time as a {@link String} to milliseconds in a "static"
+        * Convert between the time as a {@link String} to milliseconds in a "fixed"
          * way (to exchange data over the wire, for instance).
+        * <p>
+        * Precise to the second.
          * 
-        * @param time
+        * @param displayTime
          *            the time as a {@link String}
          * 
-        * @return the time in milliseconds
+        * @return the number of milliseconds since the standard base time known as
+        *         "the epoch", namely January 1, 1970, 00:00:00 GMT (parse errors
+        *         are reported via {@link ParseException}, not via a return value)
+        * 
+        * @throws ParseException
+        *             in case of parse error
          */
-       static public long toTime(String display) {
+       static public long toTime(String displayTime) throws ParseException {
                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+               return sdf.parse(displayTime).getTime();
+       }
+
+       /**
+        * Return a hash of the given {@link String}.
+        * 
+        * @param input
+        *            the input data
+        * 
+        * @return the hash
+        */
+       static public String getMd5Hash(String input) {
                 try {
-                       return sdf.parse(display).getTime();
-               } catch (ParseException e) {
-                       return -1;
+                       MessageDigest md = MessageDigest.getInstance("MD5");
+                       md.update(getBytes(input));
+                       byte byteData[] = md.digest();
+
+                       StringBuffer hexString = new StringBuffer();
+                       for (int i = 0; i < byteData.length; i++) {
+                               String hex = Integer.toHexString(0xff & byteData[i]);
+                               if (hex.length() == 1)
+                                       hexString.append('0');
+                               hexString.append(hex);
+                       }
+
+                       return hexString.toString();
+               } catch (NoSuchAlgorithmException e) {
+                       return input;
                 }
         }
  
         /**
-        * Convert the given {@link Image} object into a Base64 representation of
-        * the same {@link Image}. object.
-        * 
-        * @param image
-        *            the {@link Image} object to convert
+        * Remove the HTML content from the given input, and un-html-ize the rest.
          * 
-        * @return the Base64 representation
+        * @param html
+        *            the HTML-encoded content
          * 
-        * @throws IOException
-        *             in case of IO error
+        * @return the HTML-free equivalent content
          */
-       static public String fromImage(BufferedImage image) throws IOException {
-               String imageString = null;
-               ByteArrayOutputStream out = new ByteArrayOutputStream();
+       public static String unhtml(String html) {
+               StringBuilder builder = new StringBuilder();
  
-               ImageIO.write(image, "jpeg", out);
-               byte[] imageBytes = out.toByteArray();
+               int inTag = 0;
+               for (char car : html.toCharArray()) {
+                       if (car == '<') {
+                               inTag++;
+                       } else if (car == '>') {
+                               inTag--;
+                       } else if (inTag <= 0) {
+                               builder.append(car);
+                       }
+               }
  
-               imageString = new String(Base64.getEncoder().encode(imageBytes));
+               char nbsp = ' '; // non-breakable space (a special char)
+               char space = ' ';
+               return HtmlEscape.unescapeHtml(builder.toString()).replace(nbsp, space);
+       }
  
-               out.close();
+       /**
+        * Escape the given {@link String} so it can be used in XML, as content.
+        * 
+        * @param input
+        *            the input {@link String}
+        * 
+        * @return the escaped {@link String}
+        */
+       public static String xmlEscape(String input) {
+               if (input == null) {
+                       return "";
+               }
+
+               return HtmlEscape.escapeHtml(input,
+                               HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
+                               HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
+       }
+
+       /**
+        * Escape the given {@link String} so it can be used in XML, as text content
+        * inside double-quotes.
+        * 
+        * @param input
+        *            the input {@link String}
+        * 
+        * @return the escaped {@link String}
+        */
+       public static String xmlEscapeQuote(String input) {
+               if (input == null) {
+                       return "";
+               }
  
-               return imageString;
+               return HtmlEscape.escapeHtml(input,
+                               HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
+                               HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
         }
  
         /**
-        * Convert the given {@link File} image into a Base64 representation of the
-        * same {@link File}.
+        * Zip the data and then encode it into Base64.
          * 
-        * @param file
-        *            the {@link File} image to convert
+        * @param data
+        *            the data
          * 
-        * @return the Base64 representation
+        * @return the Base64 zipped version
          * 
          * @throws IOException
-        *             in case of IO error
+        *             in case of I/O error
          */
-       static public String fromStream(InputStream in) throws IOException {
-               String fileString = null;
-               ByteArrayOutputStream out = new ByteArrayOutputStream();
+       public static String zip64(String data) throws IOException {
+               try {
+                       return zip64(getBytes(data));
+               } catch (UnsupportedEncodingException e) {
+                       // All conforming JVM are required to support UTF-8
+                       e.printStackTrace();
+                       return null;
+               }
+       }
  
-               byte[] buf = new byte[8192];
+       /**
+        * Zip the data and then encode it into Base64.
+        * 
+        * @param data
+        *            the data
+        * 
+        * @return the Base64 zipped version
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public static String zip64(byte[] data) throws IOException {
+               // 1. compress
+               ByteArrayOutputStream bout = new ByteArrayOutputStream();
+               try {
+                       OutputStream out = new GZIPOutputStream(bout);
+                       try {
+                               out.write(data);
+                       } finally {
+                               out.close();
+                       }
+               } finally {
+                       data = bout.toByteArray();
+                       bout.close();
+               }
  
-               int c = 0;
-               while ((c = in.read(buf, 0, buf.length)) > 0) {
-                       out.write(buf, 0, c);
+               // 2. base64
+               InputStream in = new ByteArrayInputStream(data);
+               try {
+                       in = new Base64InputStream(in, true);
+                       return new String(IOUtils.toByteArray(in), "UTF-8");
+               } finally {
+                       in.close();
                 }
-               out.flush();
-               in.close();
+       }
  
-               fileString = new String(Base64.getEncoder().encode(out.toByteArray()));
-               out.close();
+       /**
+        * Unconvert from Base64 then unzip the content, which is assumed to be a
+        * String.
+        * 
+        * @param data
+        *            the data in Base64 format
+        * 
+        * @return the raw data
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public static String unzip64s(String data) throws IOException {
+               return new String(unzip64(data), "UTF-8");
+       }
+
+       /**
+        * Unconvert from Base64 then unzip the content.
+        * 
+        * @param data
+        *            the data in Base64 format
+        * 
+        * @return the raw data
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public static byte[] unzip64(String data) throws IOException {
+               InputStream in = new Base64InputStream(new ByteArrayInputStream(
+                               getBytes(data)), false);
+               try {
+                       in = new GZIPInputStream(in);
+                       return IOUtils.toByteArray(in);
+               } finally {
+                       in.close();
+               }
+       }
  
-               return fileString;
+       /**
+        * Convert the given data to Base64 format.
+        * 
+        * @param data
+        *            the data to convert
+        * 
+        * @return the Base64 {@link String} representation of the data
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static String base64(String data) throws IOException {
+               return base64(getBytes(data));
         }
  
         /**
-        * Convert the given Base64 representation of an image into an {@link Image}
-        * object.
+        * Convert the given data to Base64 format.
          * 
-        * @param b64data
-        *            the {@link Image} in Base64 format
+        * @param data
+        *            the data to convert
          * 
-        * @return the {@link Image} object
+        * @return the Base64 {@link String} representation of the data
          * 
          * @throws IOException
-        *             in case of IO error
+        *             in case of I/O errors
          */
-       static public BufferedImage toImage(String b64data) throws IOException {
-               ByteArrayInputStream in = new ByteArrayInputStream(Base64.getDecoder()
-                               .decode(b64data));
-               return toImage(in);
+       public static String base64(byte[] data) throws IOException {
+               Base64InputStream in = new Base64InputStream(new ByteArrayInputStream(
+                               data), true);
+               try {
+                       return new String(IOUtils.toByteArray(in), "UTF-8");
+               } finally {
+                       in.close();
+               }
         }
  
         /**
-        * Convert the given {@link InputStream} (which must allow calls to
-        * {@link InputStream#reset()}) into an {@link Image} object.
+        * Unconvert the given data from Base64 format back to a raw array of bytes.
          * 
-        * @param in
-        *            the 'resetable' {@link InputStream}
+        * @param data
+        *            the data to unconvert
          * 
-        * @return the {@link Image} object
+        * @return the raw data represented by the given Base64 {@link String}
          * 
          * @throws IOException
-        *             in case of IO error
+        *             in case of I/O errors
          */
-       static public BufferedImage toImage(InputStream in) throws IOException {
-               int orientation;
+       public static byte[] unbase64(String data) throws IOException {
+               Base64InputStream in = new Base64InputStream(new ByteArrayInputStream(
+                               getBytes(data)), false);
                 try {
-                       orientation = getExifTransorm(in);
-               } catch (Exception e) {
-                       // no EXIF transform, ok
-                       orientation = -1;
-               }
-
-               in.reset();
-               BufferedImage image = ImageIO.read(in);
-
-               if (image == null) {
-                       throw new IOException("Failed to convert input to image");
-               }
-
-               // Note: this code has been found on internet;
-               // thank you anonymous coder.
-               int width = image.getWidth();
-               int height = image.getHeight();
-               AffineTransform affineTransform = new AffineTransform();
-
-               switch (orientation) {
-               case 1:
-                       break;
-               case 2: // Flip X
-                       affineTransform.scale(-1.0, 1.0);
-                       affineTransform.translate(-width, 0);
-                       break;
-               case 3: // PI rotation
-                       affineTransform.translate(width, height);
-                       affineTransform.rotate(Math.PI);
-                       break;
-               case 4: // Flip Y
-                       affineTransform.scale(1.0, -1.0);
-                       affineTransform.translate(0, -height);
-                       break;
-               case 5: // - PI/2 and Flip X
-                       affineTransform.rotate(-Math.PI / 2);
-                       affineTransform.scale(-1.0, 1.0);
-                       break;
-               case 6: // -PI/2 and -width
-                       affineTransform.translate(height, 0);
-                       affineTransform.rotate(Math.PI / 2);
-                       break;
-               case 7: // PI/2 and Flip
-                       affineTransform.scale(-1.0, 1.0);
-                       affineTransform.translate(-height, 0);
-                       affineTransform.translate(0, width);
-                       affineTransform.rotate(3 * Math.PI / 2);
-                       break;
-               case 8: // PI / 2
-                       affineTransform.translate(0, width);
-                       affineTransform.rotate(3 * Math.PI / 2);
-                       break;
-               default:
-                       affineTransform = null;
-                       break;
+                       return IOUtils.toByteArray(in);
+               } finally {
+                       in.close();
+               }
+       }
+
+       /**
+        * Unconvert the given data from Base64 format back to a {@link String}.
+        * 
+        * @param data
+        *            the data to unconvert
+        * 
+        * @return the {@link String} represented by the given Base64 {@link String}
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       public static String unbase64s(String data) throws IOException {
+               return new String(unbase64(data), "UTF-8");
+       }
+
+       /**
+        * Return a display {@link String} for the given value, which can be
+        * suffixed with " k", " M" or " G" depending upon the number, if it is
+        * big enough.
+        * <p>
+        * Examples:
+        * <ul>
+        * <li><tt>8 765</tt> becomes "8 k"</li>
+        * <li><tt>998 765</tt> becomes "998 k"</li>
+        * <li><tt>12 987 364</tt> becomes "12 M"</li>
+        * <li><tt>5 534 333 221</tt> becomes "5 G"</li>
+        * </ul>
+        * 
+        * @param value
+        *            the value to convert
+        * 
+        * @return the display value
+        */
+       public static String formatNumber(long value) {
+               return formatNumber(value, 0);
+       }
+
+       /**
+        * Return a display {@link String} for the given value, which can be
+        * suffixed with " k", " M" or " G" if the number is big enough.
+        * <p>
+        * Examples (assuming decimalPositions = 1):
+        * <ul>
+        * <li><tt>8 765</tt> becomes "8.7 k"</li>
+        * <li><tt>998 765</tt> becomes "998.7 k"</li>
+        * <li><tt>12 987 364</tt> becomes "12.9 M"</li>
+        * <li><tt>5 534 333 221</tt> becomes "5.5 G"</li>
+        * </ul>
+        * 
+        * @param value
+        *            the value to convert
+        * @param decimalPositions
+        *            the number of decimal positions to keep
+        * 
+        * @return the display value
+        */
+       public static String formatNumber(long value, int decimalPositions) {
+               long userValue = value;
+               String suffix = "";
+               long mult = 1;
+
+               if (value >= 1000000000l) {
+                       mult = 1000000000l;
+                       userValue = value / 1000000000l;
+                       suffix = " G";
+               } else if (value >= 1000000l) {
+                       mult = 1000000l;
+                       userValue = value / 1000000l;
+                       suffix = " M";
+               } else if (value >= 1000l) {
+                       mult = 1000l;
+                       userValue = value / 1000l;
+                       suffix = " k";
                 }
  
-               if (affineTransform != null) {
-                       AffineTransformOp affineTransformOp = new AffineTransformOp(
-                                       affineTransform, AffineTransformOp.TYPE_BILINEAR);
+               String deci = "";
+               if (decimalPositions > 0) {
+                       deci = Long.toString(value % mult);
+                       int size = Long.toString(mult).length() - 1;
+                       while (deci.length() < size) {
+                               deci = "0" + deci;
+                       }
  
-                       BufferedImage transformedImage = new BufferedImage(height, width,
-                                       image.getType());
-                       transformedImage = affineTransformOp
-                                       .filter(image, transformedImage);
+                       deci = deci.substring(0, Math.min(decimalPositions, deci.length()));
+                       while (deci.length() < decimalPositions) {
+                               deci += "0";
+                       }
  
-                       image = transformedImage;
+                       deci = "." + deci;
                 }
-               //
  
-               return image;
+               return Long.toString(userValue) + deci + suffix;
         }
  
         /**
-        * Return a hash of the given {@link String}.
+        * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
+        * read a "display" number that can contain a "M" or "k" suffix and return
+        * the full value.
+        * <p>
+        * Of course, the conversion to and from display form is lossy (example:
+        * <tt>6870</tt> to "6.8 k" to <tt>6800</tt>).
          * 
-        * @param input
-        *            the input data
+        * @param value
+        *            the value in display form with possible "M" and "k" suffixes,
+        *            can be NULL
          * 
-        * @return the hash
+        * @return the value as a number, or 0 if not possible to convert
          */
-       static public String getHash(String input) {
-               try {
-                       MessageDigest md = MessageDigest.getInstance("MD5");
-                       md.update(input.getBytes());
-                       byte byteData[] = md.digest();
+       public static long toNumber(String value) {
+               return toNumber(value, 0l); // 0 is the value returned when the conversion fails
+       }
  
-                       StringBuffer hexString = new StringBuffer();
-                       for (int i = 0; i < byteData.length; i++) {
-                               String hex = Integer.toHexString(0xff & byteData[i]);
-                               if (hex.length() == 1)
-                                       hexString.append('0');
-                               hexString.append(hex);
+       /**
+        * The reverse operation to {@link StringUtils#formatNumber(long)}: it will
+        * read a "display" number that can contain a "G", "M" or "k" suffix (in
+        * upper or lower case, with or without spaces around it) and return the
+        * full value.
+        * <p>
+        * Of course, the conversion to and from display form is lossy (example:
+        * <tt>6870</tt> to "6.8 k" to <tt>6800</tt>).
+        * 
+        * @param value
+        *            the value in display form with possible "G", "M" and "k"
+        *            suffixes, can be NULL
+        * @param def
+        *            the default value if it is not possible to convert the given
+        *            value to a number
+        * 
+        * @return the value as a number, or <tt>def</tt> if not possible to
+        *         convert (or if the value is NULL)
+        */
+       public static long toNumber(String value, long def) {
+               long count = def;
+               if (value != null) {
+                       value = value.trim().toLowerCase(); // suffixes are matched in lower case
+                       try {
+                               long mult = 1;
+                               if (value.endsWith("g")) {
+                                       value = value.substring(0, value.length() - 1).trim();
+                                       mult = 1000000000;
+                               } else if (value.endsWith("m")) {
+                                       value = value.substring(0, value.length() - 1).trim();
+                                       mult = 1000000;
+                               } else if (value.endsWith("k")) {
+                                       value = value.substring(0, value.length() - 1).trim();
+                                       mult = 1000;
+                               }
+
+                               long deci = 0;
+                               if (value.contains(".")) {
+                                       String[] tab = value.split("\\.");
+                                       if (tab.length != 2) { // rejects e.g. "1.2.3" or "5."
+                                               throw new NumberFormatException(value);
+                                       }
+                                       double decimal = Double.parseDouble("0."
+                                                       + tab[tab.length - 1]);
+                                       deci = ((long) (mult * decimal)); // the cast truncates toward zero
+                                       value = tab[0];
+                               }
+                               count = mult * Long.parseLong(value) + deci; // NOTE(review): "-1.5k" gives -500 (deci is always added) -- confirm
+                       } catch (Exception e) { // any parsing failure: count keeps the default value
                         }
+               }
  
-                       return hexString.toString();
-               } catch (NoSuchAlgorithmException e) {
-                       return input;
+               return count;
+       }
+
+       /**
+        * Return the bytes array representation of the given {@link String} in
+        * UTF-8.
+        * 
+        * @param str
+        *            the {@link String} to transform into bytes (must not be NULL)
+        * @return the content in bytes, or NULL if UTF-8 is not supported by the
+        *         JVM (the stack trace is then printed)
+        */
+       static public byte[] getBytes(String str) {
+               try {
+                       return str.getBytes("UTF-8");
+               } catch (UnsupportedEncodingException e) {
+                       // All conforming JVM must support UTF-8
+                       e.printStackTrace();
+                       return null;
                 }
         }
  
         /**
-        * Return the EXIF transformation flag of this image if any.
+        * The "remove accents" pattern.
          * 
+        * @return the pattern, or NULL if a problem happens
+        */
+       private static Pattern getMarks() {
+               try { // the pattern is compiled again on each call of this method
+                       return Pattern // combining diacritical marks block + Lm and Sk categories
+                                       .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
+               } catch (Exception e) {
+                       // Can fail on Android...
+                       return null;
+               }
+       }
+
+       //
+       // justify List<String> related:
+       //
+
+       /**
+        * Check if this line ends as a complete line (ends with a "." or similar).
          * <p>
-        * Note: this code has been found on internet; thank you anonymous coder.
-        * </p>
-        * 
-        * @param in
-        *            the data {@link InputStream}
+        * Note that we consider an empty line as full, and a line ending with
+        * spaces as not complete.
          * 
-        * @return the transformation flag if any
+        * @param line
+        *            the line to check
          * 
-        * @throws IOException
-        *             in case of IO error
-        */
-       static private int getExifTransorm(InputStream in) throws IOException {
-               int[] exif_data = new int[100];
-               int set_flag = 0;
-               int is_motorola = 0;
-
-               /* Read File head, check for JPEG SOI + Exif APP1 */
-               for (int i = 0; i < 4; i++)
-                       exif_data[i] = in.read();
-
-               if (exif_data[0] != 0xFF || exif_data[1] != 0xD8
-                               || exif_data[2] != 0xFF || exif_data[3] != 0xE1)
-                       return -2;
-
-               /* Get the marker parameter length count */
-               int length = (in.read() << 8 | in.read());
-
-               /* Length includes itself, so must be at least 2 */
-               /* Following Exif data length must be at least 6 */
-               if (length < 8)
-                       return -1;
-               length -= 8;
-               /* Read Exif head, check for "Exif" */
-               for (int i = 0; i < 6; i++)
-                       exif_data[i] = in.read();
-
-               if (exif_data[0] != 0x45 || exif_data[1] != 0x78
-                               || exif_data[2] != 0x69 || exif_data[3] != 0x66
-                               || exif_data[4] != 0 || exif_data[5] != 0)
-                       return -1;
-
-               /* Read Exif body */
-               length = length > exif_data.length ? exif_data.length : length;
-               for (int i = 0; i < length; i++)
-                       exif_data[i] = in.read();
-
-               if (length < 12)
-                       return -1; /* Length of an IFD entry */
-
-               /* Discover byte order */
-               if (exif_data[0] == 0x49 && exif_data[1] == 0x49)
-                       is_motorola = 0;
-               else if (exif_data[0] == 0x4D && exif_data[1] == 0x4D)
-                       is_motorola = 1;
-               else
-                       return -1;
-
-               /* Check Tag Mark */
-               if (is_motorola == 1) {
-                       if (exif_data[2] != 0)
-                               return -1;
-                       if (exif_data[3] != 0x2A)
-                               return -1;
-               } else {
-                       if (exif_data[3] != 0)
-                               return -1;
-                       if (exif_data[2] != 0x2A)
-                               return -1;
-               }
-
-               /* Get first IFD offset (offset to IFD0) */
-               int offset;
-               if (is_motorola == 1) {
-                       if (exif_data[4] != 0)
-                               return -1;
-                       if (exif_data[5] != 0)
-                               return -1;
-                       offset = exif_data[6];
-                       offset <<= 8;
-                       offset += exif_data[7];
-               } else {
-                       if (exif_data[7] != 0)
-                               return -1;
-                       if (exif_data[6] != 0)
-                               return -1;
-                       offset = exif_data[5];
-                       offset <<= 8;
-                       offset += exif_data[4];
-               }
-               if (offset > length - 2)
-                       return -1; /* check end of data segment */
-
-               /* Get the number of directory entries contained in this IFD */
-               int number_of_tags;
-               if (is_motorola == 1) {
-                       number_of_tags = exif_data[offset];
-                       number_of_tags <<= 8;
-                       number_of_tags += exif_data[offset + 1];
-               } else {
-                       number_of_tags = exif_data[offset + 1];
-                       number_of_tags <<= 8;
-                       number_of_tags += exif_data[offset];
-               }
-               if (number_of_tags == 0)
-                       return -1;
-               offset += 2;
-
-               /* Search for Orientation Tag in IFD0 */
-               for (;;) {
-                       if (offset > length - 12)
-                               return -1; /* check end of data segment */
-                       /* Get Tag number */
-                       int tagnum;
-                       if (is_motorola == 1) {
-                               tagnum = exif_data[offset];
-                               tagnum <<= 8;
-                               tagnum += exif_data[offset + 1];
-                       } else {
-                               tagnum = exif_data[offset + 1];
-                               tagnum <<= 8;
-                               tagnum += exif_data[offset];
-                       }
-                       if (tagnum == 0x0112)
-                               break; /* found Orientation Tag */
-                       if (--number_of_tags == 0)
-                               return -1;
-                       offset += 12;
+        * @return TRUE if it does
+        */
+       static private boolean isFullLine(StringBuilder line) {
+               if (line.length() == 0) { // an empty line counts as complete
+                       return true;
                 }
  
-               /* Get the Orientation value */
-               if (is_motorola == 1) {
-                       if (exif_data[offset + 8] != 0)
-                               return -1;
-                       set_flag = exif_data[offset + 9];
-               } else {
-                       if (exif_data[offset + 9] != 0)
-                               return -1;
-                       set_flag = exif_data[offset + 8];
+               char lastCar = line.charAt(line.length() - 1); // only the very last char decides
+               switch (lastCar) {
+               case '.': // points
+               case '?':
+               case '!':
+
+               case '\'': // quotes
+               case '‘':
+               case '’':
+
+               case '"': // double quotes
+               case '”':
+               case '“':
+               case '»':
+               case '«':
+                       return true;
+               default: // anything else, including a trailing space
+                       return false;
                 }
-               if (set_flag > 8)
-                       return -1;
+       }
  
-               return set_flag;
+       /**
+        * Check if this line represents an item in a list or description (i.e.,
+        * check that the line starts with at least one space and that its first
+        * non-space char is "-").
+        * <p>
+        * A line that is empty or only made of spaces is not an item line.
+        * 
+        * @param line
+        *            the line to check
+        * 
+        * @return TRUE if it is
+        */
+       static private boolean isItemLine(String line) {
+               String spacing = getItemSpacing(line);
+               return spacing != null && !spacing.isEmpty()
+                               && line.charAt(spacing.length()) == '-';
         }
  
         /**
-        * Remove the HTML content from the given input, and un-html-ize the rest.
+        * Return all the spaces that start this line (or Empty if none, or if the
+        * line only contains spaces).
          * 
-        * @param html
-        *            the HTML-encoded content
+        * @param line
+        *            the line to get the starting spaces from
          * 
-        * @return the HTML-free equivalent content
+        * @return the left spacing
          */
-       public static String unhtml(String html) {
-               StringBuilder builder = new StringBuilder();
+       static private String getItemSpacing(String line) {
+               int i;
+               for (i = 0; i < line.length(); i++) {
+                       if (line.charAt(i) != ' ') { // only plain spaces count, a tab stops the scan
+                               return line.substring(0, i);
+                       }
+               }
  
-               int inTag = 0;
-               for (char car : html.toCharArray()) {
-                       if (car == '<') {
-                               inTag++;
-                       } else if (car == '>') {
-                               inTag--;
-                       } else if (inTag <= 0) {
-                               builder.append(car);
+               return ""; // no non-space char found: empty line or line only made of spaces
+       }
+
+       /**
+        * This line is an horizontal spacer line: it contains more than 2
+        * characters, all of them being spaces, tabs or one of
+        * <tt>* - _ ~ = / \</tt>.
+        * 
+        * @param line
+        *            the line to test, can be NULL (which is never a spacer line)
+        * 
+        * @return TRUE if it is
+        */
+       static private boolean isHrLine(CharSequence line) {
+               int count = 0;
+               if (line != null) {
+                       for (int i = 0; i < line.length(); i++) {
+                               char car = line.charAt(i);
+                               if (car == ' ' || car == '\t' || car == '*' || car == '-'
+                                               || car == '_' || car == '~' || car == '=' || car == '/'
+                                               || car == '\\') {
+                                       count++;
+                               } else {
+                                       return false;
+                               }
                         }
                 }
  
-               return HtmlEscape.unescapeHtml(builder.toString());
+               return count > 2;
+       }
+
+       // Deprecated functions, please do not use //
+
+       /**
+        * @deprecated please use {@link StringUtils#zip64(byte[])} or
+        *             {@link StringUtils#base64(byte[])} instead.
+        * 
+        * @param data
+        *            the data to encode
+        * @param zip
+        *            TRUE to zip it before Base64 encoding it, FALSE for Base64
+        *            encoding only
+        * 
+        * @return the encoded data
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       @Deprecated
+       public static String base64(String data, boolean zip) throws IOException {
+               return base64(getBytes(data), zip); // works on the UTF-8 bytes of the String
         }
  
         /**
-        * Escape the given {@link String} so it can be used in XML, as content.
+        * @deprecated please use {@link StringUtils#zip64(String)} or
+        *             {@link StringUtils#base64(String)} instead.
          * 
-        * @param input
-        *            the input {@link String}
+        * @param data
+        *            the data to encode
+        * @param zip
+        *            TRUE to zip it before Base64 encoding it, FALSE for Base64
+        *            encoding only
          * 
-        * @return the escaped {@link String}
+        * @return the encoded data
+        * 
+        * @throws IOException
+        *             in case of I/O error
          */
-       public static String xmlEscape(String input) {
-               if (input == null) {
-                       return "";
+       @Deprecated
+       public static String base64(byte[] data, boolean zip) throws IOException {
+               if (zip) {
+                       return zip64(data);
                 }
  
-               return HtmlEscape.escapeHtml(input,
-                               HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
-                               HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
+               Base64InputStream b64 = new Base64InputStream(new ByteArrayInputStream(
+                               data), true); // presumably "true" selects the encoding mode -- TODO confirm
+               try {
+                       return IOUtils.readSmallStream(b64);
+               } finally {
+                       b64.close();
+               }
         }
  
         /**
-        * Escape the given {@link String} so it can be used in XML, as text content
-        * inside double-quotes.
+        * @deprecated please use {@link Base64OutputStream} and
+        *             {@link GZIPOutputStream} instead.
          * 
-        * @param input
-        *            the input {@link String}
+        * @param breakLines
+        *            NOT USED ANYMORE, it is always considered FALSE now
+        */
+       @Deprecated
+       public static OutputStream base64(OutputStream data, boolean zip,
+                       boolean breakLines) throws IOException { // breakLines is never read
+               OutputStream out = new Base64OutputStream(data);
+               if (zip) {
+                       out = new java.util.zip.GZIPOutputStream(out); // GZIP output is fed to the Base64 stream
+               }
+
+               return out;
+       }
+
+       /**
+        * Unconvert the given data from Base64 format back to a raw array of bytes.
+        * <p>
+        * Will automatically detect zipped data and also uncompress it before
+        * returning, unless ZIP is false.
          * 
-        * @return the escaped {@link String}
+        * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
+        * 
+        * @param data
+        *            the data to unconvert
+        * @param zip
+        *            TRUE to also uncompress the data from a GZIP format
+        *            automatically; if set to FALSE, zipped data can be returned
+        * 
+        * @return the raw data represented by the given Base64 {@link String},
+        *         optionally compressed with GZIP
+        * 
+        * @throws IOException
+        *             in case of I/O errors
          */
-       public static String xmlEscapeQuote(String input) {
-               if (input == null) {
-                       return "";
+       @Deprecated
+       public static byte[] unbase64(String data, boolean zip) throws IOException {
+               byte[] buffer = unbase64(data);
+               if (!zip) {
+                       return buffer;
                 }
  
-               return HtmlEscape.escapeHtml(input,
-                               HtmlEscapeType.HTML4_NAMED_REFERENCES_DEFAULT_TO_HEXA,
-                               HtmlEscapeLevel.LEVEL_1_ONLY_MARKUP_SIGNIFICANT);
+               try {
+                       GZIPInputStream zipped = new GZIPInputStream(
+                                       new ByteArrayInputStream(buffer));
+                       try {
+                               ByteArrayOutputStream out = new ByteArrayOutputStream();
+                               try {
+                                       IOUtils.write(zipped, out);
+                                       return out.toByteArray();
+                               } finally {
+                                       out.close();
+                               }
+                       } finally {
+                               zipped.close();
+                       }
+               } catch (Exception e) { // GZIP decoding failed: fall back to the Base64-decoded bytes as is
+                       return buffer;
+               }
+       }
+
+       /**
+        * Unconvert the given data from Base64 format back to a raw array of bytes.
+        * <p>
+        * Will automatically detect zipped data and also uncompress it before
+        * returning, unless ZIP is false.
+        * 
+        * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
+        * 
+        * @param data
+        *            the data to unconvert
+        * @param zip
+        *            TRUE to also uncompress the data from a GZIP format
+        *            automatically; if set to FALSE, zipped data can be returned
+        * 
+        * @return the raw data represented by the given Base64 {@link String},
+        *         optionally compressed with GZIP
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       @Deprecated
+       public static InputStream unbase64(InputStream data, boolean zip)
+                       throws IOException { // the whole input is read and decoded before returning
+               return new ByteArrayInputStream(unbase64(IOUtils.readSmallStream(data),
+                               zip));
+       }
+
+       /**
+        * Unconvert a part of the given Base64 data (read as UTF-8 text) back to
+        * a raw array of bytes.
+        * 
+        * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
+        * 
+        * @param data
+        *            the array containing the Base64 data
+        * @param offset
+        *            the index of the first byte to use in <tt>data</tt>
+        * @param count
+        *            the number of bytes to use from <tt>data</tt>
+        * @param zip
+        *            passed as is to {@link StringUtils#unbase64(String, boolean)}
+        * 
+        * @return the raw data
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       @Deprecated
+       public static byte[] unbase64(byte[] data, int offset, int count,
+                       boolean zip) throws IOException {
+               byte[] dataPart = Arrays.copyOfRange(data, offset, offset + count);
+               return unbase64(new String(dataPart, "UTF-8"), zip);
+       }
+
+       /**
+        * Unconvert the given data from Base64 format back to a {@link String}
+        * (the decoded bytes are read as UTF-8).
+        * 
+        * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
+        * 
+        * @param data
+        *            the data to unconvert
+        * @param zip
+        *            passed as is to {@link StringUtils#unbase64(String, boolean)}
+        * 
+        * @return the decoded {@link String}
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       @Deprecated
+       public static String unbase64s(String data, boolean zip) throws IOException {
+               return new String(unbase64(data, zip), "UTF-8");
+       }
+
+       /**
+        * Unconvert a part of the given Base64 data back to a {@link String} (the
+        * decoded bytes are read as UTF-8).
+        * 
+        * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
+        * 
+        * @param data
+        *            the array containing the Base64 data
+        * @param offset
+        *            the index of the first byte to use in <tt>data</tt>
+        * @param count
+        *            the number of bytes to use from <tt>data</tt>
+        * @param zip
+        *            passed as is to
+        *            {@link StringUtils#unbase64(byte[], int, int, boolean)}
+        * 
+        * @return the decoded {@link String}
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       @Deprecated
+       public static String unbase64s(byte[] data, int offset, int count,
+                       boolean zip) throws IOException {
+               return new String(unbase64(data, offset, count, zip), "UTF-8");
         }
  }