X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;ds=sidebyside;f=src%2Fbe%2Fnikiroo%2Futils%2FStringUtils.java;h=be1c654502f58bd91554873a0b241876837a33c0;hb=0c410e69d6891b49b88b1d9e12ebe127de5b7edf;hp=2c90d29d0e1bc45c1cb4dfadfb64d4cd7988ee21;hpb=c0c091af3d5ecd11fd46f517e6a1493f1454ceb0;p=fanfix.git diff --git a/src/be/nikiroo/utils/StringUtils.java b/src/be/nikiroo/utils/StringUtils.java index 2c90d29..be1c654 100644 --- a/src/be/nikiroo/utils/StringUtils.java +++ b/src/be/nikiroo/utils/StringUtils.java @@ -1,7 +1,10 @@ package be.nikiroo.utils; import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -11,16 +14,21 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.AbstractMap; import java.util.ArrayList; +import java.util.Arrays; import java.util.Date; import java.util.List; import java.util.Map.Entry; -import java.util.Scanner; import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; import org.unbescape.html.HtmlEscape; import org.unbescape.html.HtmlEscapeLevel; import org.unbescape.html.HtmlEscapeType; +import be.nikiroo.utils.streams.Base64InputStream; +import be.nikiroo.utils.streams.Base64OutputStream; + /** * This class offer some utilities based around {@link String}s. * @@ -148,6 +156,30 @@ public class StringUtils { return text; } + /** + * Justify a text into width-sized (at the maximum) lines and return all the + * lines concatenated into a single '\\n'-separated line of text. 
+ * + * @param text + * the {@link String} to justify + * @param width + * the maximum size of the resulting lines + * + * @return a list of justified text lines concatenated into a single + * '\\n'-separated line of text + */ + static public String justifyTexts(String text, int width) { + StringBuilder builder = new StringBuilder(); + for (String line : justifyText(text, width, null)) { + if (builder.length() > 0) { + builder.append('\n'); + } + builder.append(line); + } + + return builder.toString(); + } + /** * Justify a text into width-sized (at the maximum) lines. * @@ -422,11 +454,14 @@ public class StringUtils { * the input data * * @return the hash + * + * @deprecated please use {@link HashUtils} */ + @Deprecated static public String getMd5Hash(String input) { try { MessageDigest md = MessageDigest.getInstance("MD5"); - md.update(input.getBytes("UTF-8")); + md.update(getBytes(input)); byte byteData[] = md.digest(); StringBuffer hexString = new StringBuffer(); @@ -440,8 +475,6 @@ public class StringUtils { return hexString.toString(); } catch (NoSuchAlgorithmException e) { return input; - } catch (UnsupportedEncodingException e) { - return input; } } @@ -516,16 +549,72 @@ public class StringUtils { * the data * * @return the Base64 zipped version + * + * @throws IOException + * in case of I/O error */ - public static String zip64(String data) { + public static String zip64(String data) throws IOException { try { - return Base64.encodeBytes(data.getBytes(), Base64.GZIP); - } catch (IOException e) { + return zip64(getBytes(data)); + } catch (UnsupportedEncodingException e) { + // All conforming JVM are required to support UTF-8 e.printStackTrace(); return null; } } + /** + * Zip the data and then encode it into Base64. + * + * @param data + * the data + * + * @return the Base64 zipped version + * + * @throws IOException + * in case of I/O error + */ + public static String zip64(byte[] data) throws IOException { + // 1. 
compress + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + try { + OutputStream out = new GZIPOutputStream(bout); + try { + out.write(data); + } finally { + out.close(); + } + } finally { + data = bout.toByteArray(); + bout.close(); + } + + // 2. base64 + InputStream in = new ByteArrayInputStream(data); + try { + in = new Base64InputStream(in, true); + return new String(IOUtils.toByteArray(in), "UTF-8"); + } finally { + in.close(); + } + } + + /** + * Unconvert from Base64 then unzip the content, which is assumed to be a + * String. + * + * @param data + * the data in Base64 format + * + * @return the raw data + * + * @throws IOException + * in case of I/O error + */ + public static String unzip64s(String data) throws IOException { + return new String(unzip64(data), "UTF-8"); + } + /** * Unconvert from Base64 then unzip the content. * @@ -537,16 +626,254 @@ public class StringUtils { * @throws IOException * in case of I/O error */ - public static String unzip64(String data) throws IOException { - ByteArrayInputStream in = new ByteArrayInputStream(Base64.decode(data, - Base64.GZIP)); + public static byte[] unzip64(String data) throws IOException { + InputStream in = new Base64InputStream(new ByteArrayInputStream( + getBytes(data)), false); + try { + in = new GZIPInputStream(in); + return IOUtils.toByteArray(in); + } finally { + in.close(); + } + } - Scanner scan = new Scanner(in); - scan.useDelimiter("\\A"); + /** + * Convert the given data to Base64 format. + * + * @param data + * the data to convert + * + * @return the Base64 {@link String} representation of the data + * + * @throws IOException + * in case of I/O errors + */ + public static String base64(String data) throws IOException { + return base64(getBytes(data)); + } + + /** + * Convert the given data to Base64 format. 
+ * + * @param data + * the data to convert + * + * @return the Base64 {@link String} representation of the data + * + * @throws IOException + * in case of I/O errors + */ + public static String base64(byte[] data) throws IOException { + Base64InputStream in = new Base64InputStream(new ByteArrayInputStream( + data), true); try { - return scan.next(); + return new String(IOUtils.toByteArray(in), "UTF-8"); } finally { - scan.close(); + in.close(); + } + } + + /** + * Unconvert the given data from Base64 format back to a raw array of bytes. + * + * @param data + * the data to unconvert + * + * @return the raw data represented by the given Base64 {@link String}, + * + * @throws IOException + * in case of I/O errors + */ + public static byte[] unbase64(String data) throws IOException { + Base64InputStream in = new Base64InputStream(new ByteArrayInputStream( + getBytes(data)), false); + try { + return IOUtils.toByteArray(in); + } finally { + in.close(); + } + } + + /** + * Unonvert the given data from Base64 format back to a {@link String}. + * + * @param data + * the data to unconvert + * + * @return the {@link String} represented by the given Base64 {@link String} + * + * @throws IOException + * in case of I/O errors + */ + public static String unbase64s(String data) throws IOException { + return new String(unbase64(data), "UTF-8"); + } + + /** + * Return a display {@link String} for the given value, which can be + * suffixed with "k" or "M" depending upon the number, if it is big enough. + *
+ *
+ * Examples: + *
+ * Examples (assuming decimalPositions = 1): + *
+ * Of course, the conversion to and from display form is lossy (example: + * 6870 to "6.5k" to 6500). + * + * @param value + * the value in display form with possible "M" and "k" suffixes, + * can be NULL + * + * @return the value as a number, or 0 if not possible to convert + */ + public static long toNumber(String value) { + return toNumber(value, 0l); + } + + /** + * The reverse operation to {@link StringUtils#formatNumber(long)}: it will + * read a "display" number that can contain a "M" or "k" suffix and return + * the full value. + *
+ * Of course, the conversion to and from display form is lossy (example:
+ * 6870 to "6.5k" to 6500).
+ *
+ * @param value
+ * the value in display form with possible "G", "M" and "k"
+ * suffixes (case-insensitive), can be NULL
+ * @param def
+ * the default value if it is not possible to convert the given
+ * value to a number
+ *
+ * @return the value as a number, or <tt>def</tt> if not possible to convert
+ */
+ public static long toNumber(String value, long def) {
+     // Parses a "display" number such as "12", "6.5k", "3M" or "2 G" back
+     // into a long; anything that cannot be parsed yields def.
+     if (value == null) {
+         return def;
+     }
+
+     String text = value.trim().toLowerCase();
+     try {
+         // Optional magnitude suffix (already lower-cased).
+         long mult = 1;
+         if (text.endsWith("g")) {
+             mult = 1000000000L;
+         } else if (text.endsWith("m")) {
+             mult = 1000000L;
+         } else if (text.endsWith("k")) {
+             mult = 1000L;
+         }
+         if (mult != 1) {
+             // Drop the suffix and any space that separated it from the digits.
+             text = text.substring(0, text.length() - 1).trim();
+         }
+
+         long deci = 0;
+         if (text.contains(".")) {
+             String[] tab = text.split("\\.");
+             if (tab.length != 2) {
+                 // Covers "1.2.3" as well as a trailing dot such as "5."
+                 throw new NumberFormatException(text);
+             }
+
+             double decimal = Double.parseDouble("0." + tab[1]);
+             deci = (long) (mult * decimal);
+             // The fraction extends the magnitude of the integer part, so it
+             // must carry the same sign: "-1.5k" is -1500, not -1000 + 500.
+             if (tab[0].startsWith("-")) {
+                 deci = -deci;
+             }
+             text = tab[0];
+         }
+
+         return mult * Long.parseLong(text) + deci;
+     } catch (NumberFormatException e) {
+         // Best effort by contract: an unparsable value is not an error,
+         // the caller simply gets the default back.
+         return def;
+     }
+ }
+
+ /**
+ * Return the bytes array representation of the given {@link String} in
+ * UTF-8.
+ * <p>
+ * The {@link String} is dereferenced directly, so passing NULL results in a
+ * {@link NullPointerException}.
+ *
+ * @param str
+ * the {@link String} to transform into bytes (must not be NULL)
+ * @return the content in bytes, or NULL in the (not expected) case where
+ * the JVM reports UTF-8 as unsupported -- the stack trace is then
+ * printed on the standard error stream
+ */
+ static public byte[] getBytes(String str) {
+ try {
+ return str.getBytes("UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ // All conforming JVM must support UTF-8, so this branch is not
+ // expected to be taken
+ e.printStackTrace();
+ return null;
}
}
@@ -565,21 +892,70 @@ public class StringUtils {
}
}
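+ //
// justify List<String> helpers
+ //
+
+ /**
+ * Check if this line ends as a complete line (ends with a "." or similar).
+ * <p>
+ * Note that we consider an empty line as full, and a line ending with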
+ * spaces as not complete.
+ *
+ * @param line
+ * the line to check
+ *
+ * @return TRUE if it does
+ */
static private boolean isFullLine(StringBuilder line) {
- return line.length() == 0 //
- || line.charAt(line.length() - 1) == '.'
- || line.charAt(line.length() - 1) == '"'
- || line.charAt(line.length() - 1) == '»';
+     if (line.length() == 0) {
+         return true; // an empty line counts as complete
+     }
+
+     char lastCar = line.charAt(line.length() - 1);
+     switch (lastCar) { // only the very last char matters (a trailing space means "not full")
+     case '.': // points
+     case '?':
+     case '!':
+
+     case '\'': // quotes
+     case '‘': // U+2018 LEFT SINGLE QUOTATION MARK
+     case '’': // U+2019 RIGHT SINGLE QUOTATION MARK
+
+     case '"': // double quotes
+     case '“': // U+201C LEFT DOUBLE QUOTATION MARK
+     case '”': // U+201D RIGHT DOUBLE QUOTATION MARK
+     case '»':
+     case '«':
+         return true;
+     default:
+         return false;
+     }
}
+ /**
+ * Check if this line represents an item in a list or description, i.e., if
+ * the character that immediately follows its left spacing (see
+ * {@link StringUtils#getItemSpacing(String)}) is "-".
+ * <p>
+ * Note that the left spacing must not be empty: a line whose very first
+ * character is "-" is <b>not</b> reported as an item line.
+ *
+ * @param line
+ * the line to check
+ *
+ * @return TRUE if it is
+ */
static private boolean isItemLine(String line) {
String spacing = getItemSpacing(line);
return spacing != null && !spacing.isEmpty()
&& line.charAt(spacing.length()) == '-';
}
+ /**
+ * Return all the spaces that start this line (or Empty if none).
+ *
+ * @param line
+ * the line to get the starting spaces from
+ *
+ * @return the left spacing
+ */
static private String getItemSpacing(String line) {
int i;
for (i = 0; i < line.length(); i++) {
@@ -591,6 +967,14 @@ public class StringUtils {
return "";
}
+ /**
+ * Check if this line is a horizontal spacer line.
+ *
+ * @param line
+ * the line to test
+ *
+ * @return TRUE if it is
+ */
static private boolean isHrLine(CharSequence line) {
int count = 0;
if (line != null) {
@@ -608,4 +992,174 @@ public class StringUtils {
return count > 2;
}
+
+ // Deprecated functions, please do not use //
+
+ /**
+ * Encode the given {@link String} in Base64, optionally zipping it first.
+ * <p>
+ * The {@link String} is first converted to bytes with
+ * {@link StringUtils#getBytes(String)}.
+ *
+ * @deprecated please use {@link StringUtils#zip64(String)} or
+ *             {@link StringUtils#base64(String)} instead.
+ *
+ * @param data
+ *            the data to encode
+ * @param zip
+ *            TRUE to zip it before Base64 encoding it, FALSE for Base64
+ *            encoding only
+ *
+ * @return the encoded data
+ *
+ * @throws IOException
+ *             in case of I/O error
+ */
+ @Deprecated
+ public static String base64(String data, boolean zip) throws IOException {
+     byte[] bytes = getBytes(data);
+     return base64(bytes, zip);
+ }
+
+ /**
+ * Encode the given bytes in Base64, optionally zipping them first.
+ *
+ * @deprecated please use {@link StringUtils#zip64(byte[])} or
+ *             {@link StringUtils#base64(byte[])} instead.
+ *
+ * @param data
+ *            the data to encode
+ * @param zip
+ *            TRUE to zip it before Base64 encoding it (delegates to
+ *            {@link StringUtils#zip64(byte[])}), FALSE for Base64
+ *            encoding only
+ *
+ * @return the encoded data
+ *
+ * @throws IOException
+ *             in case of I/O error
+ */
+ @Deprecated
+ public static String base64(byte[] data, boolean zip) throws IOException {
+     if (!zip) {
+         // Plain Base64: stream the bytes through the encoder and read
+         // the result back as text.
+         Base64InputStream encoder = new Base64InputStream(
+                 new ByteArrayInputStream(data), true);
+         try {
+             return IOUtils.readSmallStream(encoder);
+         } finally {
+             encoder.close();
+         }
+     }
+
+     return zip64(data);
+ }
+
+ /**
+ * Wrap the given {@link OutputStream} so that what is written to the
+ * returned stream is Base64 encoded (and, if asked to, GZIP compressed
+ * before being encoded) on its way to <tt>data</tt>.
+ *
+ * @deprecated please use {@link Base64OutputStream} and
+ *             {@link GZIPOutputStream} instead.
+ *
+ * @param data
+ *            the stream that will receive the encoded output
+ * @param zip
+ *            TRUE to also GZIP the content before encoding it
+ * @param breakLines
+ *            NOT USED ANYMORE, it is always considered FALSE now
+ *
+ * @return the wrapping stream to write to
+ *
+ * @throws IOException
+ *             in case of I/O error
+ */
+ @Deprecated
+ public static OutputStream base64(OutputStream data, boolean zip,
+         boolean breakLines) throws IOException {
+     OutputStream encoder = new Base64OutputStream(data);
+     return zip ? new GZIPOutputStream(encoder) : encoder;
+ }
+
+ /**
+ * Decode the given Base64 {@link String} into raw bytes and, if asked to,
+ * try to GZIP-uncompress the result.
+ * <p>
+ * When <tt>zip</tt> is TRUE but the decoded bytes cannot be uncompressed
+ * (any error while reading them as GZIP), the decoded bytes are returned
+ * as-is instead of failing.
+ *
+ * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
+ *
+ * @param data
+ *            the data to unconvert
+ * @param zip
+ *            TRUE to also try to uncompress the data from a GZIP format;
+ *            if set to FALSE, the decoded bytes are returned untouched
+ *            (so zipped data can be returned)
+ *
+ * @return the raw data represented by the given Base64 {@link String},
+ *         optionally still compressed with GZIP
+ *
+ * @throws IOException
+ *             in case of I/O errors
+ */
+ @Deprecated
+ public static byte[] unbase64(String data, boolean zip) throws IOException {
+     byte[] decoded = unbase64(data);
+     if (zip) {
+         try {
+             GZIPInputStream gzip = new GZIPInputStream(
+                     new ByteArrayInputStream(decoded));
+             try {
+                 ByteArrayOutputStream unzipped = new ByteArrayOutputStream();
+                 try {
+                     IOUtils.write(gzip, unzipped);
+                     return unzipped.toByteArray();
+                 } finally {
+                     unzipped.close();
+                 }
+             } finally {
+                 gzip.close();
+             }
+         } catch (Exception e) {
+             // Could not be read as GZIP: fall through and hand back the
+             // decoded bytes unchanged.
+         }
+     }
+
+     return decoded;
+ }
+
+ /**
+ * Read the whole given stream as Base64 text, decode it (see
+ * {@link StringUtils#unbase64(String, boolean)}) and expose the result as
+ * an in-memory {@link InputStream}.
+ *
+ * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
+ *
+ * @param data
+ *            the stream containing the Base64 data to unconvert
+ * @param zip
+ *            TRUE to also try to uncompress the data from a GZIP format;
+ *            if set to FALSE, zipped data can be returned
+ *
+ * @return a stream over the raw data, optionally still compressed with
+ *         GZIP
+ *
+ * @throws IOException
+ *             in case of I/O errors
+ */
+ @Deprecated
+ public static InputStream unbase64(InputStream data, boolean zip)
+         throws IOException {
+     String encoded = IOUtils.readSmallStream(data);
+     byte[] decoded = unbase64(encoded, zip);
+     return new ByteArrayInputStream(decoded);
+ }
+
+ /**
+ * Decode the Base64 text stored (as UTF-8) in the given part of a byte
+ * array, see {@link StringUtils#unbase64(String, boolean)}.
+ *
+ * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
+ *
+ * @param data
+ *            the array containing the Base64 data
+ * @param offset
+ *            the index of the first byte to use
+ * @param count
+ *            the number of bytes to use
+ * @param zip
+ *            TRUE to also try to uncompress the data from a GZIP format
+ *
+ * @return the raw data
+ *
+ * @throws IOException
+ *             in case of I/O errors
+ */
+ @Deprecated
+ public static byte[] unbase64(byte[] data, int offset, int count,
+         boolean zip) throws IOException {
+     int end = offset + count;
+     String encoded = new String(Arrays.copyOfRange(data, offset, end),
+             "UTF-8");
+     return unbase64(encoded, zip);
+ }
+
+ /**
+ * Decode the given Base64 {@link String} (see
+ * {@link StringUtils#unbase64(String, boolean)}) and read the result as
+ * UTF-8 text.
+ *
+ * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
+ *
+ * @param data
+ *            the data to unconvert
+ * @param zip
+ *            TRUE to also try to uncompress the data from a GZIP format
+ *
+ * @return the decoded {@link String}
+ *
+ * @throws IOException
+ *             in case of I/O errors
+ */
+ @Deprecated
+ public static String unbase64s(String data, boolean zip) throws IOException {
+     byte[] decoded = unbase64(data, zip);
+     return new String(decoded, "UTF-8");
+ }
+
+ /**
+ * Decode the Base64 text stored in the given part of a byte array (see
+ * {@link StringUtils#unbase64(byte[], int, int, boolean)}) and read the
+ * result as UTF-8 text.
+ *
+ * @deprecated DO NOT USE ANYMORE (bad perf, will be dropped)
+ *
+ * @param data
+ *            the array containing the Base64 data
+ * @param offset
+ *            the index of the first byte to use
+ * @param count
+ *            the number of bytes to use
+ * @param zip
+ *            TRUE to also try to uncompress the data from a GZIP format
+ *
+ * @return the decoded {@link String}
+ *
+ * @throws IOException
+ *             in case of I/O errors
+ */
+ @Deprecated
+ public static String unbase64s(byte[] data, int offset, int count,
+         boolean zip) throws IOException {
+     byte[] decoded = unbase64(data, offset, count, zip);
+     return new String(decoded, "UTF-8");
+ }
}