fix limit in replace for BufferedInputStream
authorNiki Roo <niki@nikiroo.be>
Tue, 19 May 2020 21:15:24 +0000 (23:15 +0200)
committerNiki Roo <niki@nikiroo.be>
Tue, 19 May 2020 21:15:24 +0000 (23:15 +0200)
streams/BufferedInputStream.java
streams/ReplaceInputStream.java
test_code/NextableInputStreamTest.java

index 683fa55865aff5ee4b6d442638721372c99e0e02..babd2ce88ec929883713af1895a556d6c4178ad9 100644 (file)
@@ -2,7 +2,10 @@ package be.nikiroo.utils.streams;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Arrays;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map.Entry;
 
 import be.nikiroo.utils.StringUtils;
 
@@ -18,10 +21,11 @@ import be.nikiroo.utils.StringUtils;
 public class BufferedInputStream extends InputStream {
        /**
         * The size of the internal buffer (can be different if you pass your own
-        * buffer, of course).
+        * buffer, of course, and can also expand to search for longer "startsWith"
+        * data).
         * <p>
-        * A second buffer of twice the size can sometimes be created as needed for
-        * the {@link BufferedInputStream#startsWith(byte[])} search operation.
+        * Note that special "push-back" buffers can also be created during the life
+        * of this stream.
         */
        static private final int BUFFER_SIZE = 4096;
 
@@ -38,16 +42,13 @@ public class BufferedInputStream extends InputStream {
        private InputStream in;
        private int openCounter;
 
-       // special use, prefetched next buffer
-       private byte[] buffer2;
-       private int pos2;
-       private int len2;
-       private byte[] originalBuffer;
+       /** array + offset of pushed-back buffers */
+       private List<Entry<byte[], Integer>> backBuffers;
 
        private long bytesRead;
 
        /**
-        * Create a new {@link BufferedInputStream} that wraps the given
+        * Create a new {@link BufferedInputStream} that wraps the given
         * {@link InputStream}.
         * 
         * @param in
@@ -57,14 +58,14 @@ public class BufferedInputStream extends InputStream {
                this.in = in;
 
                this.buffer = new byte[BUFFER_SIZE];
-               this.originalBuffer = this.buffer;
                this.start = 0;
                this.stop = 0;
+               this.backBuffers = new ArrayList<Entry<byte[], Integer>>();
        }
 
        /**
-        * Create a new {@link BufferedInputStream} that wraps the given bytes array
-        * as a data source.
+        * Create a new {@link BufferedInputStream} that wraps the given bytes
+        * array as a data source.
         * 
         * @param in
         *            the array to wrap, cannot be NULL
@@ -74,8 +75,8 @@ public class BufferedInputStream extends InputStream {
        }
 
        /**
-        * Create a new {@link BufferedInputStream} that wraps the given bytes array
-        * as a data source.
+        * Create a new {@link BufferedInputStream} that wraps the given bytes
+        * array as a data source.
         * 
         * @param in
         *            the array to wrap, cannot be NULL
@@ -100,24 +101,15 @@ public class BufferedInputStream extends InputStream {
                this.in = null;
 
                this.buffer = in;
-               this.originalBuffer = this.buffer;
                this.start = offset;
                this.stop = length;
+               this.backBuffers = new ArrayList<Entry<byte[], Integer>>();
        }
 
        /**
-        * The internal buffer size (can be useful to know for search methods).
-        * 
-        * @return the size of the internal buffer, in bytes.
-        */
-       public int getInternalBufferSize() {
-               return originalBuffer.length;
-       }
-
-       /**
-        * Return this very same {@link BufferedInputStream}, but keep a counter of
+        * Return this very same {@link BufferedInputStream}, but keep a counter of
         * how many streams were open this way. When calling
-        * {@link BufferedInputStream#close()}, decrease this counter if it is not
+        * {@link BufferedInputStream#close()}, decrease this counter if it is not
         * already zero instead of actually closing the stream.
         * <p>
         * You are now responsible for it &mdash; you <b>must</b> close it.
@@ -174,7 +166,7 @@ public class BufferedInputStream extends InputStream {
         */
        public boolean is(byte[] search) throws IOException {
                if (startsWith(search)) {
-                       return (stop - start) == search.length;
+                       return available() == search.length;
                }
 
                return false;
@@ -220,40 +212,27 @@ public class BufferedInputStream extends InputStream {
         *             greater than the internal buffer
         */
        public boolean startsWith(byte[] search) throws IOException {
-               if (search.length > originalBuffer.length) {
-                       throw new IOException(
-                                       "This stream does not support searching for more than "
-                                                       + buffer.length + " bytes");
-               }
-
                checkClose();
 
-               if (available() < search.length) {
+               while (consolidatePushBack(search.length) < search.length) {
                        preRead();
-               }
-
-               if (available() >= search.length) {
-                       // Easy path
-                       return StreamUtils.startsWith(search, buffer, start, stop);
-               } else if (in != null && !eof) {
-                       // Harder path
-                       if (buffer2 == null && buffer.length == originalBuffer.length) {
-                               buffer2 = Arrays.copyOf(buffer, buffer.length * 2);
-
-                               pos2 = buffer.length;
-                               len2 = read(in, buffer2, pos2, buffer.length);
-                               if (len2 > 0) {
-                                       bytesRead += len2;
-                               }
-
-                               // Note: here, len/len2 = INDEX of last good byte
-                               len2 += pos2;
+                       if (start >= stop) {
+                               // Not enough data left to start with that
+                               return false;
                        }
 
-                       return StreamUtils.startsWith(search, buffer2, pos2, len2);
+                       byte[] newBuffer = new byte[stop - start];
+                       System.arraycopy(buffer, start, newBuffer, 0, stop - start);
+                       pushback(newBuffer, 0);
+                       start = stop;
                }
 
-               return false;
+               Entry<byte[], Integer> bb = backBuffers.get(backBuffers.size() - 1);
+               byte[] bbBuffer = bb.getKey();
+               int bbOffset = bb.getValue();
+
+               return StreamUtils.startsWith(search, bbBuffer, bbOffset,
+                               bbBuffer.length);
        }
 
        /**
@@ -266,8 +245,7 @@ public class BufferedInputStream extends InputStream {
        }
 
        /**
-        * Check if this stream is spent (no more data to read or to
-        * process).
+        * Check if this stream is spent (no more data to read or to process).
         * 
         * @return TRUE if it is
         * 
@@ -330,6 +308,28 @@ public class BufferedInputStream extends InputStream {
                        return 0;
                }
 
+               // Read from the pushed-back buffers if any
+               if (!backBuffers.isEmpty()) {
+                       int read = 0;
+
+                       Entry<byte[], Integer> bb = backBuffers
+                                       .remove(backBuffers.size() - 1);
+                       byte[] bbBuffer = bb.getKey();
+                       int bbOffset = bb.getValue();
+                       int bbSize = bbBuffer.length - bbOffset;
+
+                       if (bbSize > blen) {
+                               read = blen;
+                               System.arraycopy(bbBuffer, bbOffset, b, boff, read);
+                               pushback(bbBuffer, bbOffset + read);
+                       } else {
+                               read = bbSize;
+                               System.arraycopy(bbBuffer, bbOffset, b, boff, read);
+                       }
+
+                       return read;
+               }
+
                int done = 0;
                while (hasMoreData() && done < blen) {
                        preRead();
@@ -353,6 +353,23 @@ public class BufferedInputStream extends InputStream {
                }
 
                long skipped = 0;
+               while (!backBuffers.isEmpty() && n > 0) {
+                       Entry<byte[], Integer> bb = backBuffers
+                                       .remove(backBuffers.size() - 1);
+                       byte[] bbBuffer = bb.getKey();
+                       int bbOffset = bb.getValue();
+                       int bbSize = bbBuffer.length - bbOffset;
+
+                       int localSkip = 0;
+                       localSkip = (int) Math.min(n, bbSize);
+
+                       n -= localSkip;
+                       bbSize -= localSkip;
+
+                       if (bbSize > 0) {
+                               pushback(bbBuffer, bbOffset + localSkip);
+                       }
+               }
                while (hasMoreData() && n > 0) {
                        preRead();
 
@@ -371,7 +388,12 @@ public class BufferedInputStream extends InputStream {
                        return 0;
                }
 
-               return Math.max(0, stop - start);
+               int avail = 0;
+               for (Entry<byte[], Integer> entry : backBuffers) {
+                       avail += entry.getKey().length - entry.getValue();
+               }
+
+               return avail + Math.max(0, stop - start);
        }
 
        /**
@@ -380,11 +402,11 @@ public class BufferedInputStream extends InputStream {
         * <p>
         * Including the under-laying {@link InputStream}.
         * <p>
-        * <b>Note:</b> if you called the {@link BufferedInputStream#open()} method
+        * <b>Note:</b> if you called the {@link BufferedInputStream#open()} method
         * prior to this one, it will just decrease the internal count of how many
         * open streams it held and do nothing else. The stream will actually be
-        * closed when you have called {@link BufferedInputStream#close()} once more
-        * than {@link BufferedInputStream#open()}.
+        * closed when you have called {@link BufferedInputStream#close()} once
+        * more than {@link BufferedInputStream#open()}.
         * 
         * @exception IOException
         *                in case of I/O error
@@ -404,11 +426,11 @@ public class BufferedInputStream extends InputStream {
         * You can call this method multiple times, it will not cause an
         * {@link IOException} for subsequent calls.
         * <p>
-        * <b>Note:</b> if you called the {@link BufferedInputStream#open()} method
+        * <b>Note:</b> if you called the {@link BufferedInputStream#open()} method
         * prior to this one, it will just decrease the internal count of how many
         * open streams it held and do nothing else. The stream will actually be
-        * closed when you have called {@link BufferedInputStream#close()} once more
-        * than {@link BufferedInputStream#open()}.
+        * closed when you have called {@link BufferedInputStream#close()} once
+        * more than {@link BufferedInputStream#open()}.
         * 
         * @param includingSubStream
         *            also close the under-laying stream
@@ -430,6 +452,52 @@ public class BufferedInputStream extends InputStream {
                }
        }
 
+       /**
+        * Consolidate the push-back buffers so the last one (the next one to be
+        * read) is at least the given size, if possible.
+        * <p>
+        * If there is not enough data in the push-back buffers, they will all be
+        * consolidated. Data is merged in read order (last buffer first).
+        * 
+        * @param size
+        *            the minimum size of the consolidated buffer, or -1 to force
+        *            the consolidation of all push-back buffers
+        * 
+        * @return the size of the last, consolidated buffer; can be less than the
+        *         requested size if not enough data
+        */
+       protected int consolidatePushBack(int size) {
+               int bbCount = 0;
+               int bbUpToSize = 0;
+               for (int i = backBuffers.size() - 1; i >= 0; i--) { // last = next read
+                       Entry<byte[], Integer> entry = backBuffers.get(i);
+                       bbCount++;
+                       bbUpToSize += entry.getKey().length - entry.getValue();
+                       if (size >= 0 && bbUpToSize >= size) {
+                               break;
+                       }
+               }
+
+               // Zero or one buffer involved: nothing to merge
+               if (bbCount > 1) {
+                       byte[] consolidatedBuffer = new byte[bbUpToSize];
+                       int consolidatedPos = 0;
+                       for (int i = 0; i < bbCount; i++) {
+                               Entry<byte[], Integer> bb = backBuffers
+                                               .remove(backBuffers.size() - 1);
+                               byte[] bbBuffer = bb.getKey();
+                               int bbOffset = bb.getValue();
+                               int bbSize = bbBuffer.length - bbOffset;
+                               System.arraycopy(bbBuffer, bbOffset, consolidatedBuffer,
+                                               consolidatedPos, bbSize);
+                               consolidatedPos += bbSize; // append, do not overwrite
+                       }
+                       pushback(consolidatedBuffer, 0);
+               }
+
+               return bbUpToSize;
+       }
+
        /**
         * Check if we still have some data in the buffer and, if not, fetch some.
         * 
@@ -443,21 +511,9 @@ public class BufferedInputStream extends InputStream {
                boolean hasRead = false;
                if (in != null && !eof && start >= stop) {
                        start = 0;
-                       if (buffer2 != null) {
-                               buffer = buffer2;
-                               start = pos2;
-                               stop = len2;
-
-                               buffer2 = null;
-                               pos2 = 0;
-                               len2 = 0;
-                       } else {
-                               buffer = originalBuffer;
-
-                               stop = read(in, buffer, 0, buffer.length);
-                               if (stop > 0) {
-                                       bytesRead += stop;
-                               }
+                       stop = read(in, buffer, 0, buffer.length);
+                       if (stop > 0) {
+                               bytesRead += stop;
                        }
 
                        hasRead = true;
@@ -471,12 +527,25 @@ public class BufferedInputStream extends InputStream {
        }
 
        /**
-        * Read the under-laying stream into the local buffer.
+        * Push back some data that will be read again at the next read call.
+        * 
+        * @param buffer
+        *            the buffer to push back
+        * @param offset
+        *            the offset at which to start reading in the buffer
+        */
+       protected void pushback(byte[] buffer, int offset) {
+               Entry<byte[], Integer> bb = new AbstractMap.SimpleEntry<byte[], Integer>(buffer, offset);
+               backBuffers.add(bb);
+       }
+
+       /**
+        * Read the under-laying stream into the given local buffer.
         * 
         * @param in
         *            the under-laying {@link InputStream}
         * @param buffer
-        *            the buffer we use in this {@link BufferedInputStream}
+        *            the buffer we use in this {@link BufferedInputStream}
         * @param off
         *            the offset
         * @param len
@@ -503,7 +572,7 @@ public class BufferedInputStream extends InputStream {
                        return false;
                }
 
-               return (start < stop) || !eof;
+               return !backBuffers.isEmpty() || (start < stop) || !eof;
        }
 
        /**
index 9f733504117c314768603669b73cde0f7aa2ab40..ae576e25e7a4833e9ce8f4bea25616ff018eca52 100644 (file)
@@ -114,7 +114,8 @@ public class ReplaceInputStream extends BufferedInputStream {
                }
 
                // We need at least maxFromSize so we can iterate and replace
-               source = new byte[Math.max(2 * maxFromSize, MIN_BUFFER_SIZE)];
+               source = new byte[Math.max(2 * Math.max(maxToSize, maxFromSize),
+                               MIN_BUFFER_SIZE)];
                spos = 0;
                slen = 0;
        }
index 463a123652a14aebbc0962def6fc3f7a2e9106fd..4e5982363af1925d4b7422ba0eeb4a60ec69204a 100644 (file)
@@ -1,7 +1,6 @@
 package be.nikiroo.utils.test_code;
 
 import java.io.ByteArrayInputStream;
-import java.io.IOException;
 
 import be.nikiroo.utils.IOUtils;
 import be.nikiroo.utils.streams.NextableInputStream;
@@ -177,12 +176,10 @@ public class NextableInputStreamTest extends TestLauncher {
                                                                11 }));
 
                                // too big
-                               try {
-                                       in.startsWith(new byte[] { 42, 12, 0, 127, 12, 51, 11, 12,
-                                                       0 });
-                                       fail("Searching a prefix bigger than the array should throw an IOException");
-                               } catch (IOException e) {
-                               }
+                               assertEquals(
+                                               "A search term bigger than the whole data cannot be found in the data",
+                                               false, in.startsWith(new byte[] { 42, 12, 0, 127, 12,
+                                                               51, 11, 12, 0 }));
 
                                in.close();
                        }
@@ -209,13 +206,11 @@ public class NextableInputStreamTest extends TestLauncher {
                                                in.startsWith("Toto"));
                                assertEquals("It actually does not start with that", false,
                                                in.startsWith("Fanfan et Toto vont à la mee"));
-
+                               
                                // too big
-                               try {
-                                       in.startsWith("Fanfan et Toto vont à la mer.");
-                                       fail("Searching a prefix bigger than the array should throw an IOException");
-                               } catch (IOException e) {
-                               }
+                               assertEquals(
+                                               "A search term bigger than the whole data cannot be found in the data",
+                                               false, in.startsWith("Fanfan et Toto vont à la mer."));
 
                                in.close();
                        }