From 2e6426c0e430244113c990e278dce1f500504fe2 Mon Sep 17 00:00:00 2001 From: Niki Roo Date: Tue, 19 May 2020 23:15:24 +0200 Subject: [PATCH] fix limit in replace for BufferedInputStream --- streams/BufferedInputStream.java | 233 ++++++++++++++++--------- streams/ReplaceInputStream.java | 3 +- test_code/NextableInputStreamTest.java | 21 +-- 3 files changed, 161 insertions(+), 96 deletions(-) diff --git a/streams/BufferedInputStream.java b/streams/BufferedInputStream.java index 683fa55..babd2ce 100644 --- a/streams/BufferedInputStream.java +++ b/streams/BufferedInputStream.java @@ -2,7 +2,10 @@ package be.nikiroo.utils.streams; import java.io.IOException; import java.io.InputStream; -import java.util.Arrays; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.List; +import java.util.Map.Entry; import be.nikiroo.utils.StringUtils; @@ -18,10 +21,11 @@ import be.nikiroo.utils.StringUtils; public class BufferedInputStream extends InputStream { /** * The size of the internal buffer (can be different if you pass your own - * buffer, of course). + * buffer, of course, and can also expand to search for longer "startsWith" + * data). *

- * A second buffer of twice the size can sometimes be created as needed for - * the {@link BufferedInputStream#startsWith(byte[])} search operation. + * Note that special "push-back" buffers can also be created during the life + * of this stream. */ static private final int BUFFER_SIZE = 4096; @@ -38,16 +42,13 @@ public class BufferedInputStream extends InputStream { private InputStream in; private int openCounter; - // special use, prefetched next buffer - private byte[] buffer2; - private int pos2; - private int len2; - private byte[] originalBuffer; + /** array + offset of pushed-back buffers */ + private List<Entry<byte[], Integer>> backBuffers; private long bytesRead; /** - * Create a new {@link BufferedInputStream} that wraps the given + * Create a new {@link BufferedInputStream2} that wraps the given * {@link InputStream}. * * @param in @@ -57,14 +58,14 @@ public class BufferedInputStream extends InputStream { this.in = in; this.buffer = new byte[BUFFER_SIZE]; - this.originalBuffer = this.buffer; this.start = 0; this.stop = 0; + this.backBuffers = new ArrayList<Entry<byte[], Integer>>(); } /** - * Create a new {@link BufferedInputStream} that wraps the given bytes array - * as a data source. + * Create a new {@link BufferedInputStream2} that wraps the given bytes + * array as a data source. * * @param in * the array to wrap, cannot be NULL @@ -74,8 +75,8 @@ public class BufferedInputStream extends InputStream { } /** - * Create a new {@link BufferedInputStream} that wraps the given bytes array - * as a data source. + * Create a new {@link BufferedInputStream2} that wraps the given bytes + * array as a data source. * * @param in * the array to wrap, cannot be NULL @@ -100,24 +101,15 @@ public class BufferedInputStream extends InputStream { this.in = null; this.buffer = in; - this.originalBuffer = this.buffer; this.start = offset; this.stop = length; + this.backBuffers = new ArrayList<Entry<byte[], Integer>>(); } /** - * The internal buffer size (can be useful to know for search methods). 
- * - * @return the size of the internal buffer, in bytes. - */ - public int getInternalBufferSize() { - return originalBuffer.length; - } - - /** - * Return this very same {@link BufferedInputStream}, but keep a counter of + * Return this very same {@link BufferedInputStream2}, but keep a counter of * how many streams were open this way. When calling - * {@link BufferedInputStream#close()}, decrease this counter if it is not + * {@link BufferedInputStream2#close()}, decrease this counter if it is not * already zero instead of actually closing the stream. *

* You are now responsible for it — you must close it. @@ -174,7 +166,7 @@ public class BufferedInputStream extends InputStream { */ public boolean is(byte[] search) throws IOException { if (startsWith(search)) { - return (stop - start) == search.length; + return available() == search.length; } return false; @@ -220,40 +212,27 @@ public class BufferedInputStream extends InputStream { * greater than the internal buffer */ public boolean startsWith(byte[] search) throws IOException { - if (search.length > originalBuffer.length) { - throw new IOException( - "This stream does not support searching for more than " - + buffer.length + " bytes"); - } - checkClose(); - if (available() < search.length) { + while (consolidatePushBack(search.length) < search.length) { preRead(); - } - - if (available() >= search.length) { - // Easy path - return StreamUtils.startsWith(search, buffer, start, stop); - } else if (in != null && !eof) { - // Harder path - if (buffer2 == null && buffer.length == originalBuffer.length) { - buffer2 = Arrays.copyOf(buffer, buffer.length * 2); - - pos2 = buffer.length; - len2 = read(in, buffer2, pos2, buffer.length); - if (len2 > 0) { - bytesRead += len2; - } - - // Note: here, len/len2 = INDEX of last good byte - len2 += pos2; + if (start >= stop) { + // Not enough data left to start with that + return false; } - return StreamUtils.startsWith(search, buffer2, pos2, len2); + byte[] newBuffer = new byte[stop - start]; + System.arraycopy(buffer, start, newBuffer, 0, stop - start); + pushback(newBuffer, 0); + start = stop; } - return false; + Entry<byte[], Integer> bb = backBuffers.get(backBuffers.size() - 1); + byte[] bbBuffer = bb.getKey(); + int bbOffset = bb.getValue(); + + return StreamUtils.startsWith(search, bbBuffer, bbOffset, + bbBuffer.length); } /** @@ -266,8 +245,7 @@ public class BufferedInputStream extends InputStream { } /** - * Check if this stream is spent (no more data to read or to - * process). 
+ * Check if this stream is spent (no more data to read or to process). * * @return TRUE if it is * @@ -330,6 +308,28 @@ public class BufferedInputStream extends InputStream { return 0; } + // Read from the pushed-back buffers if any + if (!backBuffers.isEmpty()) { + int read = 0; + + Entry<byte[], Integer> bb = backBuffers + .remove(backBuffers.size() - 1); + byte[] bbBuffer = bb.getKey(); + int bbOffset = bb.getValue(); + int bbSize = bbBuffer.length - bbOffset; + + if (bbSize > blen) { + read = blen; + System.arraycopy(bbBuffer, bbOffset, b, boff, read); + pushback(bbBuffer, bbOffset + read); + } else { + read = bbSize; + System.arraycopy(bbBuffer, bbOffset, b, boff, read); + } + + return read; + } + int done = 0; while (hasMoreData() && done < blen) { preRead(); @@ -353,6 +353,23 @@ public class BufferedInputStream extends InputStream { } long skipped = 0; + while (!backBuffers.isEmpty() && n > 0) { + Entry<byte[], Integer> bb = backBuffers + .remove(backBuffers.size() - 1); + byte[] bbBuffer = bb.getKey(); + int bbOffset = bb.getValue(); + int bbSize = bbBuffer.length - bbOffset; + + int localSkip = 0; + localSkip = (int) Math.min(n, bbSize); + + n -= localSkip; + bbSize -= localSkip; + + if (bbSize > 0) { + pushback(bbBuffer, bbOffset + localSkip); + } + } while (hasMoreData() && n > 0) { preRead(); @@ -371,7 +388,12 @@ public class BufferedInputStream extends InputStream { return 0; } - return Math.max(0, stop - start); + int avail = 0; + for (Entry<byte[], Integer> entry : backBuffers) { + avail += entry.getKey().length - entry.getValue(); + } + + return avail + Math.max(0, stop - start); } /** @@ -380,11 +402,11 @@ public class BufferedInputStream extends InputStream { *

* Including the under-laying {@link InputStream}. *

- * Note: if you called the {@link BufferedInputStream#open()} method + * Note: if you called the {@link BufferedInputStream2#open()} method * prior to this one, it will just decrease the internal count of how many * open streams it held and do nothing else. The stream will actually be - * closed when you have called {@link BufferedInputStream#close()} once more - * than {@link BufferedInputStream#open()}. + * closed when you have called {@link BufferedInputStream2#close()} once + * more than {@link BufferedInputStream2#open()}. * * @exception IOException * in case of I/O error @@ -404,11 +426,11 @@ public class BufferedInputStream extends InputStream { * You can call this method multiple times, it will not cause an * {@link IOException} for subsequent calls. *

- * Note: if you called the {@link BufferedInputStream#open()} method + * Note: if you called the {@link BufferedInputStream2#open()} method * prior to this one, it will just decrease the internal count of how many * open streams it held and do nothing else. The stream will actually be - * closed when you have called {@link BufferedInputStream#close()} once more - * than {@link BufferedInputStream#open()}. + * closed when you have called {@link BufferedInputStream2#close()} once + * more than {@link BufferedInputStream2#open()}. * * @param includingSubStream * also close the under-laying stream @@ -430,6 +452,52 @@ public class BufferedInputStream extends InputStream { } } + /** + * Consolidate the push-back buffers so the last one is at least the given + * size, if possible. + *

+ * If there is not enough data in the push-back buffers, they will all be + * consolidated. + * + * @param size + * the minimum size of the consolidated buffer, or -1 to force + * the consolidation of all push-back buffers + * + * @return the size of the last, consolidated buffer; can be less than the + * requested size if not enough data + */ + protected int consolidatePushBack(int size) { + int bbIndex = -1; + int bbUpToSize = 0; + for (Entry<byte[], Integer> entry : backBuffers) { + bbIndex++; + bbUpToSize += entry.getKey().length - entry.getValue(); + + if (size >= 0 && bbUpToSize >= size) { + break; + } + } + + // Index 0 means "the last buffer is already big enough" + if (bbIndex > 0) { + byte[] consolidatedBuffer = new byte[bbUpToSize]; + int consolidatedPos = 0; + for (int i = 0; i <= bbIndex; i++) { + Entry<byte[], Integer> bb = backBuffers + .remove(backBuffers.size() - 1); + byte[] bbBuffer = bb.getKey(); + int bbOffset = bb.getValue(); + int bbSize = bbBuffer.length - bbOffset; + System.arraycopy(bbBuffer, bbOffset, consolidatedBuffer, + consolidatedPos, bbSize); + } + + pushback(consolidatedBuffer, 0); + } + + return bbUpToSize; + } + /** * Check if we still have some data in the buffer and, if not, fetch some. * @@ -443,21 +511,9 @@ public class BufferedInputStream extends InputStream { boolean hasRead = false; if (in != null && !eof && start >= stop) { start = 0; - if (buffer2 != null) { - buffer = buffer2; - start = pos2; - stop = len2; - - buffer2 = null; - pos2 = 0; - len2 = 0; - } else { - buffer = originalBuffer; - - stop = read(in, buffer, 0, buffer.length); - if (stop > 0) { - bytesRead += stop; - } + stop = read(in, buffer, 0, buffer.length); + if (stop > 0) { + bytesRead += stop; } hasRead = true; @@ -471,12 +527,25 @@ public class BufferedInputStream extends InputStream { } /** - * Read the under-laying stream into the local buffer. + * Push back some data that will be read again at the next read call. 
+ * + * @param buffer + * the buffer to push back + * @param offset + * the offset at which to start reading in the buffer + */ + protected void pushback(byte[] buffer, int offset) { + backBuffers.add( + new AbstractMap.SimpleEntry<byte[], Integer>(buffer, offset)); + } + + /** + * Read the under-laying stream into the given local buffer. * * @param in * the under-laying {@link InputStream} * @param buffer - * the buffer we use in this {@link BufferedInputStream} + * the buffer we use in this {@link BufferedInputStream2} * @param off * the offset * @param len @@ -503,7 +572,7 @@ public class BufferedInputStream extends InputStream { return false; } - return (start < stop) || !eof; + return !backBuffers.isEmpty() || (start < stop) || !eof; } /** diff --git a/streams/ReplaceInputStream.java b/streams/ReplaceInputStream.java index 9f73350..ae576e2 100644 --- a/streams/ReplaceInputStream.java +++ b/streams/ReplaceInputStream.java @@ -114,7 +114,8 @@ public class ReplaceInputStream extends BufferedInputStream { } // We need at least maxFromSize so we can iterate and replace - source = new byte[Math.max(2 * maxFromSize, MIN_BUFFER_SIZE)]; + source = new byte[Math.max(2 * Math.max(maxToSize, maxFromSize), + MIN_BUFFER_SIZE)]; spos = 0; slen = 0; } diff --git a/test_code/NextableInputStreamTest.java b/test_code/NextableInputStreamTest.java index 463a123..4e59823 100644 --- a/test_code/NextableInputStreamTest.java +++ b/test_code/NextableInputStreamTest.java @@ -1,7 +1,6 @@ package be.nikiroo.utils.test_code; import java.io.ByteArrayInputStream; -import java.io.IOException; import be.nikiroo.utils.IOUtils; import be.nikiroo.utils.streams.NextableInputStream; @@ -177,12 +176,10 @@ public class NextableInputStreamTest extends TestLauncher { 11 })); // too big - try { - in.startsWith(new byte[] { 42, 12, 0, 127, 12, 51, 11, 12, - 0 }); - fail("Searching a prefix bigger than the array should throw an IOException"); - } catch (IOException e) { - } + assertEquals( + "A search term bigger than the 
whole data cannot be found in the data", + false, in.startsWith(new byte[] { 42, 12, 0, 127, 12, + 51, 11, 12, 0 })); in.close(); } @@ -209,13 +206,11 @@ public class NextableInputStreamTest extends TestLauncher { in.startsWith("Toto")); assertEquals("It actually does not start with that", false, in.startsWith("Fanfan et Toto vont à la mee")); - + // too big - try { - in.startsWith("Fanfan et Toto vont à la mer."); - fail("Searching a prefix bigger than the array should throw an IOException"); - } catch (IOException e) { - } + assertEquals( + "A search term bigger than the whole data cannot be found in the data", + false, in.startsWith("Fanfan et Toto vont à la mer.")); in.close(); } -- 2.27.0