
python-checkins at python
Aug 4, 2012, 6:39 AM
Post #1 of 1
(36 views)
Permalink
|
|
cpython: #15546: Fix BZ2File.read1()'s handling of pathological input data.
|
|
http://hg.python.org/cpython/rev/cdf27a213bd2
changeset:   78409:cdf27a213bd2
user:        Nadeem Vawda <nadeem.vawda [at] gmail>
date:        Sat Aug 04 15:29:28 2012 +0200
summary:
  #15546: Fix BZ2File.read1()'s handling of pathological input data.

files:
  Lib/bz2.py |  47 +++++++++++++++++++++++------------------
  1 files changed, 26 insertions(+), 21 deletions(-)


diff --git a/Lib/bz2.py b/Lib/bz2.py
--- a/Lib/bz2.py
+++ b/Lib/bz2.py
@@ -174,29 +174,31 @@
 
     # Fill the readahead buffer if it is empty. Returns False on EOF.
     def _fill_buffer(self):
-        if self._buffer:
-            return True
+        # Depending on the input data, our call to the decompressor may not
+        # return any data. In this case, try again after reading another block.
+        while True:
+            if self._buffer:
+                return True
 
-        if self._decompressor.unused_data:
-            rawblock = self._decompressor.unused_data
-        else:
-            rawblock = self._fp.read(_BUFFER_SIZE)
+            if self._decompressor.unused_data:
+                rawblock = self._decompressor.unused_data
+            else:
+                rawblock = self._fp.read(_BUFFER_SIZE)
 
-        if not rawblock:
+            if not rawblock:
+                if self._decompressor.eof:
+                    self._mode = _MODE_READ_EOF
+                    self._size = self._pos
+                    return False
+                else:
+                    raise EOFError("Compressed file ended before the "
+                                   "end-of-stream marker was reached")
+
+            # Continue to next stream.
             if self._decompressor.eof:
-                self._mode = _MODE_READ_EOF
-                self._size = self._pos
-                return False
-            else:
-                raise EOFError("Compressed file ended before the "
-                               "end-of-stream marker was reached")
+                self._decompressor = BZ2Decompressor()
 
-        # Continue to next stream.
-        if self._decompressor.eof:
-            self._decompressor = BZ2Decompressor()
-
-        self._buffer = self._decompressor.decompress(rawblock)
-        return True
+            self._buffer = self._decompressor.decompress(rawblock)
 
     # Read data until EOF.
     # If return_data is false, consume the data without returning it.
@@ -256,11 +258,14 @@
             return self._read_block(size)
 
     def read1(self, size=-1):
-        """Read up to size uncompressed bytes with at most one read
-        from the underlying stream.
+        """Read up to size uncompressed bytes, while trying to avoid
+        making multiple reads from the underlying stream.
 
         Returns b'' if the file is at EOF.
         """
+        # Usually, read1() calls _fp.read() at most once. However, sometimes
+        # this does not give enough data for the decompressor to make progress.
+        # In this case we make multiple reads, to avoid returning b"".
         with self._lock:
             self._check_can_read()
             if (size == 0 or self._mode == _MODE_READ_EOF or

-- 
Repository URL: http://hg.python.org/cpython
|