mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
Support replay of records that have Transfer-Encoding: chunked, but
were not actually rewritten to the WARC as chunked. Attempt to parse the chunk length and, if that fails, fall back to treating the record as not chunked.
This commit is contained in:
parent
8fd10673e8
commit
a1cd40fba1
@ -262,12 +262,16 @@ class LineReader:
|
|||||||
if not self.buff or self.buff.pos >= self.buff.len:
|
if not self.buff or self.buff.pos >= self.buff.len:
|
||||||
toRead = min(self.maxLen - self.numRead, self.chunkSize) if (self.maxLen > 0) else self.chunkSize
|
toRead = min(self.maxLen - self.numRead, self.chunkSize) if (self.maxLen > 0) else self.chunkSize
|
||||||
data = self.stream.read(toRead)
|
data = self.stream.read(toRead)
|
||||||
self.numRead += len(data)
|
self._process_read(data)
|
||||||
|
|
||||||
if self.decomp:
|
def _process_read(self, data):
|
||||||
data = self.decomp.decompress(data)
|
self.numRead += len(data)
|
||||||
|
|
||||||
|
if self.decomp:
|
||||||
|
data = self.decomp.decompress(data)
|
||||||
|
|
||||||
|
self.buff = StringIO.StringIO(data)
|
||||||
|
|
||||||
self.buff = StringIO.StringIO(data)
|
|
||||||
|
|
||||||
def read(self, length = None):
|
def read(self, length = None):
|
||||||
self._fillbuff()
|
self._fillbuff()
|
||||||
@ -282,16 +286,31 @@ class LineReader:
|
|||||||
self.stream.close()
|
self.stream.close()
|
||||||
self.stream = None
|
self.stream = None
|
||||||
|
|
||||||
|
|
||||||
class ChunkedLineReader(LineReader):
|
class ChunkedLineReader(LineReader):
|
||||||
allChunksRead = False
|
allChunksRead = False
|
||||||
|
notChunked = False
|
||||||
|
|
||||||
def _fillbuff(self, chunkSize = None):
|
def _fillbuff(self, chunkSize = None):
|
||||||
|
if self.notChunked:
|
||||||
|
LineReader._fillbuff(self, chunkSize)
|
||||||
|
|
||||||
if self.allChunksRead:
|
if self.allChunksRead:
|
||||||
return
|
return
|
||||||
|
|
||||||
if not self.buff or self.buff.pos >= self.buff.len:
|
if not self.buff or self.buff.pos >= self.buff.len:
|
||||||
lengthHeader = self.stream.readline()
|
lengthHeader = self.stream.readline(64)
|
||||||
chunkSize = int(lengthHeader.strip().split(';')[0], 16)
|
|
||||||
|
# It's possible that non-chunked data is sent with a Transfer-Encoding: chunked header;
|
||||||
|
# to handle this, if it's not possible to decode the chunk length, then treat this as a regular LineReader
|
||||||
|
try:
|
||||||
|
chunkSize = int(lengthHeader.strip().split(';')[0], 16)
|
||||||
|
except Exception:
|
||||||
|
# can't parse the lengthHeader, treat this as non-chunk encoded from here on
|
||||||
|
self._process_read(lengthHeader)
|
||||||
|
self.notChunked = True
|
||||||
|
return
|
||||||
|
|
||||||
data = ''
|
data = ''
|
||||||
if chunkSize:
|
if chunkSize:
|
||||||
while len(data) < chunkSize:
|
while len(data) < chunkSize:
|
||||||
@ -310,6 +329,7 @@ class ChunkedLineReader(LineReader):
|
|||||||
|
|
||||||
self.buff = StringIO.StringIO(data)
|
self.buff = StringIO.StringIO(data)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import doctest
|
import doctest
|
||||||
|
Loading…
x
Reference in New Issue
Block a user