mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
replace StringIO with BytesIO
This commit is contained in:
parent
1a6f2e2fe1
commit
3b1afc3e3d
@ -2,7 +2,7 @@ import os
|
||||
import collections
|
||||
import itertools
|
||||
import logging
|
||||
from cStringIO import StringIO
|
||||
from io import BytesIO
|
||||
import datetime
|
||||
|
||||
from cdxsource import CDXSource
|
||||
@ -189,7 +189,7 @@ class ZipNumCluster(CDXSource):
|
||||
def decompress_block(range_):
|
||||
decomp = gzip_decompressor()
|
||||
buff = decomp.decompress(reader.read(range_))
|
||||
return readline_to_iter(StringIO(buff))
|
||||
return readline_to_iter(BytesIO(buff))
|
||||
|
||||
iter_ = itertools.chain(*itertools.imap(decompress_block, ranges))
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
import StringIO
|
||||
from io import BytesIO
|
||||
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
from pywb.utils.bufferedreaders import ChunkedDataReader
|
||||
@ -149,11 +149,11 @@ class ReplayView:
|
||||
|
||||
# Buffer rewrite iterator and return a response from a string
|
||||
def buffered_response(self, status_headers, iterator):
|
||||
out = StringIO.StringIO()
|
||||
out = BytesIO()
|
||||
|
||||
try:
|
||||
for buff in iterator:
|
||||
out.write(buff)
|
||||
out.write(bytes(buff))
|
||||
|
||||
finally:
|
||||
content = out.getvalue()
|
||||
|
@ -1,4 +1,4 @@
|
||||
import StringIO
|
||||
from io import BytesIO
|
||||
import zlib
|
||||
|
||||
|
||||
@ -44,12 +44,13 @@ class DecompressingBufferedReader(object):
|
||||
|
||||
self.buff = None
|
||||
self.num_read = 0
|
||||
self.buff_size = 0
|
||||
|
||||
def _fillbuff(self, block_size=None):
|
||||
if not block_size:
|
||||
block_size = self.block_size
|
||||
|
||||
if not self.buff or self.buff.pos >= self.buff.len:
|
||||
if not self.buff or self.buff.tell() == self.buff_size:
|
||||
data = self.stream.read(block_size)
|
||||
self._process_read(data)
|
||||
|
||||
@ -57,7 +58,7 @@ class DecompressingBufferedReader(object):
|
||||
data = self._decompress(data)
|
||||
self.buff_size = len(data)
|
||||
self.num_read += self.buff_size
|
||||
self.buff = StringIO.StringIO(data)
|
||||
self.buff = BytesIO(data)
|
||||
|
||||
def _decompress(self, data):
|
||||
if self.decompressor and data:
|
||||
@ -129,21 +130,21 @@ class ChunkedDataReader(DecompressingBufferedReader):
|
||||
assumed to not be chunked and no more dechunking occurs.
|
||||
|
||||
Properly formatted chunked data:
|
||||
>>> c = ChunkedDataReader(StringIO.StringIO("4\r\n1234\r\n0\r\n\r\n"));
|
||||
>>> c = ChunkedDataReader(BytesIO("4\r\n1234\r\n0\r\n\r\n"));
|
||||
>>> c.read() + c.read()
|
||||
'1234'
|
||||
|
||||
Non-chunked data:
|
||||
>>> ChunkedDataReader(StringIO.StringIO("xyz123!@#")).read()
|
||||
>>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
|
||||
'xyz123!@#'
|
||||
|
||||
Starts like chunked data, but isn't:
|
||||
>>> c = ChunkedDataReader(StringIO.StringIO("1\r\nxyz123!@#"));
|
||||
>>> c = ChunkedDataReader(BytesIO("1\r\nxyz123!@#"));
|
||||
>>> c.read() + c.read()
|
||||
'1\r\nx123!@#'
|
||||
|
||||
Chunked data cut off part way through:
|
||||
>>> c = ChunkedDataReader(StringIO.StringIO("4\r\n1234\r\n4\r\n12"));
|
||||
>>> c = ChunkedDataReader(BytesIO("4\r\n1234\r\n4\r\n12"));
|
||||
>>> c.read() + c.read()
|
||||
'123412'
|
||||
"""
|
||||
@ -161,7 +162,7 @@ class ChunkedDataReader(DecompressingBufferedReader):
|
||||
if self.all_chunks_read:
|
||||
return
|
||||
|
||||
if not self.buff or self.buff.pos >= self.buff.len:
|
||||
if not self.buff or self.buff.tell() >= self.buff_size:
|
||||
length_header = self.stream.readline(64)
|
||||
self._data = ''
|
||||
|
||||
|
@ -8,7 +8,7 @@ import hmac
|
||||
import urllib2
|
||||
import time
|
||||
import pkg_resources
|
||||
|
||||
from io import open
|
||||
|
||||
#=================================================================
|
||||
def is_http(filename):
|
||||
|
@ -1,13 +1,13 @@
|
||||
#=================================================================
|
||||
"""
|
||||
# LimitReader Tests
|
||||
>>> LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26)
|
||||
>>> LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26)
|
||||
'abcdefghji'
|
||||
|
||||
>>> LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 8).readline(26)
|
||||
>>> LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 8).readline(26)
|
||||
'abcdefgh'
|
||||
|
||||
>>> read_multiple(LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10), [2, 2, 20])
|
||||
>>> read_multiple(LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 10), [2, 2, 20])
|
||||
'efghji'
|
||||
|
||||
# BlockLoader Tests (includes LimitReader)
|
||||
@ -30,6 +30,9 @@
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
|
||||
' CDX N b a m s k r M S V g\\n'
|
||||
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
|
||||
' CDX N b a m s k r M S V g\\n'
|
||||
|
||||
#DecompressingBufferedReader readline() with decompression (zipnum file, no header)
|
||||
>>> DecompressingBufferedReader(open(test_zip_dir + 'zipnum-sample.cdx.gz', 'rb'), decomp_type = 'gzip').readline()
|
||||
'com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz\\n'
|
||||
@ -38,7 +41,7 @@
|
||||
'Example Domain'
|
||||
|
||||
# test very small block size
|
||||
>>> dbr = DecompressingBufferedReader(StringIO.StringIO('ABCDEFG\\nHIJKLMN\\nOPQR\\nXYZ'), block_size = 3)
|
||||
>>> dbr = DecompressingBufferedReader(BytesIO('ABCDEFG\\nHIJKLMN\\nOPQR\\nXYZ'), block_size = 3)
|
||||
>>> dbr.readline(); dbr.readline(4); dbr.readline(); dbr.readline(); dbr.readline(2); dbr.readline(); dbr.readline()
|
||||
'ABCDEFG\\n'
|
||||
'HIJK'
|
||||
@ -52,7 +55,7 @@
|
||||
|
||||
#=================================================================
|
||||
import os
|
||||
import StringIO
|
||||
from io import BytesIO, open
|
||||
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
|
||||
from pywb.utils.loaders import LimitReader, SeekableTextFileReader
|
||||
from pywb.utils.bufferedreaders import DecompressingBufferedReader
|
||||
|
@ -1,17 +1,17 @@
|
||||
"""
|
||||
>>> StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO.StringIO(status_headers_1))
|
||||
>>> StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_1))
|
||||
StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [ ('Content-Type', 'ABC'),
|
||||
('Some', 'Value'),
|
||||
('Multi-Line', 'Value1 Also This')])
|
||||
|
||||
>>> StatusAndHeadersParser(['Other']).parse(StringIO.StringIO(status_headers_1))
|
||||
>>> StatusAndHeadersParser(['Other']).parse(BytesIO(status_headers_1))
|
||||
Traceback (most recent call last):
|
||||
StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK
|
||||
"""
|
||||
|
||||
|
||||
from pywb.utils.statusandheaders import StatusAndHeadersParser
|
||||
import StringIO
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
status_headers_1 = "\
|
||||
|
Loading…
x
Reference in New Issue
Block a user