1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

replace StringIO with BytesIO

This commit is contained in:
Ilya Kreymer 2014-03-08 09:30:19 -08:00
parent 1a6f2e2fe1
commit 3b1afc3e3d
6 changed files with 26 additions and 22 deletions

View File

@ -2,7 +2,7 @@ import os
import collections
import itertools
import logging
from cStringIO import StringIO
from io import BytesIO
import datetime
from cdxsource import CDXSource
@ -189,7 +189,7 @@ class ZipNumCluster(CDXSource):
def decompress_block(range_):
decomp = gzip_decompressor()
buff = decomp.decompress(reader.read(range_))
return readline_to_iter(StringIO(buff))
return readline_to_iter(BytesIO(buff))
iter_ = itertools.chain(*itertools.imap(decompress_block, ranges))

View File

@ -1,4 +1,4 @@
import StringIO
from io import BytesIO
from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.utils.bufferedreaders import ChunkedDataReader
@ -149,11 +149,11 @@ class ReplayView:
# Buffer rewrite iterator and return a response from a string
def buffered_response(self, status_headers, iterator):
out = StringIO.StringIO()
out = BytesIO()
try:
for buff in iterator:
out.write(buff)
out.write(bytes(buff))
finally:
content = out.getvalue()

View File

@ -1,4 +1,4 @@
import StringIO
from io import BytesIO
import zlib
@ -44,12 +44,13 @@ class DecompressingBufferedReader(object):
self.buff = None
self.num_read = 0
self.buff_size = 0
def _fillbuff(self, block_size=None):
if not block_size:
block_size = self.block_size
if not self.buff or self.buff.pos >= self.buff.len:
if not self.buff or self.buff.tell() == self.buff_size:
data = self.stream.read(block_size)
self._process_read(data)
@ -57,7 +58,7 @@ class DecompressingBufferedReader(object):
data = self._decompress(data)
self.buff_size = len(data)
self.num_read += self.buff_size
self.buff = StringIO.StringIO(data)
self.buff = BytesIO(data)
def _decompress(self, data):
if self.decompressor and data:
@ -129,21 +130,21 @@ class ChunkedDataReader(DecompressingBufferedReader):
assumed to not be chunked and no more dechunking occurs.
Properly formatted chunked data:
>>> c = ChunkedDataReader(StringIO.StringIO("4\r\n1234\r\n0\r\n\r\n"));
>>> c = ChunkedDataReader(BytesIO("4\r\n1234\r\n0\r\n\r\n"));
>>> c.read() + c.read()
'1234'
Non-chunked data:
>>> ChunkedDataReader(StringIO.StringIO("xyz123!@#")).read()
>>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
'xyz123!@#'
Starts like chunked data, but isn't:
>>> c = ChunkedDataReader(StringIO.StringIO("1\r\nxyz123!@#"));
>>> c = ChunkedDataReader(BytesIO("1\r\nxyz123!@#"));
>>> c.read() + c.read()
'1\r\nx123!@#'
Chunked data cut off part way through:
>>> c = ChunkedDataReader(StringIO.StringIO("4\r\n1234\r\n4\r\n12"));
>>> c = ChunkedDataReader(BytesIO("4\r\n1234\r\n4\r\n12"));
>>> c.read() + c.read()
'123412'
"""
@ -161,7 +162,7 @@ class ChunkedDataReader(DecompressingBufferedReader):
if self.all_chunks_read:
return
if not self.buff or self.buff.pos >= self.buff.len:
if not self.buff or self.buff.tell() >= self.buff_size:
length_header = self.stream.readline(64)
self._data = ''

View File

@ -8,7 +8,7 @@ import hmac
import urllib2
import time
import pkg_resources
from io import open
#=================================================================
def is_http(filename):

View File

@ -1,13 +1,13 @@
#=================================================================
"""
# LimitReader Tests
>>> LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26)
>>> LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26)
'abcdefghji'
>>> LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 8).readline(26)
>>> LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 8).readline(26)
'abcdefgh'
>>> read_multiple(LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10), [2, 2, 20])
>>> read_multiple(LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 10), [2, 2, 20])
'efghji'
# BlockLoader Tests (includes LimitReader)
@ -30,6 +30,9 @@
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
' CDX N b a m s k r M S V g\\n'
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
' CDX N b a m s k r M S V g\\n'
#DecompressingBufferedReader readline() with decompression (zipnum file, no header)
>>> DecompressingBufferedReader(open(test_zip_dir + 'zipnum-sample.cdx.gz', 'rb'), decomp_type = 'gzip').readline()
'com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz\\n'
@ -38,7 +41,7 @@
'Example Domain'
# test very small block size
>>> dbr = DecompressingBufferedReader(StringIO.StringIO('ABCDEFG\\nHIJKLMN\\nOPQR\\nXYZ'), block_size = 3)
>>> dbr = DecompressingBufferedReader(BytesIO('ABCDEFG\\nHIJKLMN\\nOPQR\\nXYZ'), block_size = 3)
>>> dbr.readline(); dbr.readline(4); dbr.readline(); dbr.readline(); dbr.readline(2); dbr.readline(); dbr.readline()
'ABCDEFG\\n'
'HIJK'
@ -52,7 +55,7 @@
#=================================================================
import os
import StringIO
from io import BytesIO, open
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
from pywb.utils.loaders import LimitReader, SeekableTextFileReader
from pywb.utils.bufferedreaders import DecompressingBufferedReader

View File

@ -1,17 +1,17 @@
"""
>>> StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO.StringIO(status_headers_1))
>>> StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_1))
StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [ ('Content-Type', 'ABC'),
('Some', 'Value'),
('Multi-Line', 'Value1 Also This')])
>>> StatusAndHeadersParser(['Other']).parse(StringIO.StringIO(status_headers_1))
>>> StatusAndHeadersParser(['Other']).parse(BytesIO(status_headers_1))
Traceback (most recent call last):
StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK
"""
from pywb.utils.statusandheaders import StatusAndHeadersParser
import StringIO
from io import BytesIO
status_headers_1 = "\