1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

replace StringIO with BytesIO

This commit is contained in:
Ilya Kreymer 2014-03-08 09:30:19 -08:00
parent 1a6f2e2fe1
commit 3b1afc3e3d
6 changed files with 26 additions and 22 deletions

View File

@ -2,7 +2,7 @@ import os
import collections import collections
import itertools import itertools
import logging import logging
from cStringIO import StringIO from io import BytesIO
import datetime import datetime
from cdxsource import CDXSource from cdxsource import CDXSource
@ -189,7 +189,7 @@ class ZipNumCluster(CDXSource):
def decompress_block(range_): def decompress_block(range_):
decomp = gzip_decompressor() decomp = gzip_decompressor()
buff = decomp.decompress(reader.read(range_)) buff = decomp.decompress(reader.read(range_))
return readline_to_iter(StringIO(buff)) return readline_to_iter(BytesIO(buff))
iter_ = itertools.chain(*itertools.imap(decompress_block, ranges)) iter_ = itertools.chain(*itertools.imap(decompress_block, ranges))

View File

@ -1,4 +1,4 @@
import StringIO from io import BytesIO
from pywb.rewrite.url_rewriter import UrlRewriter from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.utils.bufferedreaders import ChunkedDataReader from pywb.utils.bufferedreaders import ChunkedDataReader
@ -149,11 +149,11 @@ class ReplayView:
# Buffer rewrite iterator and return a response from a string # Buffer rewrite iterator and return a response from a string
def buffered_response(self, status_headers, iterator): def buffered_response(self, status_headers, iterator):
out = StringIO.StringIO() out = BytesIO()
try: try:
for buff in iterator: for buff in iterator:
out.write(buff) out.write(bytes(buff))
finally: finally:
content = out.getvalue() content = out.getvalue()

View File

@ -1,4 +1,4 @@
import StringIO from io import BytesIO
import zlib import zlib
@ -44,12 +44,13 @@ class DecompressingBufferedReader(object):
self.buff = None self.buff = None
self.num_read = 0 self.num_read = 0
self.buff_size = 0
def _fillbuff(self, block_size=None): def _fillbuff(self, block_size=None):
if not block_size: if not block_size:
block_size = self.block_size block_size = self.block_size
if not self.buff or self.buff.pos >= self.buff.len: if not self.buff or self.buff.tell() == self.buff_size:
data = self.stream.read(block_size) data = self.stream.read(block_size)
self._process_read(data) self._process_read(data)
@ -57,7 +58,7 @@ class DecompressingBufferedReader(object):
data = self._decompress(data) data = self._decompress(data)
self.buff_size = len(data) self.buff_size = len(data)
self.num_read += self.buff_size self.num_read += self.buff_size
self.buff = StringIO.StringIO(data) self.buff = BytesIO(data)
def _decompress(self, data): def _decompress(self, data):
if self.decompressor and data: if self.decompressor and data:
@ -129,21 +130,21 @@ class ChunkedDataReader(DecompressingBufferedReader):
assumed to not be chunked and no more dechunking occurs. assumed to not be chunked and no more dechunking occurs.
Properly formatted chunked data: Properly formatted chunked data:
>>> c = ChunkedDataReader(StringIO.StringIO("4\r\n1234\r\n0\r\n\r\n")); >>> c = ChunkedDataReader(BytesIO("4\r\n1234\r\n0\r\n\r\n"));
>>> c.read() + c.read() >>> c.read() + c.read()
'1234' '1234'
Non-chunked data: Non-chunked data:
>>> ChunkedDataReader(StringIO.StringIO("xyz123!@#")).read() >>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
'xyz123!@#' 'xyz123!@#'
Starts like chunked data, but isn't: Starts like chunked data, but isn't:
>>> c = ChunkedDataReader(StringIO.StringIO("1\r\nxyz123!@#")); >>> c = ChunkedDataReader(BytesIO("1\r\nxyz123!@#"));
>>> c.read() + c.read() >>> c.read() + c.read()
'1\r\nx123!@#' '1\r\nx123!@#'
Chunked data cut off part way through: Chunked data cut off part way through:
>>> c = ChunkedDataReader(StringIO.StringIO("4\r\n1234\r\n4\r\n12")); >>> c = ChunkedDataReader(BytesIO("4\r\n1234\r\n4\r\n12"));
>>> c.read() + c.read() >>> c.read() + c.read()
'123412' '123412'
""" """
@ -161,7 +162,7 @@ class ChunkedDataReader(DecompressingBufferedReader):
if self.all_chunks_read: if self.all_chunks_read:
return return
if not self.buff or self.buff.pos >= self.buff.len: if not self.buff or self.buff.tell() >= self.buff_size:
length_header = self.stream.readline(64) length_header = self.stream.readline(64)
self._data = '' self._data = ''

View File

@ -8,7 +8,7 @@ import hmac
import urllib2 import urllib2
import time import time
import pkg_resources import pkg_resources
from io import open
#================================================================= #=================================================================
def is_http(filename): def is_http(filename):

View File

@ -1,13 +1,13 @@
#================================================================= #=================================================================
""" """
# LimitReader Tests # LimitReader Tests
>>> LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26) >>> LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26)
'abcdefghji' 'abcdefghji'
>>> LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 8).readline(26) >>> LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 8).readline(26)
'abcdefgh' 'abcdefgh'
>>> read_multiple(LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10), [2, 2, 20]) >>> read_multiple(LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 10), [2, 2, 20])
'efghji' 'efghji'
# BlockLoader Tests (includes LimitReader) # BlockLoader Tests (includes LimitReader)
@ -30,6 +30,9 @@
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline() >>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
' CDX N b a m s k r M S V g\\n' ' CDX N b a m s k r M S V g\\n'
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
' CDX N b a m s k r M S V g\\n'
#DecompressingBufferedReader readline() with decompression (zipnum file, no header) #DecompressingBufferedReader readline() with decompression (zipnum file, no header)
>>> DecompressingBufferedReader(open(test_zip_dir + 'zipnum-sample.cdx.gz', 'rb'), decomp_type = 'gzip').readline() >>> DecompressingBufferedReader(open(test_zip_dir + 'zipnum-sample.cdx.gz', 'rb'), decomp_type = 'gzip').readline()
'com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz\\n' 'com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz\\n'
@ -38,7 +41,7 @@
'Example Domain' 'Example Domain'
# test very small block size # test very small block size
>>> dbr = DecompressingBufferedReader(StringIO.StringIO('ABCDEFG\\nHIJKLMN\\nOPQR\\nXYZ'), block_size = 3) >>> dbr = DecompressingBufferedReader(BytesIO('ABCDEFG\\nHIJKLMN\\nOPQR\\nXYZ'), block_size = 3)
>>> dbr.readline(); dbr.readline(4); dbr.readline(); dbr.readline(); dbr.readline(2); dbr.readline(); dbr.readline() >>> dbr.readline(); dbr.readline(4); dbr.readline(); dbr.readline(); dbr.readline(2); dbr.readline(); dbr.readline()
'ABCDEFG\\n' 'ABCDEFG\\n'
'HIJK' 'HIJK'
@ -52,7 +55,7 @@
#================================================================= #=================================================================
import os import os
import StringIO from io import BytesIO, open
from pywb.utils.loaders import BlockLoader, HMACCookieMaker from pywb.utils.loaders import BlockLoader, HMACCookieMaker
from pywb.utils.loaders import LimitReader, SeekableTextFileReader from pywb.utils.loaders import LimitReader, SeekableTextFileReader
from pywb.utils.bufferedreaders import DecompressingBufferedReader from pywb.utils.bufferedreaders import DecompressingBufferedReader

View File

@ -1,17 +1,17 @@
""" """
>>> StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO.StringIO(status_headers_1)) >>> StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_1))
StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [ ('Content-Type', 'ABC'), StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [ ('Content-Type', 'ABC'),
('Some', 'Value'), ('Some', 'Value'),
('Multi-Line', 'Value1 Also This')]) ('Multi-Line', 'Value1 Also This')])
>>> StatusAndHeadersParser(['Other']).parse(StringIO.StringIO(status_headers_1)) >>> StatusAndHeadersParser(['Other']).parse(BytesIO(status_headers_1))
Traceback (most recent call last): Traceback (most recent call last):
StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK
""" """
from pywb.utils.statusandheaders import StatusAndHeadersParser from pywb.utils.statusandheaders import StatusAndHeadersParser
import StringIO from io import BytesIO
status_headers_1 = "\ status_headers_1 = "\