1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

file open() pass: convert all read and write open() calls to ensure the binary 'b' flag is set (#56)

This commit is contained in:
Ilya Kreymer 2015-01-11 18:53:47 -08:00
parent fb4bf817f7
commit db75bda736
9 changed files with 14 additions and 14 deletions

View File

@ -30,7 +30,7 @@ class CDXFile(CDXSource):
def load_cdx(self, query): def load_cdx(self, query):
def do_open(): def do_open():
try: try:
source = open(self.filename) source = open(self.filename, 'rb')
gen = iter_range(source, query.key, query.end_key) gen = iter_range(source, query.key, query.end_key)
for line in gen: for line in gen:
yield line yield line

View File

@ -26,7 +26,7 @@ test_cdx_dir = get_test_dir() + 'cdx/'
def load_cdx_into_redis(source, filename, key=None): def load_cdx_into_redis(source, filename, key=None):
# load a cdx into mock redis # load a cdx into mock redis
with open(test_cdx_dir + filename) as fh: with open(test_cdx_dir + filename, 'rb') as fh:
for line in fh: for line in fh:
zadd_cdx(source, line, key) zadd_cdx(source, line, key)

View File

@ -84,7 +84,7 @@ class ZipNumCluster(CDXSource):
self.loc_mtime = new_mtime self.loc_mtime = new_mtime
logging.debug('Loading loc from: ' + self.loc_filename) logging.debug('Loading loc from: ' + self.loc_filename)
with open(self.loc_filename) as fh: with open(self.loc_filename, 'rb') as fh:
for line in fh: for line in fh:
parts = line.rstrip().split('\t') parts = line.rstrip().split('\t')
self.loc_map[parts[0]] = parts[1:] self.loc_map[parts[0]] = parts[1:]
@ -112,7 +112,7 @@ class ZipNumCluster(CDXSource):
def load_cdx(self, query): def load_cdx(self, query):
self.load_loc() self.load_loc()
reader = open(self.summary) reader = open(self.summary, 'rb')
idx_iter = iter_range(reader, idx_iter = iter_range(reader,
query.key, query.key,

View File

@ -334,7 +334,7 @@ class ProxyRouter(object):
return None return None
buff = '' buff = ''
with open(self.ca.ca_file) as fh: with open(self.ca.ca_file, 'rb') as fh:
buff = fh.read() buff = fh.read()
content_type = 'application/x-x509-ca-cert' content_type = 'application/x-x509-ca-cert'

View File

@ -66,12 +66,12 @@ from pywb import get_test_dir
test_cdx_dir = get_test_dir() + 'cdx/' test_cdx_dir = get_test_dir() + 'cdx/'
def print_binsearch_results(key, iter_func): def print_binsearch_results(key, iter_func):
with open(test_cdx_dir + 'iana.cdx') as cdx: with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
for line in iter_func(cdx, key): for line in iter_func(cdx, key):
print line print line
def print_binsearch_results_range(key, end_key, iter_func, prev_size=0): def print_binsearch_results_range(key, end_key, iter_func, prev_size=0):
with open(test_cdx_dir + 'iana.cdx') as cdx: with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
for line in iter_func(cdx, key, end_key, prev_size=prev_size): for line in iter_func(cdx, key, end_key, prev_size=prev_size):
print line print line

View File

@ -3,11 +3,11 @@ r"""
#================================================================= #=================================================================
# DecompressingBufferedReader readline() # DecompressingBufferedReader readline()
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline() >>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
' CDX N b a m s k r M S V g\n' ' CDX N b a m s k r M S V g\n'
# detect not compressed # detect not compressed
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline() >>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
' CDX N b a m s k r M S V g\n' ' CDX N b a m s k r M S V g\n'
# decompress with on the fly compression, default gzip compression # decompress with on the fly compression, default gzip compression

View File

@ -115,7 +115,7 @@ def write_multi_cdx_index(output, inputs, **options):
outpath = cdx_filename(filename) outpath = cdx_filename(filename)
outpath = os.path.join(output, outpath) outpath = os.path.join(output, outpath)
with open(outpath, 'w') as outfile: with open(outpath, 'wb') as outfile:
with open(fullpath, 'rb') as infile: with open(fullpath, 'rb') as infile:
write_cdx_index(outfile, infile, filename, **options) write_cdx_index(outfile, infile, filename, **options)
@ -124,7 +124,7 @@ def write_multi_cdx_index(output, inputs, **options):
if output == '-': if output == '-':
outfile = sys.stdout outfile = sys.stdout
else: else:
outfile = open(output, 'w') outfile = open(output, 'wb')
if options.get('sort'): if options.get('sort'):
writer_cls = SortedCDXWriter writer_cls = SortedCDXWriter

View File

@ -57,7 +57,7 @@ class RedisResolver:
class PathIndexResolver: class PathIndexResolver:
def __init__(self, pathindex_file): def __init__(self, pathindex_file):
self.pathindex_file = pathindex_file self.pathindex_file = pathindex_file
self.reader = open(pathindex_file) self.reader = open(pathindex_file, 'rb')
def __call__(self, filename): def __call__(self, filename):
result = iter_exact(self.reader, filename, '\t') result = iter_exact(self.reader, filename, '\t')

View File

@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/'
TEST_WARC_DIR = get_test_dir() + 'warcs/' TEST_WARC_DIR = get_test_dir() + 'warcs/'
def read_fully(cdx): def read_fully(cdx):
with open(TEST_CDX_DIR + cdx, 'rU') as fh: with open(TEST_CDX_DIR + cdx, 'rb') as fh:
curr = BytesIO() curr = BytesIO()
while True: while True:
b = fh.read() b = fh.read()
@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
print filename print filename
with open(os.path.join(tmp_dir, filename), 'rU') as fh: with open(os.path.join(tmp_dir, filename), 'rb') as fh:
lines = fh.read(8192).rstrip().split('\n') lines = fh.read(8192).rstrip().split('\n')
finally: finally: