mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
file open() pass: convert all file reads and writes to ensure the binary 'b' mode flag is set (#56)
This commit is contained in:
parent
fb4bf817f7
commit
db75bda736
@ -30,7 +30,7 @@ class CDXFile(CDXSource):
|
||||
def load_cdx(self, query):
|
||||
def do_open():
|
||||
try:
|
||||
source = open(self.filename)
|
||||
source = open(self.filename, 'rb')
|
||||
gen = iter_range(source, query.key, query.end_key)
|
||||
for line in gen:
|
||||
yield line
|
||||
|
@ -26,7 +26,7 @@ test_cdx_dir = get_test_dir() + 'cdx/'
|
||||
|
||||
def load_cdx_into_redis(source, filename, key=None):
|
||||
# load a cdx into mock redis
|
||||
with open(test_cdx_dir + filename) as fh:
|
||||
with open(test_cdx_dir + filename, 'rb') as fh:
|
||||
for line in fh:
|
||||
zadd_cdx(source, line, key)
|
||||
|
||||
|
@ -84,7 +84,7 @@ class ZipNumCluster(CDXSource):
|
||||
self.loc_mtime = new_mtime
|
||||
|
||||
logging.debug('Loading loc from: ' + self.loc_filename)
|
||||
with open(self.loc_filename) as fh:
|
||||
with open(self.loc_filename, 'rb') as fh:
|
||||
for line in fh:
|
||||
parts = line.rstrip().split('\t')
|
||||
self.loc_map[parts[0]] = parts[1:]
|
||||
@ -112,7 +112,7 @@ class ZipNumCluster(CDXSource):
|
||||
def load_cdx(self, query):
|
||||
self.load_loc()
|
||||
|
||||
reader = open(self.summary)
|
||||
reader = open(self.summary, 'rb')
|
||||
|
||||
idx_iter = iter_range(reader,
|
||||
query.key,
|
||||
|
@ -334,7 +334,7 @@ class ProxyRouter(object):
|
||||
return None
|
||||
|
||||
buff = ''
|
||||
with open(self.ca.ca_file) as fh:
|
||||
with open(self.ca.ca_file, 'rb') as fh:
|
||||
buff = fh.read()
|
||||
|
||||
content_type = 'application/x-x509-ca-cert'
|
||||
|
@ -66,12 +66,12 @@ from pywb import get_test_dir
|
||||
test_cdx_dir = get_test_dir() + 'cdx/'
|
||||
|
||||
def print_binsearch_results(key, iter_func):
|
||||
with open(test_cdx_dir + 'iana.cdx') as cdx:
|
||||
with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
|
||||
for line in iter_func(cdx, key):
|
||||
print line
|
||||
|
||||
def print_binsearch_results_range(key, end_key, iter_func, prev_size=0):
|
||||
with open(test_cdx_dir + 'iana.cdx') as cdx:
|
||||
with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
|
||||
for line in iter_func(cdx, key, end_key, prev_size=prev_size):
|
||||
print line
|
||||
|
||||
|
@ -3,11 +3,11 @@ r"""
|
||||
#=================================================================
|
||||
|
||||
# DecompressingBufferedReader readline()
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline()
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
|
||||
' CDX N b a m s k r M S V g\n'
|
||||
|
||||
# detect not compressed
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline()
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
|
||||
' CDX N b a m s k r M S V g\n'
|
||||
|
||||
# decompress with on the fly compression, default gzip compression
|
||||
|
@ -115,7 +115,7 @@ def write_multi_cdx_index(output, inputs, **options):
|
||||
outpath = cdx_filename(filename)
|
||||
outpath = os.path.join(output, outpath)
|
||||
|
||||
with open(outpath, 'w') as outfile:
|
||||
with open(outpath, 'wb') as outfile:
|
||||
with open(fullpath, 'rb') as infile:
|
||||
write_cdx_index(outfile, infile, filename, **options)
|
||||
|
||||
@ -124,7 +124,7 @@ def write_multi_cdx_index(output, inputs, **options):
|
||||
if output == '-':
|
||||
outfile = sys.stdout
|
||||
else:
|
||||
outfile = open(output, 'w')
|
||||
outfile = open(output, 'wb')
|
||||
|
||||
if options.get('sort'):
|
||||
writer_cls = SortedCDXWriter
|
||||
|
@ -57,7 +57,7 @@ class RedisResolver:
|
||||
class PathIndexResolver:
|
||||
def __init__(self, pathindex_file):
|
||||
self.pathindex_file = pathindex_file
|
||||
self.reader = open(pathindex_file)
|
||||
self.reader = open(pathindex_file, 'rb')
|
||||
|
||||
def __call__(self, filename):
|
||||
result = iter_exact(self.reader, filename, '\t')
|
||||
|
@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/'
|
||||
TEST_WARC_DIR = get_test_dir() + 'warcs/'
|
||||
|
||||
def read_fully(cdx):
|
||||
with open(TEST_CDX_DIR + cdx, 'rU') as fh:
|
||||
with open(TEST_CDX_DIR + cdx, 'rb') as fh:
|
||||
curr = BytesIO()
|
||||
while True:
|
||||
b = fh.read()
|
||||
@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
|
||||
|
||||
print filename
|
||||
|
||||
with open(os.path.join(tmp_dir, filename), 'rU') as fh:
|
||||
with open(os.path.join(tmp_dir, filename), 'rb') as fh:
|
||||
lines = fh.read(8192).rstrip().split('\n')
|
||||
|
||||
finally:
|
||||
|
Loading…
x
Reference in New Issue
Block a user