mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
file open() pass: convert all reads and writes to ensure the binary 'b' flag is set (#56)
This commit is contained in:
parent
fb4bf817f7
commit
db75bda736
@ -30,7 +30,7 @@ class CDXFile(CDXSource):
|
|||||||
def load_cdx(self, query):
|
def load_cdx(self, query):
|
||||||
def do_open():
|
def do_open():
|
||||||
try:
|
try:
|
||||||
source = open(self.filename)
|
source = open(self.filename, 'rb')
|
||||||
gen = iter_range(source, query.key, query.end_key)
|
gen = iter_range(source, query.key, query.end_key)
|
||||||
for line in gen:
|
for line in gen:
|
||||||
yield line
|
yield line
|
||||||
|
@ -26,7 +26,7 @@ test_cdx_dir = get_test_dir() + 'cdx/'
|
|||||||
|
|
||||||
def load_cdx_into_redis(source, filename, key=None):
|
def load_cdx_into_redis(source, filename, key=None):
|
||||||
# load a cdx into mock redis
|
# load a cdx into mock redis
|
||||||
with open(test_cdx_dir + filename) as fh:
|
with open(test_cdx_dir + filename, 'rb') as fh:
|
||||||
for line in fh:
|
for line in fh:
|
||||||
zadd_cdx(source, line, key)
|
zadd_cdx(source, line, key)
|
||||||
|
|
||||||
|
@ -84,7 +84,7 @@ class ZipNumCluster(CDXSource):
|
|||||||
self.loc_mtime = new_mtime
|
self.loc_mtime = new_mtime
|
||||||
|
|
||||||
logging.debug('Loading loc from: ' + self.loc_filename)
|
logging.debug('Loading loc from: ' + self.loc_filename)
|
||||||
with open(self.loc_filename) as fh:
|
with open(self.loc_filename, 'rb') as fh:
|
||||||
for line in fh:
|
for line in fh:
|
||||||
parts = line.rstrip().split('\t')
|
parts = line.rstrip().split('\t')
|
||||||
self.loc_map[parts[0]] = parts[1:]
|
self.loc_map[parts[0]] = parts[1:]
|
||||||
@ -112,7 +112,7 @@ class ZipNumCluster(CDXSource):
|
|||||||
def load_cdx(self, query):
|
def load_cdx(self, query):
|
||||||
self.load_loc()
|
self.load_loc()
|
||||||
|
|
||||||
reader = open(self.summary)
|
reader = open(self.summary, 'rb')
|
||||||
|
|
||||||
idx_iter = iter_range(reader,
|
idx_iter = iter_range(reader,
|
||||||
query.key,
|
query.key,
|
||||||
|
@ -334,7 +334,7 @@ class ProxyRouter(object):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
buff = ''
|
buff = ''
|
||||||
with open(self.ca.ca_file) as fh:
|
with open(self.ca.ca_file, 'rb') as fh:
|
||||||
buff = fh.read()
|
buff = fh.read()
|
||||||
|
|
||||||
content_type = 'application/x-x509-ca-cert'
|
content_type = 'application/x-x509-ca-cert'
|
||||||
|
@ -66,12 +66,12 @@ from pywb import get_test_dir
|
|||||||
test_cdx_dir = get_test_dir() + 'cdx/'
|
test_cdx_dir = get_test_dir() + 'cdx/'
|
||||||
|
|
||||||
def print_binsearch_results(key, iter_func):
|
def print_binsearch_results(key, iter_func):
|
||||||
with open(test_cdx_dir + 'iana.cdx') as cdx:
|
with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
|
||||||
for line in iter_func(cdx, key):
|
for line in iter_func(cdx, key):
|
||||||
print line
|
print line
|
||||||
|
|
||||||
def print_binsearch_results_range(key, end_key, iter_func, prev_size=0):
|
def print_binsearch_results_range(key, end_key, iter_func, prev_size=0):
|
||||||
with open(test_cdx_dir + 'iana.cdx') as cdx:
|
with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
|
||||||
for line in iter_func(cdx, key, end_key, prev_size=prev_size):
|
for line in iter_func(cdx, key, end_key, prev_size=prev_size):
|
||||||
print line
|
print line
|
||||||
|
|
||||||
|
@ -3,11 +3,11 @@ r"""
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
# DecompressingBufferedReader readline()
|
# DecompressingBufferedReader readline()
|
||||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline()
|
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
|
||||||
' CDX N b a m s k r M S V g\n'
|
' CDX N b a m s k r M S V g\n'
|
||||||
|
|
||||||
# detect not compressed
|
# detect not compressed
|
||||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline()
|
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
|
||||||
' CDX N b a m s k r M S V g\n'
|
' CDX N b a m s k r M S V g\n'
|
||||||
|
|
||||||
# decompress with on the fly compression, default gzip compression
|
# decompress with on the fly compression, default gzip compression
|
||||||
|
@ -115,7 +115,7 @@ def write_multi_cdx_index(output, inputs, **options):
|
|||||||
outpath = cdx_filename(filename)
|
outpath = cdx_filename(filename)
|
||||||
outpath = os.path.join(output, outpath)
|
outpath = os.path.join(output, outpath)
|
||||||
|
|
||||||
with open(outpath, 'w') as outfile:
|
with open(outpath, 'wb') as outfile:
|
||||||
with open(fullpath, 'rb') as infile:
|
with open(fullpath, 'rb') as infile:
|
||||||
write_cdx_index(outfile, infile, filename, **options)
|
write_cdx_index(outfile, infile, filename, **options)
|
||||||
|
|
||||||
@ -124,7 +124,7 @@ def write_multi_cdx_index(output, inputs, **options):
|
|||||||
if output == '-':
|
if output == '-':
|
||||||
outfile = sys.stdout
|
outfile = sys.stdout
|
||||||
else:
|
else:
|
||||||
outfile = open(output, 'w')
|
outfile = open(output, 'wb')
|
||||||
|
|
||||||
if options.get('sort'):
|
if options.get('sort'):
|
||||||
writer_cls = SortedCDXWriter
|
writer_cls = SortedCDXWriter
|
||||||
|
@ -57,7 +57,7 @@ class RedisResolver:
|
|||||||
class PathIndexResolver:
|
class PathIndexResolver:
|
||||||
def __init__(self, pathindex_file):
|
def __init__(self, pathindex_file):
|
||||||
self.pathindex_file = pathindex_file
|
self.pathindex_file = pathindex_file
|
||||||
self.reader = open(pathindex_file)
|
self.reader = open(pathindex_file, 'rb')
|
||||||
|
|
||||||
def __call__(self, filename):
|
def __call__(self, filename):
|
||||||
result = iter_exact(self.reader, filename, '\t')
|
result = iter_exact(self.reader, filename, '\t')
|
||||||
|
@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/'
|
|||||||
TEST_WARC_DIR = get_test_dir() + 'warcs/'
|
TEST_WARC_DIR = get_test_dir() + 'warcs/'
|
||||||
|
|
||||||
def read_fully(cdx):
|
def read_fully(cdx):
|
||||||
with open(TEST_CDX_DIR + cdx, 'rU') as fh:
|
with open(TEST_CDX_DIR + cdx, 'rb') as fh:
|
||||||
curr = BytesIO()
|
curr = BytesIO()
|
||||||
while True:
|
while True:
|
||||||
b = fh.read()
|
b = fh.read()
|
||||||
@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
|
|||||||
|
|
||||||
print filename
|
print filename
|
||||||
|
|
||||||
with open(os.path.join(tmp_dir, filename), 'rU') as fh:
|
with open(os.path.join(tmp_dir, filename), 'rb') as fh:
|
||||||
lines = fh.read(8192).rstrip().split('\n')
|
lines = fh.read(8192).rstrip().split('\n')
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user