diff --git a/pywb/cdx/cdxsource.py b/pywb/cdx/cdxsource.py index ac0eaf74..e3174ab1 100644 --- a/pywb/cdx/cdxsource.py +++ b/pywb/cdx/cdxsource.py @@ -30,7 +30,7 @@ class CDXFile(CDXSource): def load_cdx(self, query): def do_open(): try: - source = open(self.filename) + source = open(self.filename, 'rb') gen = iter_range(source, query.key, query.end_key) for line in gen: yield line diff --git a/pywb/cdx/test/test_redis_source.py b/pywb/cdx/test/test_redis_source.py index 9f5daa8d..a52411dd 100644 --- a/pywb/cdx/test/test_redis_source.py +++ b/pywb/cdx/test/test_redis_source.py @@ -26,7 +26,7 @@ test_cdx_dir = get_test_dir() + 'cdx/' def load_cdx_into_redis(source, filename, key=None): # load a cdx into mock redis - with open(test_cdx_dir + filename) as fh: + with open(test_cdx_dir + filename, 'rb') as fh: for line in fh: zadd_cdx(source, line, key) diff --git a/pywb/cdx/zipnum.py b/pywb/cdx/zipnum.py index 071319a5..87ec1340 100644 --- a/pywb/cdx/zipnum.py +++ b/pywb/cdx/zipnum.py @@ -84,7 +84,7 @@ class ZipNumCluster(CDXSource): self.loc_mtime = new_mtime logging.debug('Loading loc from: ' + self.loc_filename) - with open(self.loc_filename) as fh: + with open(self.loc_filename, 'rb') as fh: for line in fh: parts = line.rstrip().split('\t') self.loc_map[parts[0]] = parts[1:] @@ -112,7 +112,7 @@ class ZipNumCluster(CDXSource): def load_cdx(self, query): self.load_loc() - reader = open(self.summary) + reader = open(self.summary, 'rb') idx_iter = iter_range(reader, query.key, diff --git a/pywb/framework/proxy.py b/pywb/framework/proxy.py index 57a081e8..f8bab933 100644 --- a/pywb/framework/proxy.py +++ b/pywb/framework/proxy.py @@ -334,7 +334,7 @@ class ProxyRouter(object): return None buff = '' - with open(self.ca.ca_file) as fh: + with open(self.ca.ca_file, 'rb') as fh: buff = fh.read() content_type = 'application/x-x509-ca-cert' diff --git a/pywb/utils/test/test_binsearch.py b/pywb/utils/test/test_binsearch.py index c599377e..7fae02ff 100644 --- a/pywb/utils/test/test_binsearch.py +++ b/pywb/utils/test/test_binsearch.py @@ -66,12 +66,12 @@ from pywb import get_test_dir test_cdx_dir = get_test_dir() + 'cdx/' def print_binsearch_results(key, iter_func): - with open(test_cdx_dir + 'iana.cdx') as cdx: + with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx: for line in iter_func(cdx, key): print line def print_binsearch_results_range(key, end_key, iter_func, prev_size=0): - with open(test_cdx_dir + 'iana.cdx') as cdx: + with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx: for line in iter_func(cdx, key, end_key, prev_size=prev_size): print line diff --git a/pywb/utils/test/test_bufferedreaders.py b/pywb/utils/test/test_bufferedreaders.py index 0a249981..cd5f3787 100644 --- a/pywb/utils/test/test_bufferedreaders.py +++ b/pywb/utils/test/test_bufferedreaders.py @@ -3,11 +3,11 @@ r""" #================================================================= # DecompressingBufferedReader readline() ->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline() +>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline() ' CDX N b a m s k r M S V g\n' # detect not compressed ->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline() +>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline() ' CDX N b a m s k r M S V g\n' # decompress with on the fly compression, default gzip compression diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py index aa432f38..acd492f9 100644 --- a/pywb/warc/cdxindexer.py +++ b/pywb/warc/cdxindexer.py @@ -115,7 +115,7 @@ def write_multi_cdx_index(output, inputs, **options): outpath = cdx_filename(filename) outpath = os.path.join(output, outpath) - with open(outpath, 'w') as outfile: + with open(outpath, 'wb') as outfile: with open(fullpath, 'rb') as infile: write_cdx_index(outfile, infile, filename, **options) @@ -124,7 +124,7 @@ def write_multi_cdx_index(output, inputs, **options): if output == '-': outfile = sys.stdout else: - outfile = open(output, 'w') + outfile = open(output, 'wb') if options.get('sort'): writer_cls = SortedCDXWriter diff --git a/pywb/warc/pathresolvers.py b/pywb/warc/pathresolvers.py index 2d1f7439..6e710533 100644 --- a/pywb/warc/pathresolvers.py +++ b/pywb/warc/pathresolvers.py @@ -57,7 +57,7 @@ class RedisResolver: class PathIndexResolver: def __init__(self, pathindex_file): self.pathindex_file = pathindex_file - self.reader = open(pathindex_file) + self.reader = open(pathindex_file, 'rb') def __call__(self, filename): result = iter_exact(self.reader, filename, '\t') diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py index 2e704530..7e185a8c 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/' TEST_WARC_DIR = get_test_dir() + 'warcs/' def read_fully(cdx): - with open(TEST_CDX_DIR + cdx, 'rU') as fh: + with open(TEST_CDX_DIR + cdx, 'rb') as fh: curr = BytesIO() while True: b = fh.read() @@ -213,7 +213,7 @@ def cli_lines_with_dir(input_): print filename - with open(os.path.join(tmp_dir, filename), 'rU') as fh: + with open(os.path.join(tmp_dir, filename), 'rb') as fh: lines = fh.read(8192).rstrip().split('\n') finally: