file open() pass: convert all read and write to ensure binary 'b' flag is set (#56)

2025-03-15 00:03:28 +01:00 · 2015-01-11 18:53:47 -08:00 · 2015-01-11 18:53:47 -08:00 · db75bda736
commit db75bda736
parent fb4bf817f7
9 changed files with 14 additions and 14 deletions
--- a/pywb/cdx/cdxsource.py
+++ b/pywb/cdx/cdxsource.py
@@ -30,7 +30,7 @@ class CDXFile(CDXSource):
    def load_cdx(self, query):
        def do_open():
            try:
-                source = open(self.filename)
+                source = open(self.filename, 'rb')
                gen = iter_range(source, query.key, query.end_key)
                for line in gen:
                    yield line
--- a/pywb/cdx/test/test_redis_source.py
+++ b/pywb/cdx/test/test_redis_source.py
@@ -26,7 +26,7 @@ test_cdx_dir = get_test_dir() + 'cdx/'

 def load_cdx_into_redis(source, filename, key=None):
    # load a cdx into mock redis
-    with open(test_cdx_dir + filename) as fh:
+    with open(test_cdx_dir + filename, 'rb') as fh:
        for line in fh:
            zadd_cdx(source, line, key)

--- a/pywb/cdx/zipnum.py
+++ b/pywb/cdx/zipnum.py
@@ -84,7 +84,7 @@ class ZipNumCluster(CDXSource):
        self.loc_mtime = new_mtime

        logging.debug('Loading loc from: ' + self.loc_filename)
-        with open(self.loc_filename) as fh:
+        with open(self.loc_filename, 'rb') as fh:
            for line in fh:
                parts = line.rstrip().split('\t')
                self.loc_map[parts[0]] = parts[1:]
@@ -112,7 +112,7 @@ class ZipNumCluster(CDXSource):
    def load_cdx(self, query):
        self.load_loc()

-        reader = open(self.summary)
+        reader = open(self.summary, 'rb')

        idx_iter = iter_range(reader,
                              query.key,
--- a/pywb/framework/proxy.py
+++ b/pywb/framework/proxy.py
@@ -334,7 +334,7 @@ class ProxyRouter(object):
                return None

            buff = ''
-            with open(self.ca.ca_file) as fh:
+            with open(self.ca.ca_file, 'rb') as fh:
                buff = fh.read()

            content_type = 'application/x-x509-ca-cert'
--- a/pywb/utils/test/test_binsearch.py
+++ b/pywb/utils/test/test_binsearch.py
@@ -66,12 +66,12 @@ from pywb import get_test_dir
 test_cdx_dir = get_test_dir() + 'cdx/'

 def print_binsearch_results(key, iter_func):
-    with open(test_cdx_dir + 'iana.cdx') as cdx:
+    with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
        for line in iter_func(cdx, key):
            print line

 def print_binsearch_results_range(key, end_key, iter_func, prev_size=0):
-    with open(test_cdx_dir + 'iana.cdx') as cdx:
+    with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
        for line in iter_func(cdx, key, end_key, prev_size=prev_size):
            print line

--- a/pywb/utils/test/test_bufferedreaders.py
+++ b/pywb/utils/test/test_bufferedreaders.py
@@ -3,11 +3,11 @@ r"""
 #=================================================================

 # DecompressingBufferedReader readline()
->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline()
+>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
 ' CDX N b a m s k r M S V g\n'

 # detect not compressed
->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline()
+>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
 ' CDX N b a m s k r M S V g\n'

 # decompress with on the fly compression, default gzip compression
--- a/pywb/warc/cdxindexer.py
+++ b/pywb/warc/cdxindexer.py
@@ -115,7 +115,7 @@ def write_multi_cdx_index(output, inputs, **options):
            outpath = cdx_filename(filename)
            outpath = os.path.join(output, outpath)

-            with open(outpath, 'w') as outfile:
+            with open(outpath, 'wb') as outfile:
                with open(fullpath, 'rb') as infile:
                    write_cdx_index(outfile, infile, filename, **options)

@@ -124,7 +124,7 @@ def write_multi_cdx_index(output, inputs, **options):
        if output == '-':
            outfile = sys.stdout
        else:
-            outfile = open(output, 'w')
+            outfile = open(output, 'wb')

        if options.get('sort'):
            writer_cls = SortedCDXWriter
--- a/pywb/warc/pathresolvers.py
+++ b/pywb/warc/pathresolvers.py
@@ -57,7 +57,7 @@ class RedisResolver:
 class PathIndexResolver:
    def __init__(self, pathindex_file):
        self.pathindex_file = pathindex_file
-        self.reader = open(pathindex_file)
+        self.reader = open(pathindex_file, 'rb')

    def __call__(self, filename):
        result = iter_exact(self.reader, filename, '\t')
--- a/pywb/warc/test/test_indexing.py
+++ b/pywb/warc/test/test_indexing.py
@@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/'
 TEST_WARC_DIR = get_test_dir() + 'warcs/'

 def read_fully(cdx):
-    with open(TEST_CDX_DIR + cdx, 'rU') as fh:
+    with open(TEST_CDX_DIR + cdx, 'rb') as fh:
        curr = BytesIO()
        while True:
            b = fh.read()
@@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):

        print filename

-        with open(os.path.join(tmp_dir, filename), 'rU') as fh:
+        with open(os.path.join(tmp_dir, filename), 'rb') as fh:
            lines = fh.read(8192).rstrip().split('\n')

    finally: