Windows support work: fix loaders to use pathname2url to convert paths to

file:/// URLs, and use urlopen to open file paths; fix some tests to use universal line breaks.
2025-03-15 00:03:28 +01:00 · 2015-01-10 14:06:15 -08:00 · 2015-01-10 14:06:15 -08:00 · 1eb0f96f92
commit 1eb0f96f92
parent 06b2ea498e
6 changed files with 31 additions and 18 deletions
--- a/pywb/rewrite/rewrite_live.py
+++ b/pywb/rewrite/rewrite_live.py
@@ -6,8 +6,10 @@ import requests
 import datetime
 import mimetypes
 import logging
 import os
-from urlparse import urlsplit
+from urlparse import urlsplit, urljoin
 from urllib import pathname2url
 from pywb.utils.loaders import is_http, LimitReader, BlockLoader
 from pywb.utils.loaders import extract_client_cookie
@@ -180,16 +182,25 @@ class LiveRewriter(object):
        if url.startswith('//'):
            url = 'http:' + url
        if is_http(url):
            is_remote = True
        else:
            is_remote = False
            if not url.startswith('file:'):
                url = os.path.abspath(url)
                url = urljoin('file:', pathname2url(url))
                print(url)
        # explicit urlkey may be passed in (say for testing)
        if not urlkey:
            urlkey = canonicalize(url)
-        if is_http(url):
+        if is_remote:
            (status_headers, stream) = self.fetch_http(url, urlkey, env,
                                                       req_headers,
                                                       follow_redirects,
                                                       ignore_proxies)
-        else:
+        else:   
            (status_headers, stream) = self.fetch_local_file(url)
        if timestamp is None:
--- a/pywb/utils/loaders.py
+++ b/pywb/utils/loaders.py
@@ -129,13 +129,13 @@ class BlockLoader(object):
        # if starting with . or /, can only be a file path..
        file_only = url.startswith(('/', '.'))
        if url.startswith('file://'):
            url = url[len('file://'):]
            file_only = True
        try:
            # first, try as file
-            afile = open(url, 'rb')
+            if url.startswith('file://'):
                file_only = True
                afile = urllib.urlopen(url)
            else:
                afile = open(url, 'rb')
        except IOError:
            if file_only:
--- a/pywb/utils/test/test_bufferedreaders.py
+++ b/pywb/utils/test/test_bufferedreaders.py
@@ -3,11 +3,11 @@ r"""
 #=================================================================
 # DecompressingBufferedReader readline()
->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
+>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline()
 ' CDX N b a m s k r M S V g\n'
 # detect not compressed
->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
+>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline()
 ' CDX N b a m s k r M S V g\n'
 # decompress with on the fly compression, default gzip compression
--- a/pywb/utils/test/test_loaders.py
+++ b/pywb/utils/test/test_loaders.py
@@ -25,7 +25,7 @@ True
 100
 # no length specified, read full amount requested
->>> len(BlockLoader().load('file://' + test_cdx_dir + 'example.cdx', 0, -1).read(400))
+>>> len(BlockLoader().load('file:' + pathname2url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
 400
 # HMAC Cookie Maker
@@ -65,6 +65,8 @@ from io import BytesIO
 from pywb.utils.loaders import BlockLoader, HMACCookieMaker
 from pywb.utils.loaders import LimitReader, extract_client_cookie
 from urllib import pathname2url
 from pywb import get_test_dir
 test_cdx_dir = get_test_dir() + 'cdx/'
--- a/pywb/warc/test/test_indexing.py
+++ b/pywb/warc/test/test_indexing.py
@@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/'
 TEST_WARC_DIR = get_test_dir() + 'warcs/'
 def read_fully(cdx):
-    with open(TEST_CDX_DIR + cdx) as fh:
+    with open(TEST_CDX_DIR + cdx, 'rU') as fh:
        curr = BytesIO()
        while True:
            b = fh.read()
@@ -172,7 +172,7 @@ def read_fully(cdx):
 def cdx_index(warc, **options):
    buff = BytesIO()
-    with open(TEST_WARC_DIR + warc) as fh:
+    with open(TEST_WARC_DIR + warc, 'rU') as fh:
        write_cdx_index(buff, fh,  warc, **options)
    return buff.getvalue()
@@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
        print filename
-        with open(os.path.join(tmp_dir, filename), 'r') as fh:
+        with open(os.path.join(tmp_dir, filename), 'rU') as fh:
            lines = fh.read(8192).rstrip().split('\n')
    finally:
--- a/setup.py
+++ b/setup.py
@@ -58,10 +58,10 @@ setup(
        'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'],
        },
    data_files=[
-        ('sample_archive/cdx/', glob.glob('sample_archive/cdx/*')),
+        ('sample_archive/cdx', glob.glob('sample_archive/cdx/*')),
-        ('sample_archive/zipcdx/', glob.glob('sample_archive/zipcdx/*')),
+        ('sample_archive/zipcdx', glob.glob('sample_archive/zipcdx/*')),
-        ('sample_archive/warcs/', glob.glob('sample_archive/warcs/*')),
+        ('sample_archive/warcs', glob.glob('sample_archive/warcs/*')),
-        ('sample_archive/text_content/',
+        ('sample_archive/text_content',
            glob.glob('sample_archive/text_content/*')),
        ],
    install_requires=[