diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index 0c9ff62c..e6860d22 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -8,10 +8,9 @@ import mimetypes import logging import os -from urlparse import urlsplit, urljoin -from urllib import pathname2url +from urlparse import urlsplit -from pywb.utils.loaders import is_http, LimitReader, BlockLoader +from pywb.utils.loaders import is_http, LimitReader, BlockLoader, to_file_url from pywb.utils.loaders import extract_client_cookie from pywb.utils.timeutils import datetime_to_timestamp from pywb.utils.statusandheaders import StatusAndHeaders @@ -187,8 +186,7 @@ class LiveRewriter(object): else: is_remote = False if not url.startswith('file:'): - url = os.path.abspath(url) - url = urljoin('file:', pathname2url(url)) + url = to_file_url(url) # explicit urlkey may be passed in (say for testing) if not urlkey: diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index 6ef1355e..34eca14b 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -7,6 +7,7 @@ import os import hmac import urllib import urllib2 +import urlparse import time import pkg_resources from io import open @@ -17,6 +18,15 @@ def is_http(filename): return filename.startswith(('http://', 'https://')) +#================================================================= +def to_file_url(filename): + """ Convert a filename to a file:// url + """ + url = os.path.abspath(filename) + url = urlparse.urljoin('file:', urllib.pathname2url(url)) + return url + + #================================================================= def load_yaml_config(config_file): import yaml @@ -129,13 +139,14 @@ class BlockLoader(object): # if starting with . or /, can only be a file path.. file_only = url.startswith(('/', '.')) + # convert to filename + if url.startswith('file://'): + file_only = True + url = urllib.url2pathname(url[len('file://'):]) + try: # first, try as file - if url.startswith('file://'): - file_only = True - afile = urllib.urlopen(url) - else: - afile = open(url, 'rb') + afile = open(url, 'rb') except IOError: if file_only: diff --git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index a765808e..312af81b 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -25,7 +25,7 @@ True 100 # no length specified, read full amount requested ->>> len(BlockLoader().load(to_local_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400)) +>>> len(BlockLoader().load(to_file_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400)) 400 # HMAC Cookie Maker @@ -63,11 +63,9 @@ True import re import os from io import BytesIO -from pywb.utils.loaders import BlockLoader, HMACCookieMaker +from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url from pywb.utils.loaders import LimitReader, extract_client_cookie -from urllib import pathname2url - from pywb import get_test_dir test_cdx_dir = get_test_dir() + 'cdx/' @@ -84,9 +82,6 @@ def seek_read_full(seekable_reader, offset): seekable_reader.readline() #skip return seekable_reader.readline() -def to_local_url(filename): - filename = os.path.abspath(filename) - return 'file://' + pathname2url(filename) if __name__ == "__main__": import doctest diff --git a/pywb/warc/test/test_pathresolvers.py b/pywb/warc/test/test_pathresolvers.py index f60eafaa..d00f3348 100644 --- a/pywb/warc/test/test_pathresolvers.py +++ b/pywb/warc/test/test_pathresolvers.py @@ -33,13 +33,13 @@ PrefixResolver('http://myhost.example.com/warcs/', contains = '/') RedisResolver('redis://myhost.example.com:1234/1') # a file ->>> r = make_best_resolver(to_local_url(os.path.realpath(__file__))) +>>> r = make_best_resolver(to_file_url(os.path.realpath(__file__))) >>> r.__class__.__name__ 'PathIndexResolver' # a dir >>> path = os.path.realpath(__file__) ->>> r = make_best_resolver(to_local_url(os.path.dirname(path))) +>>> r = make_best_resolver(to_file_url(os.path.dirname(path))) >>> r.__class__.__name__ 'PrefixResolver' @@ -54,9 +54,9 @@ RedisResolver('redis://myhost.example.com:1234/1') from pywb import get_test_dir from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers -import os +from pywb.utils.loaders import to_file_url -from urllib import pathname2url +import os from fakeredis import FakeStrictRedis from mock import patch @@ -69,12 +69,6 @@ def init_redis_resolver(): def hset_path(filename, path): redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path) -def to_local_url(filename): - filename = os.path.abspath(filename) - res = 'file:' + pathname2url(filename) - #print(res) - return res - redis_resolver = init_redis_resolver() #=================================================================