1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

windows: fix file path to/from file:// URL conversion, add from_file_url() and use to_file_url() more consistently
resolvers: make_best_resolver() handles file:// URLs, but PrefixResolver itself does not
This commit is contained in:
Ilya Kreymer 2017-09-28 08:37:04 -07:00
parent a870f7e91a
commit 02f8fa9ff3
4 changed files with 21 additions and 15 deletions

View File

@ -11,7 +11,6 @@ import requests
import yaml
import six
from six.moves.urllib.request import pathname2url, url2pathname
from six.moves.urllib.parse import urljoin, unquote_plus, urlsplit, urlencode
import time
@ -38,8 +37,17 @@ def is_http(filename):
def to_file_url(filename):
""" Convert a filename to a file:// url

The returned value is 'file://' followed by os.path.abspath(filename)
with every os.path.sep replaced by '/'.
"""
# NOTE(review): the results of the next two assignments are discarded --
# `url` is reassigned below without reading them. Presumably these are the
# pre-change lines of this diff hunk; confirm before relying on them.
url = os.path.abspath(filename)
url = urljoin('file:', pathname2url(url))
# This is the value actually returned: plain string replacement, no
# percent-encoding of the path is performed here.
url = 'file://' + os.path.abspath(filename).replace(os.path.sep, '/')
return url
#=================================================================
def from_file_url(url):
    """ Return the local file path for a file:// url

    The leading 'file://' is dropped and every '/' in the remainder is
    replaced by os.path.sep. A value that does not start with 'file://'
    is returned unchanged.
    """
    scheme = 'file://'

    # not a file url: hand the input back untouched
    if not url.startswith(scheme):
        return url

    remainder = url[len(scheme):]
    return remainder.replace('/', os.path.sep)
@ -259,9 +267,10 @@ class LocalFileLoader(PackageLoader):
file_only = url.startswith(('/', '.'))
# convert to filename
if url.startswith('file://'):
filename = from_file_url(url)
if filename != url:
file_only = True
url = url2pathname(url[len('file://'):])
url = filename
try:
# first, try as file

View File

@ -5,7 +5,7 @@ from pywb.utils.binsearch import iter_exact
from pywb.warcserver.index.indexsource import RedisIndexSource
from six.moves.urllib.request import url2pathname
from pywb.utils.loaders import from_file_url
import six
import os
@ -40,9 +40,7 @@ class PrefixResolver(object):
if '*' not in path:
return path
if path.startswith('file://'):
path = path[7:]
elif '://' in path:
if '://' in path:
return path
paths = glob.glob(path)
@ -112,8 +110,7 @@ class DefaultResolverMixin(object):
if path.startswith('redis://'):
return RedisResolver(path)
if path.startswith('file://'):
path = url2pathname(path[len('file://'):])
path = from_file_url(path)
if os.path.isfile(path):
return PathIndexResolver(path)

View File

@ -31,8 +31,8 @@ class TestPathIndex(object):
assert res[0] == os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
def test_resolver_dir_wildcard_as_file_url(self):
url = to_file_url(get_test_dir()) + os.path.sep + '*' + os.path.sep
resolver = PrefixResolver(url)
url = to_file_url(get_test_dir()) + '/*/'
resolver = DefaultResolverMixin.make_best_resolver(url)
cdx = CDXObject()
res = resolver('example.warc.gz', cdx)

View File

@ -104,7 +104,7 @@ class TestRecordReplay(CollsDirMixin, BaseConfigTest):
link_lines = res.text.rstrip().split('\n')
assert len(link_lines) == 5
assert '_test_colls:test2/indexes/autoindex.cdxj' in link_lines[3]
assert '_test_colls:test/indexes/autoindex.cdxj' in link_lines[4]
assert to_path('_test_colls:test2/indexes/autoindex.cdxj') in link_lines[3]
assert to_path('_test_colls:test/indexes/autoindex.cdxj') in link_lines[4]