mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
windows: fix file path to/from file:// url conversion, add
from_file_url() and use to_file_url() more consistently. resolvers: make_best_resolver() handles file:// urls, but PrefixResolver itself does not.
This commit is contained in:
parent
a870f7e91a
commit
02f8fa9ff3
@ -11,7 +11,6 @@ import requests
|
|||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
import six
|
import six
|
||||||
from six.moves.urllib.request import pathname2url, url2pathname
|
|
||||||
from six.moves.urllib.parse import urljoin, unquote_plus, urlsplit, urlencode
|
from six.moves.urllib.parse import urljoin, unquote_plus, urlsplit, urlencode
|
||||||
|
|
||||||
import time
|
import time
|
||||||
@ -38,8 +37,17 @@ def is_http(filename):
|
|||||||
def to_file_url(filename):
|
def to_file_url(filename):
|
||||||
""" Convert a filename to a file:// url
|
""" Convert a filename to a file:// url
|
||||||
"""
|
"""
|
||||||
url = os.path.abspath(filename)
|
url = 'file://' + os.path.abspath(filename).replace(os.path.sep, '/')
|
||||||
url = urljoin('file:', pathname2url(url))
|
return url
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def from_file_url(url):
|
||||||
|
""" Convert from file:// url to file path
|
||||||
|
"""
|
||||||
|
if url.startswith('file://'):
|
||||||
|
url = url[len('file://'):].replace('/', os.path.sep)
|
||||||
|
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
@ -259,9 +267,10 @@ class LocalFileLoader(PackageLoader):
|
|||||||
file_only = url.startswith(('/', '.'))
|
file_only = url.startswith(('/', '.'))
|
||||||
|
|
||||||
# convert to filename
|
# convert to filename
|
||||||
if url.startswith('file://'):
|
filename = from_file_url(url)
|
||||||
|
if filename != url:
|
||||||
file_only = True
|
file_only = True
|
||||||
url = url2pathname(url[len('file://'):])
|
url = filename
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# first, try as file
|
# first, try as file
|
||||||
|
@ -5,7 +5,7 @@ from pywb.utils.binsearch import iter_exact
|
|||||||
|
|
||||||
from pywb.warcserver.index.indexsource import RedisIndexSource
|
from pywb.warcserver.index.indexsource import RedisIndexSource
|
||||||
|
|
||||||
from six.moves.urllib.request import url2pathname
|
from pywb.utils.loaders import from_file_url
|
||||||
import six
|
import six
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@ -40,9 +40,7 @@ class PrefixResolver(object):
|
|||||||
if '*' not in path:
|
if '*' not in path:
|
||||||
return path
|
return path
|
||||||
|
|
||||||
if path.startswith('file://'):
|
if '://' in path:
|
||||||
path = path[7:]
|
|
||||||
elif '://' in path:
|
|
||||||
return path
|
return path
|
||||||
|
|
||||||
paths = glob.glob(path)
|
paths = glob.glob(path)
|
||||||
@ -112,8 +110,7 @@ class DefaultResolverMixin(object):
|
|||||||
if path.startswith('redis://'):
|
if path.startswith('redis://'):
|
||||||
return RedisResolver(path)
|
return RedisResolver(path)
|
||||||
|
|
||||||
if path.startswith('file://'):
|
path = from_file_url(path)
|
||||||
path = url2pathname(path[len('file://'):])
|
|
||||||
|
|
||||||
if os.path.isfile(path):
|
if os.path.isfile(path):
|
||||||
return PathIndexResolver(path)
|
return PathIndexResolver(path)
|
||||||
|
@ -31,8 +31,8 @@ class TestPathIndex(object):
|
|||||||
assert res[0] == os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
assert res[0] == os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||||
|
|
||||||
def test_resolver_dir_wildcard_as_file_url(self):
|
def test_resolver_dir_wildcard_as_file_url(self):
|
||||||
url = to_file_url(get_test_dir()) + os.path.sep + '*' + os.path.sep
|
url = to_file_url(get_test_dir()) + '/*/'
|
||||||
resolver = PrefixResolver(url)
|
resolver = DefaultResolverMixin.make_best_resolver(url)
|
||||||
|
|
||||||
cdx = CDXObject()
|
cdx = CDXObject()
|
||||||
res = resolver('example.warc.gz', cdx)
|
res = resolver('example.warc.gz', cdx)
|
||||||
|
@ -104,7 +104,7 @@ class TestRecordReplay(CollsDirMixin, BaseConfigTest):
|
|||||||
link_lines = res.text.rstrip().split('\n')
|
link_lines = res.text.rstrip().split('\n')
|
||||||
assert len(link_lines) == 5
|
assert len(link_lines) == 5
|
||||||
|
|
||||||
assert '_test_colls:test2/indexes/autoindex.cdxj' in link_lines[3]
|
assert to_path('_test_colls:test2/indexes/autoindex.cdxj') in link_lines[3]
|
||||||
assert '_test_colls:test/indexes/autoindex.cdxj' in link_lines[4]
|
assert to_path('_test_colls:test/indexes/autoindex.cdxj') in link_lines[4]
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user