mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
loaders: add to_file_url() for converting a filename to a file:// URL,
used in live rewrite and tests
This commit is contained in:
parent
ba853a4eae
commit
cf0a21509b
@ -8,10 +8,9 @@ import mimetypes
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from urlparse import urlsplit, urljoin
|
from urlparse import urlsplit
|
||||||
from urllib import pathname2url
|
|
||||||
|
|
||||||
from pywb.utils.loaders import is_http, LimitReader, BlockLoader
|
from pywb.utils.loaders import is_http, LimitReader, BlockLoader, to_file_url
|
||||||
from pywb.utils.loaders import extract_client_cookie
|
from pywb.utils.loaders import extract_client_cookie
|
||||||
from pywb.utils.timeutils import datetime_to_timestamp
|
from pywb.utils.timeutils import datetime_to_timestamp
|
||||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||||
@ -187,8 +186,7 @@ class LiveRewriter(object):
|
|||||||
else:
|
else:
|
||||||
is_remote = False
|
is_remote = False
|
||||||
if not url.startswith('file:'):
|
if not url.startswith('file:'):
|
||||||
url = os.path.abspath(url)
|
url = to_file_url(url)
|
||||||
url = urljoin('file:', pathname2url(url))
|
|
||||||
|
|
||||||
# explicit urlkey may be passed in (say for testing)
|
# explicit urlkey may be passed in (say for testing)
|
||||||
if not urlkey:
|
if not urlkey:
|
||||||
|
@ -7,6 +7,7 @@ import os
|
|||||||
import hmac
|
import hmac
|
||||||
import urllib
|
import urllib
|
||||||
import urllib2
|
import urllib2
|
||||||
|
import urlparse
|
||||||
import time
|
import time
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
from io import open
|
from io import open
|
||||||
@ -17,6 +18,15 @@ def is_http(filename):
|
|||||||
return filename.startswith(('http://', 'https://'))
|
return filename.startswith(('http://', 'https://'))
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def to_file_url(filename):
|
||||||
|
""" Convert a filename to a file:// url
|
||||||
|
"""
|
||||||
|
url = os.path.abspath(filename)
|
||||||
|
url = urlparse.urljoin('file:', urllib.pathname2url(url))
|
||||||
|
return url
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def load_yaml_config(config_file):
|
def load_yaml_config(config_file):
|
||||||
import yaml
|
import yaml
|
||||||
@ -129,13 +139,14 @@ class BlockLoader(object):
|
|||||||
# if starting with . or /, can only be a file path..
|
# if starting with . or /, can only be a file path..
|
||||||
file_only = url.startswith(('/', '.'))
|
file_only = url.startswith(('/', '.'))
|
||||||
|
|
||||||
|
# convert to filename
|
||||||
|
if url.startswith('file://'):
|
||||||
|
file_only = True
|
||||||
|
url = urllib.url2pathname(url[len('file://'):])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# first, try as file
|
# first, try as file
|
||||||
if url.startswith('file://'):
|
afile = open(url, 'rb')
|
||||||
file_only = True
|
|
||||||
afile = urllib.urlopen(url)
|
|
||||||
else:
|
|
||||||
afile = open(url, 'rb')
|
|
||||||
|
|
||||||
except IOError:
|
except IOError:
|
||||||
if file_only:
|
if file_only:
|
||||||
|
@ -25,7 +25,7 @@ True
|
|||||||
100
|
100
|
||||||
|
|
||||||
# no length specified, read full amount requested
|
# no length specified, read full amount requested
|
||||||
>>> len(BlockLoader().load(to_local_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
|
>>> len(BlockLoader().load(to_file_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
|
||||||
400
|
400
|
||||||
|
|
||||||
# HMAC Cookie Maker
|
# HMAC Cookie Maker
|
||||||
@ -63,11 +63,9 @@ True
|
|||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
|
from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url
|
||||||
from pywb.utils.loaders import LimitReader, extract_client_cookie
|
from pywb.utils.loaders import LimitReader, extract_client_cookie
|
||||||
|
|
||||||
from urllib import pathname2url
|
|
||||||
|
|
||||||
from pywb import get_test_dir
|
from pywb import get_test_dir
|
||||||
|
|
||||||
test_cdx_dir = get_test_dir() + 'cdx/'
|
test_cdx_dir = get_test_dir() + 'cdx/'
|
||||||
@ -84,9 +82,6 @@ def seek_read_full(seekable_reader, offset):
|
|||||||
seekable_reader.readline() #skip
|
seekable_reader.readline() #skip
|
||||||
return seekable_reader.readline()
|
return seekable_reader.readline()
|
||||||
|
|
||||||
def to_local_url(filename):
|
|
||||||
filename = os.path.abspath(filename)
|
|
||||||
return 'file://' + pathname2url(filename)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import doctest
|
import doctest
|
||||||
|
@ -33,13 +33,13 @@ PrefixResolver('http://myhost.example.com/warcs/', contains = '/')
|
|||||||
RedisResolver('redis://myhost.example.com:1234/1')
|
RedisResolver('redis://myhost.example.com:1234/1')
|
||||||
|
|
||||||
# a file
|
# a file
|
||||||
>>> r = make_best_resolver(to_local_url(os.path.realpath(__file__)))
|
>>> r = make_best_resolver(to_file_url(os.path.realpath(__file__)))
|
||||||
>>> r.__class__.__name__
|
>>> r.__class__.__name__
|
||||||
'PathIndexResolver'
|
'PathIndexResolver'
|
||||||
|
|
||||||
# a dir
|
# a dir
|
||||||
>>> path = os.path.realpath(__file__)
|
>>> path = os.path.realpath(__file__)
|
||||||
>>> r = make_best_resolver(to_local_url(os.path.dirname(path)))
|
>>> r = make_best_resolver(to_file_url(os.path.dirname(path)))
|
||||||
>>> r.__class__.__name__
|
>>> r.__class__.__name__
|
||||||
'PrefixResolver'
|
'PrefixResolver'
|
||||||
|
|
||||||
@ -54,9 +54,9 @@ RedisResolver('redis://myhost.example.com:1234/1')
|
|||||||
from pywb import get_test_dir
|
from pywb import get_test_dir
|
||||||
from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
|
from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
|
||||||
from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers
|
from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers
|
||||||
import os
|
from pywb.utils.loaders import to_file_url
|
||||||
|
|
||||||
from urllib import pathname2url
|
import os
|
||||||
|
|
||||||
from fakeredis import FakeStrictRedis
|
from fakeredis import FakeStrictRedis
|
||||||
from mock import patch
|
from mock import patch
|
||||||
@ -69,12 +69,6 @@ def init_redis_resolver():
|
|||||||
def hset_path(filename, path):
|
def hset_path(filename, path):
|
||||||
redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path)
|
redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path)
|
||||||
|
|
||||||
def to_local_url(filename):
|
|
||||||
filename = os.path.abspath(filename)
|
|
||||||
res = 'file:' + pathname2url(filename)
|
|
||||||
#print(res)
|
|
||||||
return res
|
|
||||||
|
|
||||||
redis_resolver = init_redis_resolver()
|
redis_resolver = init_redis_resolver()
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
Loading…
x
Reference in New Issue
Block a user