1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

loaders: add to_file_url() for converting between filename and file://,

used in live rewrite and tests
This commit is contained in:
Ilya Kreymer 2015-01-11 13:05:48 -08:00
parent ba853a4eae
commit cf0a21509b
4 changed files with 25 additions and 27 deletions

View File

@ -8,10 +8,9 @@ import mimetypes
import logging
import os
from urlparse import urlsplit, urljoin
from urllib import pathname2url
from urlparse import urlsplit
from pywb.utils.loaders import is_http, LimitReader, BlockLoader
from pywb.utils.loaders import is_http, LimitReader, BlockLoader, to_file_url
from pywb.utils.loaders import extract_client_cookie
from pywb.utils.timeutils import datetime_to_timestamp
from pywb.utils.statusandheaders import StatusAndHeaders
@ -187,8 +186,7 @@ class LiveRewriter(object):
else:
is_remote = False
if not url.startswith('file:'):
url = os.path.abspath(url)
url = urljoin('file:', pathname2url(url))
url = to_file_url(url)
# explicit urlkey may be passed in (say for testing)
if not urlkey:

View File

@ -7,6 +7,7 @@ import os
import hmac
import urllib
import urllib2
import urlparse
import time
import pkg_resources
from io import open
@ -17,6 +18,15 @@ def is_http(filename):
return filename.startswith(('http://', 'https://'))
#=================================================================
def to_file_url(filename):
    """ Return the file:// url that corresponds to a filename

    The filename is made absolute first, then converted to url
    path form and joined onto the 'file:' scheme.
    """
    abs_path = os.path.abspath(filename)
    return urlparse.urljoin('file:', urllib.pathname2url(abs_path))
#=================================================================
def load_yaml_config(config_file):
import yaml
@ -129,13 +139,14 @@ class BlockLoader(object):
# if starting with . or /, can only be a file path..
file_only = url.startswith(('/', '.'))
# convert to filename
if url.startswith('file://'):
file_only = True
url = urllib.url2pathname(url[len('file://'):])
try:
# first, try as file
if url.startswith('file://'):
file_only = True
afile = urllib.urlopen(url)
else:
afile = open(url, 'rb')
afile = open(url, 'rb')
except IOError:
if file_only:

View File

@ -25,7 +25,7 @@ True
100
# no length specified, read full amount requested
>>> len(BlockLoader().load(to_local_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
>>> len(BlockLoader().load(to_file_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
400
# HMAC Cookie Maker
@ -63,11 +63,9 @@ True
import re
import os
from io import BytesIO
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url
from pywb.utils.loaders import LimitReader, extract_client_cookie
from urllib import pathname2url
from pywb import get_test_dir
test_cdx_dir = get_test_dir() + 'cdx/'
@ -84,9 +82,6 @@ def seek_read_full(seekable_reader, offset):
seekable_reader.readline() #skip
return seekable_reader.readline()
def to_local_url(filename):
    """ Build a file:// url from the absolute form of filename
    """
    return 'file://' + pathname2url(os.path.abspath(filename))
if __name__ == "__main__":
import doctest

View File

@ -33,13 +33,13 @@ PrefixResolver('http://myhost.example.com/warcs/', contains = '/')
RedisResolver('redis://myhost.example.com:1234/1')
# a file
>>> r = make_best_resolver(to_local_url(os.path.realpath(__file__)))
>>> r = make_best_resolver(to_file_url(os.path.realpath(__file__)))
>>> r.__class__.__name__
'PathIndexResolver'
# a dir
>>> path = os.path.realpath(__file__)
>>> r = make_best_resolver(to_local_url(os.path.dirname(path)))
>>> r = make_best_resolver(to_file_url(os.path.dirname(path)))
>>> r.__class__.__name__
'PrefixResolver'
@ -54,9 +54,9 @@ RedisResolver('redis://myhost.example.com:1234/1')
from pywb import get_test_dir
from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers
import os
from pywb.utils.loaders import to_file_url
from urllib import pathname2url
import os
from fakeredis import FakeStrictRedis
from mock import patch
@ -69,12 +69,6 @@ def init_redis_resolver():
def hset_path(filename, path):
    """ Set the 'path' field of the redis hash keyed by the
    resolver's key prefix followed by filename
    """
    redis_key = redis_resolver.key_prefix + filename
    redis_resolver.redis.hset(redis_key, 'path', path)
def to_local_url(filename):
    """ Build a file: url from the absolute form of filename
    """
    abs_path = os.path.abspath(filename)
    return 'file:' + pathname2url(abs_path)
redis_resolver = init_redis_resolver()
#=================================================================