mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
warcserver pathresolvers: fix typos, add more comprehensive resolver tests
This commit is contained in:
parent
925f8337a5
commit
01597c1060
@ -10,6 +10,7 @@ import six
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
|
import glob
|
||||||
|
|
||||||
"""
|
"""
|
||||||
The purpose of this module is to 'resolve' a warc/arc filename,
|
The purpose of this module is to 'resolve' a warc/arc filename,
|
||||||
@ -51,7 +52,7 @@ class PrefixResolver(object):
|
|||||||
return path
|
return path
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "PrefixResolver('{0}')".format(self.prefix)
|
return "PrefixResolver('{0}')".format(self.template)
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
|
@ -11,63 +11,113 @@ from fakeredis import FakeStrictRedis
|
|||||||
from mock import patch
|
from mock import patch
|
||||||
|
|
||||||
|
|
||||||
def test_path_index_resolvers():
|
# ============================================================================
|
||||||
path_index = PathIndexResolver(get_test_dir() + 'text_content/pathindex.txt')
|
class TestPathIndex(object):
|
||||||
|
def test_path_index_resolvers(self):
|
||||||
|
path = os.path.join(get_test_dir(), 'text_content', 'pathindex.txt')
|
||||||
|
path_index = PathIndexResolver(path)
|
||||||
|
|
||||||
cdx = CDXObject()
|
cdx = CDXObject()
|
||||||
assert list(path_index('example.warc.gz', cdx)) == ['invalid_path', 'sample_archive/warcs/example.warc.gz']
|
assert list(path_index('example.warc.gz', cdx)) == ['invalid_path', 'sample_archive/warcs/example.warc.gz']
|
||||||
assert list(path_index('iana.warc.gz', cdx)) == ['sample_archive/warcs/iana.warc.gz']
|
assert list(path_index('iana.warc.gz', cdx)) == ['sample_archive/warcs/iana.warc.gz']
|
||||||
assert list(path_index('not-found.gz', cdx)) == []
|
assert list(path_index('not-found.gz', cdx)) == []
|
||||||
|
|
||||||
|
def test_resolver_dir_wildcard(self):
|
||||||
|
resolver = DefaultResolverMixin.make_best_resolver(os.path.join(get_test_dir(), '*', ''))
|
||||||
|
|
||||||
@patch('redis.StrictRedis', FakeStrictRedis)
|
cdx = CDXObject()
|
||||||
def test_redis_resolver():
|
res = resolver('example.warc.gz', cdx)
|
||||||
resolver = RedisResolver('redis://127.0.0.1:6379/0/warc_map')
|
assert len(res) == 1
|
||||||
|
assert res[0] == os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||||
|
|
||||||
cdx = CDXObject()
|
def test_resolver_dir_wildcard_as_file_url(self):
|
||||||
assert resolver('example.warc.gz', cdx) == None
|
url = to_file_url(get_test_dir()) + '/*/'
|
||||||
|
resolver = DefaultResolverMixin.make_best_resolver(url)
|
||||||
|
|
||||||
resolver.redis.hset(resolver.redis_key_template, 'example.warc.gz', 'some_path/example.warc.gz')
|
cdx = CDXObject()
|
||||||
|
res = resolver('example.warc.gz', cdx)
|
||||||
|
assert len(res) == 1
|
||||||
|
assert res[0] == os.path.abspath(os.path.join(get_test_dir(), 'warcs', 'example.warc.gz'))
|
||||||
|
|
||||||
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
|
def test_resolver_http_prefix(self):
|
||||||
|
resolver = DefaultResolverMixin.make_best_resolver('http://example.com/prefix/')
|
||||||
|
|
||||||
|
cdx = CDXObject()
|
||||||
|
res = resolver('example.warc.gz', cdx)
|
||||||
|
assert res == 'http://example.com/prefix/example.warc.gz'
|
||||||
|
|
||||||
def test_make_best_resolver_http():
|
def test_resolver_http_prefix_not_wildcard(self):
|
||||||
res = DefaultResolverMixin.make_best_resolver('http://myhost.example.com/warcs/')
|
resolver = DefaultResolverMixin.make_best_resolver('http://example.com/*/')
|
||||||
assert isinstance(res, PrefixResolver)
|
|
||||||
|
|
||||||
|
cdx = CDXObject()
|
||||||
|
res = resolver('example.warc.gz', cdx)
|
||||||
|
assert res == 'http://example.com/*/example.warc.gz'
|
||||||
|
|
||||||
def test_make_best_resolver_redis():
|
@patch('redis.StrictRedis', FakeStrictRedis)
|
||||||
res = DefaultResolverMixin.make_best_resolver('redis://myhost.example.com:1234/1')
|
def test_redis_resolver(self):
|
||||||
assert isinstance(res, RedisResolver)
|
resolver = RedisResolver('redis://127.0.0.1:6379/0/warc_map')
|
||||||
|
|
||||||
|
cdx = CDXObject()
|
||||||
|
assert resolver('example.warc.gz', cdx) == None
|
||||||
|
|
||||||
def test_resolver_dir_and_file():
|
resolver.redis.hset(resolver.redis_key_template, 'example.warc.gz', 'some_path/example.warc.gz')
|
||||||
a_file = os.path.realpath(__file__)
|
|
||||||
a_dir = os.path.dirname(a_file)
|
|
||||||
|
|
||||||
# a file -- assume path index
|
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
|
||||||
res = DefaultResolverMixin.make_best_resolver(a_file)
|
|
||||||
assert isinstance(res, PathIndexResolver)
|
|
||||||
|
|
||||||
# a dir -- assume prefix
|
@patch('redis.StrictRedis', FakeStrictRedis)
|
||||||
res = DefaultResolverMixin.make_best_resolver(a_dir)
|
def test_redis_resolver_multi_key(self):
|
||||||
assert isinstance(res, PrefixResolver)
|
resolver = RedisResolver('redis://127.0.0.1:6379/0/*:warc')
|
||||||
|
|
||||||
# not a valid file -- default to prefix
|
cdx = CDXObject()
|
||||||
res = DefaultResolverMixin.make_best_resolver('file://test/x_invalid')
|
assert resolver('example.warc.gz', cdx) == None
|
||||||
assert isinstance(res, PrefixResolver)
|
|
||||||
|
|
||||||
|
resolver.redis.hset('A:warc', 'example.warc.gz', 'some_path/example.warc.gz')
|
||||||
|
resolver.redis.hset('B:warc', 'example-2.warc.gz', 'some_path/example-2.warc.gz')
|
||||||
|
|
||||||
def test_resolver_list():
|
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
|
||||||
paths = [to_file_url(os.path.realpath(__file__)),
|
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
|
||||||
'http://myhost.example.com/warcs/',
|
|
||||||
'redis://localhost:1234/0']
|
|
||||||
|
|
||||||
res = DefaultResolverMixin.make_resolvers(paths)
|
def test_make_best_resolver_http(self):
|
||||||
assert isinstance(res[0], PathIndexResolver)
|
res = DefaultResolverMixin.make_best_resolver('http://myhost.example.com/warcs/')
|
||||||
assert isinstance(res[1], PrefixResolver)
|
assert isinstance(res, PrefixResolver)
|
||||||
assert isinstance(res[2], RedisResolver)
|
assert repr(res) == "PrefixResolver('http://myhost.example.com/warcs/')"
|
||||||
|
|
||||||
|
def test_make_best_resolver_redis(self):
|
||||||
|
res = DefaultResolverMixin.make_best_resolver('redis://myhost.example.com:1234/1')
|
||||||
|
assert isinstance(res, RedisResolver)
|
||||||
|
assert repr(res) == "RedisResolver('redis://myhost.example.com:1234/1')"
|
||||||
|
|
||||||
|
def test_make_best_resolver_pathindex(self):
|
||||||
|
path = os.path.join(get_test_dir(), 'text_content', 'pathindex.txt')
|
||||||
|
res = DefaultResolverMixin.make_best_resolver(path)
|
||||||
|
assert isinstance(res, PathIndexResolver)
|
||||||
|
assert repr(res) == "PathIndexResolver('{0}')".format(path)
|
||||||
|
|
||||||
|
def test_resolver_dir_and_file(self):
|
||||||
|
a_file = os.path.realpath(__file__)
|
||||||
|
a_dir = os.path.dirname(a_file)
|
||||||
|
|
||||||
|
# a file -- assume path index
|
||||||
|
res = DefaultResolverMixin.make_best_resolver(a_file)
|
||||||
|
assert isinstance(res, PathIndexResolver)
|
||||||
|
|
||||||
|
# a dir -- assume prefix
|
||||||
|
res = DefaultResolverMixin.make_best_resolver(a_dir)
|
||||||
|
assert isinstance(res, PrefixResolver)
|
||||||
|
|
||||||
|
# not a valid file -- default to prefix
|
||||||
|
res = DefaultResolverMixin.make_best_resolver('file://test/x_invalid')
|
||||||
|
assert isinstance(res, PrefixResolver)
|
||||||
|
|
||||||
|
def test_resolver_list(self):
|
||||||
|
paths = [to_file_url(os.path.realpath(__file__)),
|
||||||
|
'http://myhost.example.com/warcs/',
|
||||||
|
'redis://localhost:1234/0']
|
||||||
|
|
||||||
|
res = DefaultResolverMixin.make_resolvers(paths)
|
||||||
|
assert isinstance(res[0], PathIndexResolver)
|
||||||
|
assert isinstance(res[1], PrefixResolver)
|
||||||
|
assert isinstance(res[2], RedisResolver)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
Loading…
x
Reference in New Issue
Block a user