From 01597c1060b5edee9f1bbcf3e114a8f3d1d98a95 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 27 Sep 2017 23:30:08 -0700 Subject: [PATCH] warcserver pathresolvers: fix typos, add more comprehensive resolver tests --- pywb/warcserver/resource/pathresolvers.py | 3 +- .../resource/test/test_pathresolvers.py | 128 ++++++++++++------ 2 files changed, 91 insertions(+), 40 deletions(-) diff --git a/pywb/warcserver/resource/pathresolvers.py b/pywb/warcserver/resource/pathresolvers.py index 8c5767e1..3fa69c7b 100644 --- a/pywb/warcserver/resource/pathresolvers.py +++ b/pywb/warcserver/resource/pathresolvers.py @@ -10,6 +10,7 @@ import six import os import logging +import glob """ The purpose of this module is to 'resolve' a warc/arc filename, @@ -51,7 +52,7 @@ class PrefixResolver(object): return path def __repr__(self): - return "PrefixResolver('{0}')".format(self.prefix) + return "PrefixResolver('{0}')".format(self.template) #============================================================================= diff --git a/pywb/warcserver/resource/test/test_pathresolvers.py b/pywb/warcserver/resource/test/test_pathresolvers.py index 4566d511..013f550c 100644 --- a/pywb/warcserver/resource/test/test_pathresolvers.py +++ b/pywb/warcserver/resource/test/test_pathresolvers.py @@ -11,63 +11,113 @@ from fakeredis import FakeStrictRedis from mock import patch -def test_path_index_resolvers(): - path_index = PathIndexResolver(get_test_dir() + 'text_content/pathindex.txt') +# ============================================================================ +class TestPathIndex(object): + def test_path_index_resolvers(self): + path = os.path.join(get_test_dir(), 'text_content', 'pathindex.txt') + path_index = PathIndexResolver(path) - cdx = CDXObject() - assert list(path_index('example.warc.gz', cdx)) == ['invalid_path', 'sample_archive/warcs/example.warc.gz'] - assert list(path_index('iana.warc.gz', cdx)) == ['sample_archive/warcs/iana.warc.gz'] - assert list(path_index('not-found.gz', cdx)) == [] + cdx = CDXObject() + assert list(path_index('example.warc.gz', cdx)) == ['invalid_path', 'sample_archive/warcs/example.warc.gz'] + assert list(path_index('iana.warc.gz', cdx)) == ['sample_archive/warcs/iana.warc.gz'] + assert list(path_index('not-found.gz', cdx)) == [] + def test_resolver_dir_wildcard(self): + resolver = DefaultResolverMixin.make_best_resolver(os.path.join(get_test_dir(), '*', '')) -@patch('redis.StrictRedis', FakeStrictRedis) -def test_redis_resolver(): - resolver = RedisResolver('redis://127.0.0.1:6379/0/warc_map') + cdx = CDXObject() + res = resolver('example.warc.gz', cdx) + assert len(res) == 1 + assert res[0] == os.path.join(get_test_dir(), 'warcs', 'example.warc.gz') - cdx = CDXObject() - assert resolver('example.warc.gz', cdx) == None + def test_resolver_dir_wildcard_as_file_url(self): + url = to_file_url(get_test_dir()) + '/*/' + resolver = DefaultResolverMixin.make_best_resolver(url) - resolver.redis.hset(resolver.redis_key_template, 'example.warc.gz', 'some_path/example.warc.gz') + cdx = CDXObject() + res = resolver('example.warc.gz', cdx) + assert len(res) == 1 + assert res[0] == os.path.abspath(os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')) - assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz' + def test_resolver_http_prefix(self): + resolver = DefaultResolverMixin.make_best_resolver('http://example.com/prefix/') + cdx = CDXObject() + res = resolver('example.warc.gz', cdx) + assert res == 'http://example.com/prefix/example.warc.gz' -def test_make_best_resolver_http(): - res = DefaultResolverMixin.make_best_resolver('http://myhost.example.com/warcs/') - assert isinstance(res, PrefixResolver) + def test_resolver_http_prefix_not_wildcard(self): + resolver = DefaultResolverMixin.make_best_resolver('http://example.com/*/') + cdx = CDXObject() + res = resolver('example.warc.gz', cdx) + assert res == 'http://example.com/*/example.warc.gz' -def test_make_best_resolver_redis(): - res = DefaultResolverMixin.make_best_resolver('redis://myhost.example.com:1234/1') - assert isinstance(res, RedisResolver) + @patch('redis.StrictRedis', FakeStrictRedis) + def test_redis_resolver(self): + resolver = RedisResolver('redis://127.0.0.1:6379/0/warc_map') + cdx = CDXObject() + assert resolver('example.warc.gz', cdx) == None -def test_resolver_dir_and_file(): - a_file = os.path.realpath(__file__) - a_dir = os.path.dirname(a_file) + resolver.redis.hset(resolver.redis_key_template, 'example.warc.gz', 'some_path/example.warc.gz') - # a file -- assume path index - res = DefaultResolverMixin.make_best_resolver(a_file) - assert isinstance(res, PathIndexResolver) + assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz' - # a dir -- asume prefix - res = DefaultResolverMixin.make_best_resolver(a_dir) - assert isinstance(res, PrefixResolver) + @patch('redis.StrictRedis', FakeStrictRedis) + def test_redis_resolver_multi_key(self): + resolver = RedisResolver('redis://127.0.0.1:6379/0/*:warc') - # not a valid file -- default to prefix - res = DefaultResolverMixin.make_best_resolver('file://test/x_invalid') - assert isinstance(res, PrefixResolver) + cdx = CDXObject() + assert resolver('example.warc.gz', cdx) == None + resolver.redis.hset('A:warc', 'example.warc.gz', 'some_path/example.warc.gz') + resolver.redis.hset('B:warc', 'example-2.warc.gz', 'some_path/example-2.warc.gz') -def test_resolver_list(): - paths = [to_file_url(os.path.realpath(__file__)), - 'http://myhost.example.com/warcs/', - 'redis://localhost:1234/0'] + assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz' + assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz' - res = DefaultResolverMixin.make_resolvers(paths) - assert isinstance(res[0], PathIndexResolver) - assert isinstance(res[1], PrefixResolver) - assert isinstance(res[2], RedisResolver) + def test_make_best_resolver_http(self): + res = DefaultResolverMixin.make_best_resolver('http://myhost.example.com/warcs/') + assert isinstance(res, PrefixResolver) + assert repr(res) == "PrefixResolver('http://myhost.example.com/warcs/')" + + def test_make_best_resolver_redis(self): + res = DefaultResolverMixin.make_best_resolver('redis://myhost.example.com:1234/1') + assert isinstance(res, RedisResolver) + assert repr(res) == "RedisResolver('redis://myhost.example.com:1234/1')" + + def test_make_best_resolver_pathindex(self): + path = os.path.join(get_test_dir(), 'text_content', 'pathindex.txt') + res = DefaultResolverMixin.make_best_resolver(path) + assert isinstance(res, PathIndexResolver) + assert repr(res) == "PathIndexResolver('{0}')".format(path) + + def test_resolver_dir_and_file(self): + a_file = os.path.realpath(__file__) + a_dir = os.path.dirname(a_file) + + # a file -- assume path index + res = DefaultResolverMixin.make_best_resolver(a_file) + assert isinstance(res, PathIndexResolver) + + # a dir -- asume prefix + res = DefaultResolverMixin.make_best_resolver(a_dir) + assert isinstance(res, PrefixResolver) + + # not a valid file -- default to prefix + res = DefaultResolverMixin.make_best_resolver('file://test/x_invalid') + assert isinstance(res, PrefixResolver) + + def test_resolver_list(self): + paths = [to_file_url(os.path.realpath(__file__)), + 'http://myhost.example.com/warcs/', + 'redis://localhost:1234/0'] + + res = DefaultResolverMixin.make_resolvers(paths) + assert isinstance(res[0], PathIndexResolver) + assert isinstance(res[1], PrefixResolver) + assert isinstance(res[2], RedisResolver) #=================================================================