1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

tests: add pathresolver tests for RedisResolver and PathIndexResolver

This commit is contained in:
Ilya Kreymer 2014-04-02 11:41:20 -07:00
parent 90f4833df3
commit 8d3d326c9e
4 changed files with 87 additions and 49 deletions

View File

@ -46,12 +46,8 @@ class RedisResolver:
self.redis = redis.StrictRedis.from_url(redis_url)
def __call__(self, filename):
try:
redis_val = self.redis.hget(self.key_prefix + filename, 'path')
return [redis_val] if redis_val else None
except Exception as e:
print e
return None
return [redis_val] if redis_val else []
def __repr__(self):
return "RedisResolver('{0}')".format(self.redis_url)
@ -68,13 +64,13 @@ class PathIndexResolver:
def gen_list(result):
for pathline in result:
path = pathline.split('\t')
if len(path) == 2:
yield path[1]
paths = pathline.split('\t')[1:]
for path in paths:
yield path
return gen_list(result)
def __repr__(self):
def __repr__(self): # pragma: no cover
return "PathIndexResolver('{0}')".format(self.pathindex_file)
@ -82,32 +78,6 @@ class PathIndexResolver:
#TODO: more options (remote files, contains param, etc..)
# find best resolver given the path
def make_best_resolver(param):
"""
# http path
>>> make_best_resolver('http://myhost.example.com/warcs/')
PrefixResolver('http://myhost.example.com/warcs/')
# http path w/ contains param
>>> make_best_resolver(['http://myhost.example.com/warcs/', '/'])
PrefixResolver('http://myhost.example.com/warcs/', contains = '/')
# redis path
>>> make_best_resolver('redis://myhost.example.com:1234/1')
RedisResolver('redis://myhost.example.com:1234/1')
# a file
>>> r = make_best_resolver('file://' + os.path.realpath(__file__))
>>> r.__class__.__name__
'PathIndexResolver'
# a dir
>>> path = os.path.realpath(__file__)
>>> r = make_best_resolver('file://' + os.path.dirname(path))
>>> r.__class__.__name__
'PrefixResolver'
"""
if isinstance(param, list):
path = param[0]
arg = param[1]
@ -136,19 +106,7 @@ def make_best_resolver(param):
#=================================================================
def make_best_resolvers(paths):
    """
    Build a list of resolvers, one for each path spec in ``paths``.

    ``paths`` may be a single path spec or an iterable of path specs.
    Each spec is passed to make_best_resolver(). A lone string is always
    treated as one spec, never as a sequence of characters.

    The result is always a list, whichever branch is taken.

    >>> r = make_best_resolvers(['http://example.com/warcs/',
    ...                          'redis://example.com:1234/1'])
    >>> [x.__class__.__name__ for x in r]
    ['PrefixResolver', 'RedisResolver']
    """
    # Strings are tested explicitly: on Python 3 they expose __iter__, so
    # the hasattr() check alone would split a single path into characters.
    # On Python 2 (where bytes is str) this matches the previous behavior.
    if isinstance(paths, (str, bytes)) or not hasattr(paths, '__iter__'):
        return [make_best_resolver(paths)]

    # A list comprehension (rather than map) yields a real list on both
    # Python 2 and Python 3, matching the single-path branch above.
    return [make_best_resolver(path) for path in paths]
#=================================================================
# When this module is executed as a script, run the doctests found in its docstrings.
if __name__ == "__main__":
import doctest
doctest.testmod()

View File

@ -31,7 +31,8 @@ class ArchiveLoadFailed(WbException):
#=================================================================
class ArcWarcRecordLoader:
# Standard ARC headers
# Standard ARC v1.0 headers
# TODO: support ARC v2.0 also?
ARC_HEADERS = ["uri", "ip-address", "archive-date",
"content-type", "length"]

View File

@ -0,0 +1,77 @@
"""
# PathIndexResolver tests
>>> list(PathIndexResolver(get_test_dir() + 'text_content/pathindex.txt')('example.warc.gz'))
['invalid_path', 'sample_archive/warcs/example.warc.gz']
>>> list(PathIndexResolver(get_test_dir() + 'text_content/pathindex.txt')('iana.warc.gz'))
['sample_archive/warcs/iana.warc.gz']
>>> list(PathIndexResolver(get_test_dir() + 'text_content/pathindex.txt')('not-found.gz'))
[]
# RedisResolver tests
# not set, no match
>>> redis_resolver('example.warc.gz')
[]
>>> hset_path('example.warc.gz', 'some_path/example.warc.gz')
>>> redis_resolver('example.warc.gz')
['some_path/example.warc.gz']
# make_best_resolver tests
# http path
>>> make_best_resolver('http://myhost.example.com/warcs/')
PrefixResolver('http://myhost.example.com/warcs/')
# http path w/ contains param
>>> make_best_resolver(['http://myhost.example.com/warcs/', '/'])
PrefixResolver('http://myhost.example.com/warcs/', contains = '/')
# redis path
>>> make_best_resolver('redis://myhost.example.com:1234/1')
RedisResolver('redis://myhost.example.com:1234/1')
# a file
>>> r = make_best_resolver('file://' + os.path.realpath(__file__))
>>> r.__class__.__name__
'PathIndexResolver'
# a dir
>>> path = os.path.realpath(__file__)
>>> r = make_best_resolver('file://' + os.path.dirname(path))
>>> r.__class__.__name__
'PrefixResolver'
# make_best_resolvers
>>> r = make_best_resolvers(['http://example.com/warcs/',\
'redis://example.com:1234/1'])
>>> map(lambda x: x.__class__.__name__, r)
['PrefixResolver', 'RedisResolver']
"""
from pywb import get_test_dir
from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers
import os
from fakeredis import FakeStrictRedis
from mock import patch
@patch('redis.StrictRedis', FakeStrictRedis)
def init_redis_resolver():
    """
    Create the RedisResolver used by the doctests in this module.

    redis.StrictRedis is patched with FakeStrictRedis for the duration of
    this call, so the client the resolver builds from the url is the fake
    one and no real redis server is contacted.
    """
    resolver = RedisResolver('redis://127.0.0.1:6379/0')
    return resolver
def hset_path(filename, path):
    """
    Set the 'path' field of the redis hash for ``filename`` to ``path``.

    The hash key is the shared resolver's key_prefix followed by the
    filename, and the write goes through the shared resolver's client.
    """
    client = redis_resolver.redis
    hash_key = redis_resolver.key_prefix + filename
    client.hset(hash_key, 'path', path)
# Module-level resolver shared by the doctests above and by hset_path();
# created once, when this module is imported.
redis_resolver = init_redis_resolver()
#=================================================================
# When this test module is executed as a script, run the doctests in its module docstring.
if __name__ == "__main__":
import doctest
doctest.testmod()

View File

@ -0,0 +1,2 @@
example.warc.gz invalid_path sample_archive/warcs/example.warc.gz
iana.warc.gz sample_archive/warcs/iana.warc.gz