mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
tests: add pathresolver tests for RedisResolver and PathIndexResolver
This commit is contained in:
parent
90f4833df3
commit
8d3d326c9e
@ -46,12 +46,8 @@ class RedisResolver:
|
||||
self.redis = redis.StrictRedis.from_url(redis_url)
|
||||
|
||||
def __call__(self, filename):
|
||||
try:
|
||||
redis_val = self.redis.hget(self.key_prefix + filename, 'path')
|
||||
return [redis_val] if redis_val else None
|
||||
except Exception as e:
|
||||
print e
|
||||
return None
|
||||
return [redis_val] if redis_val else []
|
||||
|
||||
def __repr__(self):
|
||||
return "RedisResolver('{0}')".format(self.redis_url)
|
||||
@ -68,13 +64,13 @@ class PathIndexResolver:
|
||||
|
||||
def gen_list(result):
|
||||
for pathline in result:
|
||||
path = pathline.split('\t')
|
||||
if len(path) == 2:
|
||||
yield path[1]
|
||||
paths = pathline.split('\t')[1:]
|
||||
for path in paths:
|
||||
yield path
|
||||
|
||||
return gen_list(result)
|
||||
|
||||
def __repr__(self):
|
||||
def __repr__(self): # pragma: no cover
|
||||
return "PathIndexResolver('{0}')".format(self.pathindex_file)
|
||||
|
||||
|
||||
@ -82,32 +78,6 @@ class PathIndexResolver:
|
||||
# TODO: more options (remote files, contains param, etc.)
|
||||
# find best resolver given the path
|
||||
def make_best_resolver(param):
|
||||
"""
|
||||
# http path
|
||||
>>> make_best_resolver('http://myhost.example.com/warcs/')
|
||||
PrefixResolver('http://myhost.example.com/warcs/')
|
||||
|
||||
# http path w/ contains param
|
||||
>>> make_best_resolver(['http://myhost.example.com/warcs/', '/'])
|
||||
PrefixResolver('http://myhost.example.com/warcs/', contains = '/')
|
||||
|
||||
# redis path
|
||||
>>> make_best_resolver('redis://myhost.example.com:1234/1')
|
||||
RedisResolver('redis://myhost.example.com:1234/1')
|
||||
|
||||
# a file
|
||||
>>> r = make_best_resolver('file://' + os.path.realpath(__file__))
|
||||
>>> r.__class__.__name__
|
||||
'PathIndexResolver'
|
||||
|
||||
# a dir
|
||||
>>> path = os.path.realpath(__file__)
|
||||
>>> r = make_best_resolver('file://' + os.path.dirname(path))
|
||||
>>> r.__class__.__name__
|
||||
'PrefixResolver'
|
||||
|
||||
"""
|
||||
|
||||
if isinstance(param, list):
|
||||
path = param[0]
|
||||
arg = param[1]
|
||||
@ -136,19 +106,7 @@ def make_best_resolver(param):
|
||||
|
||||
#=================================================================
|
||||
def make_best_resolvers(paths):
|
||||
"""
|
||||
>>> r = make_best_resolvers(['http://example.com/warcs/',\
|
||||
'redis://example.com:1234/1'])
|
||||
>>> map(lambda x: x.__class__.__name__, r)
|
||||
['PrefixResolver', 'RedisResolver']
|
||||
"""
|
||||
if hasattr(paths, '__iter__'):
|
||||
return map(make_best_resolver, paths)
|
||||
else:
|
||||
return [make_best_resolver(paths)]
|
||||
|
||||
|
||||
#=================================================================
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
@ -31,7 +31,8 @@ class ArchiveLoadFailed(WbException):
|
||||
|
||||
#=================================================================
|
||||
class ArcWarcRecordLoader:
|
||||
# Standard ARC headers
|
||||
# Standard ARC v1.0 headers
|
||||
# TODO: support ARC v2.0 also?
|
||||
ARC_HEADERS = ["uri", "ip-address", "archive-date",
|
||||
"content-type", "length"]
|
||||
|
||||
|
77
pywb/warc/test/test_pathresolvers.py
Normal file
77
pywb/warc/test/test_pathresolvers.py
Normal file
@ -0,0 +1,77 @@
|
||||
"""
|
||||
# PathIndexResolver tests
|
||||
>>> list(PathIndexResolver(get_test_dir() + 'text_content/pathindex.txt')('example.warc.gz'))
|
||||
['invalid_path', 'sample_archive/warcs/example.warc.gz']
|
||||
|
||||
>>> list(PathIndexResolver(get_test_dir() + 'text_content/pathindex.txt')('iana.warc.gz'))
|
||||
['sample_archive/warcs/iana.warc.gz']
|
||||
|
||||
>>> list(PathIndexResolver(get_test_dir() + 'text_content/pathindex.txt')('not-found.gz'))
|
||||
[]
|
||||
|
||||
# RedisResolver tests
|
||||
# not set, no match
|
||||
>>> redis_resolver('example.warc.gz')
|
||||
[]
|
||||
|
||||
>>> hset_path('example.warc.gz', 'some_path/example.warc.gz')
|
||||
>>> redis_resolver('example.warc.gz')
|
||||
['some_path/example.warc.gz']
|
||||
|
||||
|
||||
# make_best_resolver tests
|
||||
# http path
|
||||
>>> make_best_resolver('http://myhost.example.com/warcs/')
|
||||
PrefixResolver('http://myhost.example.com/warcs/')
|
||||
|
||||
# http path w/ contains param
|
||||
>>> make_best_resolver(['http://myhost.example.com/warcs/', '/'])
|
||||
PrefixResolver('http://myhost.example.com/warcs/', contains = '/')
|
||||
|
||||
# redis path
|
||||
>>> make_best_resolver('redis://myhost.example.com:1234/1')
|
||||
RedisResolver('redis://myhost.example.com:1234/1')
|
||||
|
||||
# a file
|
||||
>>> r = make_best_resolver('file://' + os.path.realpath(__file__))
|
||||
>>> r.__class__.__name__
|
||||
'PathIndexResolver'
|
||||
|
||||
# a dir
|
||||
>>> path = os.path.realpath(__file__)
|
||||
>>> r = make_best_resolver('file://' + os.path.dirname(path))
|
||||
>>> r.__class__.__name__
|
||||
'PrefixResolver'
|
||||
|
||||
|
||||
# make_best_resolvers
|
||||
>>> r = make_best_resolvers(['http://example.com/warcs/',\
|
||||
'redis://example.com:1234/1'])
|
||||
>>> map(lambda x: x.__class__.__name__, r)
|
||||
['PrefixResolver', 'RedisResolver']
|
||||
"""
|
||||
|
||||
from pywb import get_test_dir
|
||||
from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
|
||||
from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers
|
||||
import os
|
||||
|
||||
|
||||
from fakeredis import FakeStrictRedis
|
||||
from mock import patch
|
||||
|
||||
@patch('redis.StrictRedis', FakeStrictRedis)
|
||||
def init_redis_resolver():
|
||||
return RedisResolver('redis://127.0.0.1:6379/0')
|
||||
|
||||
|
||||
def hset_path(filename, path):
|
||||
redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path)
|
||||
|
||||
|
||||
redis_resolver = init_redis_resolver()
|
||||
|
||||
#=================================================================
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
2
sample_archive/text_content/pathindex.txt
Normal file
2
sample_archive/text_content/pathindex.txt
Normal file
@ -0,0 +1,2 @@
|
||||
example.warc.gz invalid_path sample_archive/warcs/example.warc.gz
|
||||
iana.warc.gz sample_archive/warcs/iana.warc.gz
|
Loading…
x
Reference in New Issue
Block a user