1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

redis: redisindexsource and pathresolver:

- for wildcard/multi-key lookup, support redis hashmap as well as redis set to be used as member lookup key
- if using hashmap, the property names are used for lookup
- track type of redis key in RedisIndexSource
tests: add tests with set and hashmap member keys
This commit is contained in:
Ilya Kreymer 2018-01-28 18:14:25 -08:00
parent 131c5ff5da
commit 52ca95eba5
2 changed files with 66 additions and 2 deletions

View File

@ -270,6 +270,8 @@ class RedisIndexSource(BaseIndexSource):
self.redis_key_template = key_template
self.member_key_template = kwargs.get('member_key_templ')
self.member_key_type = None
@staticmethod
def parse_redis_url(redis_url, redis_=None):
parts = redis_url.split('/')
@ -293,16 +295,28 @@ class RedisIndexSource(BaseIndexSource):
key = res_template(member_key, params)
scan_key = 'scan:' + key
# check if already have keys to avoid extra smembers call
# check if already have keys to avoid extra redis call
keys = params.get(scan_key)
if not keys:
keys = self.redis.smembers(key)
keys = self._load_key_set(key)
params[scan_key] = keys
match_templ = match_templ.encode('utf-8')
return [match_templ.replace(b'*', key) for key in keys]
def _load_key_set(self, key):
if not self.member_key_type:
self.member_key_type = self.redis.type(key)
if self.member_key_type == b'set':
return self.redis.smembers(key)
elif self.member_key_type == b'hash':
return self.redis.hvals(key)
return []
def load_index(self, params):
return self.load_key_index(self.redis_key_template, params)

View File

@ -87,6 +87,56 @@ class TestPathIndex(object):
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
@patch('redis.StrictRedis', FakeStrictRedis)
def test_redis_resolver_multi_key_with_member_set(self):
    """Wildcard ``*:warc`` lookup where the members come from a redis set."""
    warc_resolver = RedisResolver('redis://127.0.0.1:6379/0/*:warc',
                                  member_key_templ='member_set')

    cdx_obj = CDXObject()

    # nothing has been stored yet
    assert warc_resolver('example.warc.gz', cdx_obj) == None

    # two per-member path hashes, but only member 'A' is registered
    warc_resolver.redis.hset('A:warc', 'example.warc.gz', 'some_path/example.warc.gz')
    warc_resolver.redis.hset('B:warc', 'example-2.warc.gz', 'some_path/example-2.warc.gz')
    warc_resolver.redis.sadd('member_set', 'A')

    # lookups go through A:warc only
    assert warc_resolver('example.warc.gz', cdx_obj) == 'some_path/example.warc.gz'
    assert warc_resolver('example-2.warc.gz', cdx_obj) == None

    # register 'B' as well
    warc_resolver.redis.sadd('member_set', 'B')

    # lookups now go through both A:warc and B:warc
    assert warc_resolver('example.warc.gz', cdx_obj) == 'some_path/example.warc.gz'
    assert warc_resolver('example-2.warc.gz', cdx_obj) == 'some_path/example-2.warc.gz'

    # the member key was detected as a redis set
    assert warc_resolver.member_key_type == b'set'
@patch('redis.StrictRedis', FakeStrictRedis)
def test_redis_resolver_multi_key_with_member_hash(self):
    """Wildcard ``*:warc`` lookup where the members come from a redis hash."""
    warc_resolver = RedisResolver('redis://127.0.0.1:6379/0/*:warc',
                                  member_key_templ='member_hash')

    cdx_obj = CDXObject()

    # nothing has been stored yet
    assert warc_resolver('example.warc.gz', cdx_obj) == None

    # two per-member path hashes, but only 'A' is listed in the member hash
    warc_resolver.redis.hset('A:warc', 'example.warc.gz', 'some_path/example.warc.gz')
    warc_resolver.redis.hset('B:warc', 'example-2.warc.gz', 'some_path/example-2.warc.gz')
    warc_resolver.redis.hset('member_hash', '1', 'A')

    # lookups go through A:warc only
    assert warc_resolver('example.warc.gz', cdx_obj) == 'some_path/example.warc.gz'
    assert warc_resolver('example-2.warc.gz', cdx_obj) == None

    # list 'B' in the member hash as well
    warc_resolver.redis.hset('member_hash', '2', 'B')

    # lookups now go through both A:warc and B:warc
    assert warc_resolver('example.warc.gz', cdx_obj) == 'some_path/example.warc.gz'
    assert warc_resolver('example-2.warc.gz', cdx_obj) == 'some_path/example-2.warc.gz'

    # the member key was detected as a redis hash
    assert warc_resolver.member_key_type == b'hash'
def test_make_best_resolver_http(self):
res = DefaultResolverMixin.make_best_resolver('http://myhost.example.com/warcs/')
assert isinstance(res, PrefixResolver)