mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
redis: redisindexsource and pathresolver:
- for wildcard/multi-key lookup, support redis hashmap as well as redis set to be used as member lookup key - if using hashmap, the property names are used for lookup - track type of redis key in RedisIndexSource tests: add tests with set and hashmap member keys
This commit is contained in:
parent
131c5ff5da
commit
52ca95eba5
@ -270,6 +270,8 @@ class RedisIndexSource(BaseIndexSource):
|
||||
self.redis_key_template = key_template
|
||||
self.member_key_template = kwargs.get('member_key_templ')
|
||||
|
||||
self.member_key_type = None
|
||||
|
||||
@staticmethod
|
||||
def parse_redis_url(redis_url, redis_=None):
|
||||
parts = redis_url.split('/')
|
||||
@ -293,16 +295,28 @@ class RedisIndexSource(BaseIndexSource):
|
||||
key = res_template(member_key, params)
|
||||
|
||||
scan_key = 'scan:' + key
|
||||
# check if already have keys to avoid extra smembers call
|
||||
# check if already have keys to avoid extra redis call
|
||||
keys = params.get(scan_key)
|
||||
if not keys:
|
||||
keys = self.redis.smembers(key)
|
||||
keys = self._load_key_set(key)
|
||||
params[scan_key] = keys
|
||||
|
||||
match_templ = match_templ.encode('utf-8')
|
||||
|
||||
return [match_templ.replace(b'*', key) for key in keys]
|
||||
|
||||
def _load_key_set(self, key):
    """Return the member entries stored under the redis ``key``.

    The redis type of the member key is queried once and remembered in
    ``self.member_key_type`` so that later calls can skip the extra
    ``TYPE`` round trip.

    :param key: resolved name of the redis key holding the members
    :return: the ``smembers`` result when the key is a set, the ``hvals``
             result when the key is a hash, otherwise an empty list
    """
    if not self.member_key_type:
        self.member_key_type = self.redis.type(key)

    if self.member_key_type == b'set':
        return self.redis.smembers(key)

    if self.member_key_type == b'hash':
        return self.redis.hvals(key)

    # Do not remember unsupported types. TYPE reports a truthy 'none' for a
    # key that does not exist yet; caching it would make every later call
    # return [] even after the set/hash has been created.
    self.member_key_type = None
    return []
|
||||
|
||||
def load_index(self, params):
    """Load the index for ``params`` using this source's key template.

    Delegates to ``load_key_index`` with ``self.redis_key_template``.
    """
    key_template = self.redis_key_template
    return self.load_key_index(key_template, params)
|
||||
|
||||
|
@ -87,6 +87,56 @@ class TestPathIndex(object):
|
||||
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
|
||||
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
|
||||
|
||||
@patch('redis.StrictRedis', FakeStrictRedis)
def test_redis_resolver_multi_key_with_member_set(self):
    """Wildcard resolver only consults ``*:warc`` hashes named in a redis set."""
    resolver = RedisResolver('redis://127.0.0.1:6379/0/*:warc',
                             member_key_templ='member_set')

    cdx = CDXObject()

    # nothing stored yet, so nothing can be resolved
    assert resolver('example.warc.gz', cdx) is None

    resolver.redis.hset('A:warc', 'example.warc.gz', 'some_path/example.warc.gz')
    resolver.redis.hset('B:warc', 'example-2.warc.gz', 'some_path/example-2.warc.gz')

    resolver.redis.sadd('member_set', 'A')

    # only A:warc used
    assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
    assert resolver('example-2.warc.gz', cdx) is None

    resolver.redis.sadd('member_set', 'B')

    # A:warc and B:warc used
    assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
    assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'

    # the type of the member key is tracked on the resolver
    assert resolver.member_key_type == b'set'
|
||||
|
||||
@patch('redis.StrictRedis', FakeStrictRedis)
def test_redis_resolver_multi_key_with_member_hash(self):
    """Wildcard resolver only consults ``*:warc`` hashes listed as values of a redis hash."""
    resolver = RedisResolver('redis://127.0.0.1:6379/0/*:warc',
                             member_key_templ='member_hash')

    cdx = CDXObject()

    # nothing stored yet, so nothing can be resolved
    assert resolver('example.warc.gz', cdx) is None

    resolver.redis.hset('A:warc', 'example.warc.gz', 'some_path/example.warc.gz')
    resolver.redis.hset('B:warc', 'example-2.warc.gz', 'some_path/example-2.warc.gz')

    resolver.redis.hset('member_hash', '1', 'A')

    # only A:warc used
    assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
    assert resolver('example-2.warc.gz', cdx) is None

    resolver.redis.hset('member_hash', '2', 'B')

    # A:warc and B:warc used
    assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
    assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'

    # the type of the member key is tracked on the resolver
    assert resolver.member_key_type == b'hash'
|
||||
|
||||
def test_make_best_resolver_http(self):
|
||||
res = DefaultResolverMixin.make_best_resolver('http://myhost.example.com/warcs/')
|
||||
assert isinstance(res, PrefixResolver)
|
||||
|
Loading…
x
Reference in New Issue
Block a user