def _load_key_set(self, key):
    """Return the member names stored under the redis key ``key``.

    The key may hold a redis set (read with ``smembers``) or a redis hash
    (its values are read with ``hvals``). The key type is looked up with
    ``type`` and remembered in ``self.member_key_type`` so later calls can
    skip that lookup.

    :param key: name of the redis key that lists the members
    :return: the set members or hash values, or an empty list when the
        key is missing or holds any other type
    """
    key_type = self.member_key_type
    if not key_type:
        key_type = self.redis.type(key)

    # The type name is bytes by default, but str when the redis client
    # decodes responses; accept both spellings.
    if key_type in (b'set', 'set'):
        self.member_key_type = key_type
        return self.redis.smembers(key)

    if key_type in (b'hash', 'hash'):
        self.member_key_type = key_type
        return self.redis.hvals(key)

    # TYPE answers "none" for a key that does not exist yet. That value is
    # truthy, so remembering it would make every later call return []
    # even after the key is created. Only set/hash are cached.
    self.member_key_type = None
    return []
@patch('redis.StrictRedis', FakeStrictRedis)
def test_redis_resolver_multi_key_with_member_hash(self):
    """Resolve WARC paths via ``*:warc`` hashes chosen by a member hash.

    The values of the ``member_hash`` redis hash name the prefixes whose
    ``<prefix>:warc`` hash the resolver is expected to consult.
    """
    resolver = RedisResolver('redis://127.0.0.1:6379/0/*:warc',
                             member_key_templ='member_hash')

    cdx = CDXObject()

    # nothing has been stored yet, so nothing resolves
    assert resolver('example.warc.gz', cdx) is None

    resolver.redis.hset('A:warc', 'example.warc.gz', 'some_path/example.warc.gz')
    resolver.redis.hset('B:warc', 'example-2.warc.gz', 'some_path/example-2.warc.gz')

    resolver.redis.hset('member_hash', '1', 'A')

    # only A:warc used
    assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
    assert resolver('example-2.warc.gz', cdx) is None

    resolver.redis.hset('member_hash', '2', 'B')

    # A:warc and B:warc used
    assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
    assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'

    # the member key was detected as a redis hash
    assert resolver.member_key_type == b'hash'