diff --git a/webagg/aggregator.py b/webagg/aggregator.py
index 73560ae0..8ddbe04f 100644
--- a/webagg/aggregator.py
+++ b/webagg/aggregator.py
@@ -275,12 +275,9 @@ class CacheDirectoryIndexSource(DirectoryIndexSource):
 
 #=============================================================================
 class RedisMultiKeyIndexSource(SeqAggMixin, BaseAggregator, RedisIndexSource):
-    def _iter_sources2(self, params):
+    def _iter_sources(self, params):
         redis_key_pattern = res_template(self.redis_key_template, params)
 
         for key in self.redis.scan_iter(match=redis_key_pattern):
             key = key.decode('utf-8')
-            yield '', RedisIndexSource(None, self.redis, key)
-
-    def _iter_sources(self, params):
-        return list(self._iter_sources2(params))
+            yield key, RedisIndexSource(None, self.redis, key)
diff --git a/webagg/test/test_redis_agg.py b/webagg/test/test_redis_agg.py
new file mode 100644
index 00000000..505350f7
--- /dev/null
+++ b/webagg/test/test_redis_agg.py
@@ -0,0 +1,45 @@
+from webagg.aggregator import RedisMultiKeyIndexSource
+from .testutils import to_path, to_json_list, FakeRedisTests, BaseTestClass
+
+
+class TestRedisAgg(FakeRedisTests, BaseTestClass):
+    @classmethod
+    def setup_class(cls):
+        super(TestRedisAgg, cls).setup_class()
+        cls.add_cdx_to_redis(to_path('testdata/example.cdxj'), 'FOO:example:cdxj')
+        cls.add_cdx_to_redis(to_path('testdata/dupes.cdxj'), 'FOO:dupes:cdxj')
+
+        cls.indexloader = RedisMultiKeyIndexSource('redis://localhost/2/{user}:{coll}:cdxj')
+
+    def test_redis_agg_all(self):
+        res, errs = self.indexloader({'url': 'example.com/', 'param.user': 'FOO', 'param.coll': '*'})
+
+        exp = [
+            {'source': 'FOO:dupes:cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
+            {'source': 'FOO:dupes:cdxj', 'timestamp': '20140127171251', 'filename': 'dupes.warc.gz'},
+            {'source': 'FOO:example:cdxj', 'timestamp': '20160225042329', 'filename': 'example.warc.gz'}
+        ]
+
+        assert(errs == {})
+        assert(to_json_list(res) == exp)
+
+    def test_redis_agg_one(self):
+        res, errs = self.indexloader({'url': 'example.com/', 'param.user': 'FOO', 'param.coll': 'dupes'})
+
+        exp = [
+            {'source': 'FOO:dupes:cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
+            {'source': 'FOO:dupes:cdxj', 'timestamp': '20140127171251', 'filename': 'dupes.warc.gz'},
+        ]
+
+        assert(errs == {})
+        assert(to_json_list(res) == exp)
+
+    def test_redis_not_found(self):
+        res, errs = self.indexloader({'url': 'example.com/'})
+
+        exp = []
+
+        assert(errs == {})
+        assert(to_json_list(res) == exp)
+
+