mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

misc test improvements:

- add tests for WBMementoIndexSource, member-list based RedisIndexSource
- convert redis aggregator and index source tests to use testutils BaseTestClass system
- rename configwarcserver -> warcserver
Ilya Kreymer 2017-08-09 12:04:03 -07:00
parent 496defda42
commit c6d196c9fe
6 changed files with 232 additions and 252 deletions
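
For context on the member-list based RedisIndexSource (second bullet), here is a minimal sketch of the two lookup modes the aggregator tests in this commit exercise. The Redis URL and key names simply mirror the test fixtures below, not any required layout:

    from pywb.warcserver.index.aggregator import RedisMultiKeyIndexSource

    # Scan mode: a '*' coll param is expanded by scanning matching Redis keys
    scan_loader = RedisMultiKeyIndexSource('redis://localhost/2/{user}:{coll}:cdxj')

    # Member-list mode: collection names are read from a Redis set named by
    # member_key_templ instead of performing a key scan
    member_list_loader = RedisMultiKeyIndexSource('redis://localhost/2/{user}:{coll}:cdxj',
                                                  member_key_templ='FOO:<all>:list')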

View File

@@ -522,7 +522,7 @@ class WBMementoIndexSource(MementoIndexSource):
     def handle_timegate(self, params, timestamp):
         url = params['url']
-        load_url = self.timegate_url.format(url=url, timestamp=ts)
+        load_url = self.timegate_url.format(url=url, timestamp=timestamp)

         try:
             headers = self._get_headers(params)
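
This hunk fixes a NameError in handle_timegate: the format call referenced an undefined ts instead of the method's timestamp parameter.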

View File

@@ -1,219 +1,196 @@
 from pywb.warcserver.index.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource, RedisIndexSource
-from pywb.warcserver.index.indexsource import LiveIndexSource
+from pywb.warcserver.index.indexsource import LiveIndexSource, WBMementoIndexSource
 from pywb.warcserver.index.aggregator import SimpleAggregator

 from warcio.timeutils import timestamp_now

-from pywb.warcserver.test.testutils import key_ts_res, TEST_CDX_PATH
+from pywb.warcserver.test.testutils import key_ts_res, TEST_CDX_PATH, FakeRedisTests, BaseTestClass

 import pytest
 import os

-from fakeredis import FakeStrictRedis
-from mock import patch
-
-redismock = patch('redis.StrictRedis', FakeStrictRedis)
-redismock.start()
-
-def setup_module():
-    r = FakeStrictRedis.from_url('redis://localhost:6379/2')
-    r.delete('test:rediscdx')
-    with open(TEST_CDX_PATH + 'iana.cdxj', 'rb') as fh:
-        for line in fh:
-            r.zadd('test:rediscdx', 0, line.rstrip())
-
-def teardown_module():
-    redismock.stop()
-
-local_sources = [
-    FileIndexSource(TEST_CDX_PATH + 'iana.cdxj'),
-    RedisIndexSource('redis://localhost:6379/2/test:rediscdx')
-]
-
-remote_sources = [
-    RemoteIndexSource('http://webenact.rhizome.org/all-cdx?url={url}',
-                      'http://webenact.rhizome.org/all/{timestamp}id_/{url}'),
-    MementoIndexSource('http://webenact.rhizome.org/all/{url}',
-                       'http://webenact.rhizome.org/all/timemap/*/{url}',
-                       'http://webenact.rhizome.org/all/{timestamp}id_/{url}')
-]
-
-ait_source = RemoteIndexSource('http://wayback.archive-it.org/cdx?url={url}',
-                               'http://wayback.archive-it.org/all/{timestamp}id_/{url}')
-
-def query_single_source(source, params):
-    string = str(source)
-    return SimpleAggregator({'source': source})(params)
+local_sources = ['file', 'redis']
+remote_sources = ['remote_cdx', 'memento']
+all_sources = local_sources + remote_sources

-# Url Match -- Local Loaders
 # ============================================================================
-@pytest.mark.parametrize("source", local_sources, ids=["file", "redis"])
-def test_local_cdxj_loader(source):
-    url = 'http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf'
-    res, errs = query_single_source(source, dict(url=url, limit=3))
-
-    expected = """\
+class TestIndexSources(FakeRedisTests, BaseTestClass):
+    @classmethod
+    def setup_class(cls):
+        super(TestIndexSources, cls).setup_class()
+        cls.add_cdx_to_redis(TEST_CDX_PATH + 'iana.cdxj', 'test:rediscdx')
+
+        cls.all_sources = {
+            'file': FileIndexSource(TEST_CDX_PATH + 'iana.cdxj'),
+            'redis': RedisIndexSource('redis://localhost:6379/2/test:rediscdx'),
+            'remote_cdx': RemoteIndexSource('http://webenact.rhizome.org/all-cdx?url={url}',
+                                            'http://webenact.rhizome.org/all/{timestamp}id_/{url}'),
+            'memento': MementoIndexSource('http://webenact.rhizome.org/all/{url}',
+                                          'http://webenact.rhizome.org/all/timemap/*/{url}',
+                                          'http://webenact.rhizome.org/all/{timestamp}id_/{url}')
+        }
+
+    @pytest.fixture(params=local_sources)
+    def local_source(self, request):
+        return self.all_sources[request.param]
+
+    @pytest.fixture(params=remote_sources)
+    def remote_source(self, request):
+        return self.all_sources[request.param]
+
+    @pytest.fixture(params=all_sources)
+    def all_source(self, request):
+        return self.all_sources[request.param]
+
+    @staticmethod
+    def query_single_source(source, params):
+        string = str(source)
+        return SimpleAggregator({'source': source})(params)
+
+    # Url Match -- Local Loaders
+    def test_local_cdxj_loader(self, local_source):
+        url = 'http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf'
+        res, errs = self.query_single_source(local_source, dict(url=url, limit=3))
+
+        expected = """\
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200826 iana.warc.gz
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200912 iana.warc.gz
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200930 iana.warc.gz"""

-    assert(key_ts_res(res) == expected)
-    assert(errs == {})
+        assert(key_ts_res(res) == expected)
+        assert(errs == {})

-# Closest -- Local Loaders
-# ============================================================================
-@pytest.mark.parametrize("source", local_sources, ids=["file", "redis"])
-def test_local_closest_loader(source):
-    url = 'http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf'
-    res, errs = query_single_source(source, dict(url=url,
-                                    closest='20140126200930',
-                                    limit=3))
+    # Closest -- Local Loaders
+    def test_local_closest_loader(self, local_source):
+        url = 'http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf'
+        res, errs = self.query_single_source(local_source, dict(url=url,
+                                             closest='20140126200930',
+                                             limit=3))

-    expected = """\
+        expected = """\
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200930 iana.warc.gz
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200912 iana.warc.gz
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200826 iana.warc.gz"""

-    assert(key_ts_res(res) == expected)
-    assert(errs == {})
+        assert(key_ts_res(res) == expected)
+        assert(errs == {})

-# Prefix -- Local Loaders
-# ============================================================================
-@pytest.mark.parametrize("source", local_sources, ids=["file", "redis"])
-def test_file_prefix_loader(source):
-    res, errs = query_single_source(source, dict(url='http://iana.org/domains/root/*'))
+    # Prefix -- Local Loaders
+    def test_file_prefix_loader(self, local_source):
+        res, errs = self.query_single_source(local_source, dict(url='http://iana.org/domains/root/*'))

-    expected = """\
+        expected = """\
 org,iana)/domains/root/db 20140126200927 iana.warc.gz
 org,iana)/domains/root/db 20140126200928 iana.warc.gz
 org,iana)/domains/root/servers 20140126201227 iana.warc.gz"""

-    assert(key_ts_res(res) == expected)
-    assert(errs == {})
+        assert(key_ts_res(res) == expected)
+        assert(errs == {})

-# Url Match -- Remote Loaders
-# ============================================================================
-@pytest.mark.parametrize("source", remote_sources, ids=["remote_cdx", "memento"])
-def test_remote_loader(source):
-    url = 'http://instagram.com/amaliaulman'
-    res, errs = query_single_source(source, dict(url=url))
+    # Url Match -- Remote Loaders
+    def test_remote_loader(self, remote_source):
+        url = 'http://instagram.com/amaliaulman'
+        res, errs = self.query_single_source(remote_source, dict(url=url))

-    expected = """\
+        expected = """\
 com,instagram)/amaliaulman 20141014150552 http://webenact.rhizome.org/all/20141014150552id_/http://instagram.com/amaliaulman
 com,instagram)/amaliaulman 20141014155217 http://webenact.rhizome.org/all/20141014155217id_/http://instagram.com/amaliaulman
 com,instagram)/amaliaulman 20141014162333 http://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman
 com,instagram)/amaliaulman 20141014171636 http://webenact.rhizome.org/all/20141014171636id_/http://instagram.com/amaliaulman"""

-    assert(key_ts_res(res, 'load_url') == expected)
-    assert(errs == {})
-
-# Url Match -- Remote Loaders
-# ============================================================================
-@pytest.mark.parametrize("source", remote_sources, ids=["remote_cdx", "memento"])
-def test_remote_closest_loader(source):
-    url = 'http://instagram.com/amaliaulman'
-    res, errs = query_single_source(source, dict(url=url, closest='20141014162332', limit=1))
-
-    expected = """\
-com,instagram)/amaliaulman 20141014162333 http://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
-
-    assert(key_ts_res(res, 'load_url') == expected)
-    assert(errs == {})
-
-# Url Match -- Memento
-# ============================================================================
-@pytest.mark.parametrize("source", remote_sources, ids=["remote_cdx", "memento"])
-def test_remote_closest_loader(source):
-    url = 'http://instagram.com/amaliaulman'
-    res, errs = query_single_source(source, dict(url=url, closest='20141014162332', limit=1))
-
-    expected = """\
+        assert(key_ts_res(res, 'load_url') == expected)
+        assert(errs == {})
+
+    # Url Match -- Remote Loaders Closest
+    def test_remote_closest_loader(self, remote_source):
+        url = 'http://instagram.com/amaliaulman'
+        res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1))
+
+        expected = """\
+com,instagram)/amaliaulman 20141014162333 http://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
+
+        assert(key_ts_res(res, 'load_url') == expected)
+        assert(errs == {})
+
+    # Url Match -- Wb Memento
+    def test_remote_closest_wb_memnto_loader(self):
+        replay = 'http://webenact.rhizome.org/all/{timestamp}id_/{url}'
+        source = WBMementoIndexSource(replay, '', replay)
+        url = 'http://instagram.com/amaliaulman'
+        res, errs = self.query_single_source(source, dict(url=url, closest='20141014162332', limit=1))
+
+        expected = """\
 com,instagram)/amaliaulman 20141014162333 http://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""

-    assert(key_ts_res(res, 'load_url') == expected)
-    assert(errs == {})
+        assert(key_ts_res(res, 'load_url') == expected)
+        assert(errs == {})

-# Live Index -- No Load!
-# ============================================================================
-def test_live():
-    url = 'http://example.com/'
-    source = LiveIndexSource()
-    res, errs = query_single_source(source, dict(url=url))
+    # Live Index -- No Load!
+    def test_live(self):
+        url = 'http://example.com/'
+        source = LiveIndexSource()
+        res, errs = self.query_single_source(source, dict(url=url))

-    expected = 'com,example)/ {0} http://example.com/'.format(timestamp_now())
+        expected = 'com,example)/ {0} http://example.com/'.format(timestamp_now())

-    assert(key_ts_res(res, 'load_url') == expected)
-    assert(errs == {})
+        assert(key_ts_res(res, 'load_url') == expected)
+        assert(errs == {})

-# Errors -- Not Found All
-# ============================================================================
-@pytest.mark.parametrize("source", local_sources + remote_sources, ids=["file", "redis", "remote_cdx", "memento"])
-def test_all_not_found(source):
-    url = 'http://x-not-found-x.notfound/'
-    res, errs = query_single_source(source, dict(url=url, limit=3))
+    # Errors -- Not Found All
+    def test_all_not_found(self, all_source):
+        url = 'http://x-not-found-x.notfound/'
+        res, errs = self.query_single_source(all_source, dict(url=url, limit=3))

-    expected = ''
-    assert(key_ts_res(res) == expected)
+        expected = ''
+        assert(key_ts_res(res) == expected)

-    if source == remote_sources[0]:
-        assert('http%3A//x-not-found-x.notfound/' in errs['source'])
-    else:
-        assert(errs == {})
+        if all_source == self.all_sources[remote_sources[0]]:
+            assert('http%3A//x-not-found-x.notfound/' in errs['source'])
+        else:
+            assert(errs == {})

-# ============================================================================
-def test_another_remote_not_found():
-    source = MementoIndexSource.from_timegate_url('http://webenact.rhizome.org/all/')
-    url = 'http://x-not-found-x.notfound/'
-    res, errs = query_single_source(source, dict(url=url, limit=3))
+    def test_another_remote_not_found(self):
+        source = MementoIndexSource.from_timegate_url('http://webenact.rhizome.org/all/')
+        url = 'http://x-not-found-x.notfound/'
+        res, errs = self.query_single_source(source, dict(url=url, limit=3))

-    expected = ''
-    assert(key_ts_res(res) == expected)
-    assert(errs['source'] == "NotFoundException('http://webenact.rhizome.org/all/timemap/link/http://x-not-found-x.notfound/',)")
+        expected = ''
+        assert(key_ts_res(res) == expected)
+        assert(errs['source'] == "NotFoundException('http://webenact.rhizome.org/all/timemap/link/http://x-not-found-x.notfound/',)")

-# ============================================================================
-def test_file_not_found():
-    source = FileIndexSource('testdata/not-found-x')
-    url = 'http://x-not-found-x.notfound/'
-    res, errs = query_single_source(source, dict(url=url, limit=3))
+    def test_file_not_found(self):
+        source = FileIndexSource('testdata/not-found-x')
+        url = 'http://x-not-found-x.notfound/'
+        res, errs = self.query_single_source(source, dict(url=url, limit=3))

-    expected = ''
-    assert(key_ts_res(res) == expected)
-    assert(errs['source'] == "NotFoundException('testdata/not-found-x',)"), errs
+        expected = ''
+        assert(key_ts_res(res) == expected)
+        assert(errs['source'] == "NotFoundException('testdata/not-found-x',)"), errs

-# ============================================================================
-def test_ait_filters():
-    ait_source = RemoteIndexSource('http://wayback.archive-it.org/cdx/search/cdx?url={url}&filter=filename:ARCHIVEIT-({colls})-.*',
-                                   'http://wayback.archive-it.org/all/{timestamp}id_/{url}')
-    cdxlist, errs = query_single_source(ait_source, {'url': 'http://iana.org/', 'param.source.colls': '5610|933'})
-    filenames = [cdx['filename'] for cdx in cdxlist]
-
-    prefix = ('ARCHIVEIT-5610-', 'ARCHIVEIT-933-')
-    assert(all([x.startswith(prefix) for x in filenames]))
-
-    cdxlist, errs = query_single_source(ait_source, {'url': 'http://iana.org/', 'param.source.colls': '1883|366|905'})
-    filenames = [cdx['filename'] for cdx in cdxlist]
-
-    prefix = ('ARCHIVEIT-1883-', 'ARCHIVEIT-366-', 'ARCHIVEIT-905-')
-    assert(all([x.startswith(prefix) for x in filenames]))
+    def test_ait_filters(self):
+        ait_source = RemoteIndexSource('http://wayback.archive-it.org/cdx/search/cdx?url={url}&filter=filename:ARCHIVEIT-({colls})-.*',
+                                       'http://wayback.archive-it.org/all/{timestamp}id_/{url}')
+
+        cdxlist, errs = self.query_single_source(ait_source, {'url': 'http://iana.org/', 'param.source.colls': '5610|933'})
+        filenames = [cdx['filename'] for cdx in cdxlist]
+        prefix = ('ARCHIVEIT-5610-', 'ARCHIVEIT-933-')
+        assert(all([x.startswith(prefix) for x in filenames]))
+
+        cdxlist, errs = self.query_single_source(ait_source, {'url': 'http://iana.org/', 'param.source.colls': '1883|366|905'})
+        filenames = [cdx['filename'] for cdx in cdxlist]
+        prefix = ('ARCHIVEIT-1883-', 'ARCHIVEIT-366-', 'ARCHIVEIT-905-')
+        assert(all([x.startswith(prefix) for x in filenames]))
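
The new test_remote_closest_wb_memnto_loader covers WBMementoIndexSource directly: the same replay URL template is passed as both the timegate and replay URL, and the empty second argument appears to leave the timemap URL unset, so resolution goes through the pywb-style timegate endpoint rather than a timemap.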

View File

@@ -1,5 +1,6 @@
 from pywb.warcserver.index.aggregator import RedisMultiKeyIndexSource
 from pywb.warcserver.test.testutils import to_path, to_json_list, FakeRedisTests, BaseTestClass, TEST_CDX_PATH
+import pytest

 class TestRedisAgg(FakeRedisTests, BaseTestClass):
@@ -9,10 +10,24 @@ class TestRedisAgg(FakeRedisTests, BaseTestClass):
         cls.add_cdx_to_redis(TEST_CDX_PATH + 'example2.cdxj', 'FOO:example:cdxj')
         cls.add_cdx_to_redis(TEST_CDX_PATH + 'dupes.cdxj', 'FOO:dupes:cdxj')

-        cls.indexloader = RedisMultiKeyIndexSource('redis://localhost/2/{user}:{coll}:cdxj')
+        # scan loader
+        cls.scan_loader = RedisMultiKeyIndexSource('redis://localhost/2/{user}:{coll}:cdxj')

-    def test_redis_agg_all(self):
-        res, errs = self.indexloader({'url': 'example.com/', 'param.user': 'FOO', 'param.coll': '*'})
+        cls.redis.sadd('FOO:<all>:list', 'dupes')
+        cls.redis.sadd('FOO:<all>:list', 'example')
+        cls.member_list_loader = RedisMultiKeyIndexSource('redis://localhost/2/{user}:{coll}:cdxj',
+                                                          member_key_templ='FOO:<all>:list')
+
+    @pytest.fixture(params=['scan', 'member-list'])
+    def indexloader(self, request):
+        if request.param == 'scan':
+            return self.scan_loader
+        else:
+            return self.member_list_loader
+
+    def test_redis_agg_all(self, indexloader):
+        res, errs = indexloader({'url': 'example.com/', 'param.user': 'FOO', 'param.coll': '*'})

         exp = [
             {'source': 'FOO:dupes:cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
@@ -23,8 +38,8 @@ class TestRedisAgg(FakeRedisTests, BaseTestClass):
         assert(errs == {})
         assert(to_json_list(res) == exp)

-    def test_redis_agg_one(self):
-        res, errs = self.indexloader({'url': 'example.com/', 'param.user': 'FOO', 'param.coll': 'dupes'})
+    def test_redis_agg_one(self, indexloader):
+        res, errs = indexloader({'url': 'example.com/', 'param.user': 'FOO', 'param.coll': 'dupes'})

         exp = [
             {'source': 'FOO:dupes:cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
@@ -34,8 +49,8 @@ class TestRedisAgg(FakeRedisTests, BaseTestClass):
         assert(errs == {})
         assert(to_json_list(res) == exp)

-    def test_redis_not_found(self):
-        res, errs = self.indexloader({'url': 'example.com/'})
+    def test_redis_not_found(self, indexloader):
+        res, errs = indexloader({'url': 'example.com/'})

         exp = []
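
With the indexloader fixture parametrized over 'scan' and 'member-list', each of these tests now runs once against the key-scan loader and once against the member-list loader, so both lookup paths are checked by the same assertions.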

View File

@@ -20,99 +20,92 @@ class TimeoutFileSource(FileIndexSource):
         time.sleep(self.timeout)
         return super(TimeoutFileSource, self).load_index(params)

-TimeoutAggregator = GeventTimeoutAggregator
-
-def setup_module():
-    global sources
-    sources = {'slow': TimeoutFileSource(TEST_CDX_PATH + 'example2.cdxj', 0.2),
-               'slower': TimeoutFileSource(TEST_CDX_PATH + 'dupes.cdxj', 0.5)
-              }
-
-def test_timeout_long_all_pass():
-    agg = TimeoutAggregator(sources, timeout=1.0)
-    res, errs = agg(dict(url='http://example.com/'))
-
-    exp = [{'source': 'slower', 'timestamp': '20140127171200'},
-           {'source': 'slower', 'timestamp': '20140127171251'},
-           {'source': 'slow', 'timestamp': '20160225042329'}]
-
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(errs == {})
-
-def test_timeout_slower_skipped_1():
-    agg = GeventTimeoutAggregator(sources, timeout=0.40)
-    res, errs = agg(dict(url='http://example.com/'))
-
-    exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
-
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(errs == {'slower': 'timeout'})
-
-def test_timeout_slower_all_skipped():
-    agg = GeventTimeoutAggregator(sources, timeout=0.10)
-    res, errs = agg(dict(url='http://example.com/'))
-
-    exp = []
-
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(errs == {'slower': 'timeout', 'slow': 'timeout'})
-
-def test_timeout_skipping():
-    assert(sources['slow'].calls == 3)
-    assert(sources['slower'].calls == 3)
-
-    agg = GeventTimeoutAggregator(sources, timeout=0.40,
-                                  t_count=2, t_duration=1.0)
-
-    exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
-
-    res, errs = agg(dict(url='http://example.com/'))
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(sources['slow'].calls == 4)
-    assert(sources['slower'].calls == 4)
-    assert(errs == {'slower': 'timeout'})
-
-    res, errs = agg(dict(url='http://example.com/'))
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(sources['slow'].calls == 5)
-    assert(sources['slower'].calls == 5)
-    assert(errs == {'slower': 'timeout'})
-
-    res, errs = agg(dict(url='http://example.com/'))
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(sources['slow'].calls == 6)
-    assert(sources['slower'].calls == 5)
-    assert(errs == {})
-
-    res, errs = agg(dict(url='http://example.com/'))
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(sources['slow'].calls == 7)
-    assert(sources['slower'].calls == 5)
-    assert(errs == {})
-
-    time.sleep(1.5)
-
-    res, errs = agg(dict(url='http://example.com/'))
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(sources['slow'].calls == 8)
-    assert(sources['slower'].calls == 6)
-    assert(errs == {'slower': 'timeout'})
+class TestTimeouts(object):
+    @classmethod
+    def setup_class(cls):
+        cls.sources = {'slow': TimeoutFileSource(TEST_CDX_PATH + 'example2.cdxj', 0.2),
+                       'slower': TimeoutFileSource(TEST_CDX_PATH + 'dupes.cdxj', 0.5)
+                      }
+
+    def test_timeout_long_all_pass(self):
+        agg = GeventTimeoutAggregator(self.sources, timeout=1.0)
+        res, errs = agg(dict(url='http://example.com/'))
+
+        exp = [{'source': 'slower', 'timestamp': '20140127171200'},
+               {'source': 'slower', 'timestamp': '20140127171251'},
+               {'source': 'slow', 'timestamp': '20160225042329'}]
+
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(errs == {})
+
+    def test_timeout_slower_skipped_1(self):
+        agg = GeventTimeoutAggregator(self.sources, timeout=0.40)
+        res, errs = agg(dict(url='http://example.com/'))
+
+        exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(errs == {'slower': 'timeout'})
+
+    def test_timeout_slower_all_skipped(self):
+        agg = GeventTimeoutAggregator(self.sources, timeout=0.10)
+        res, errs = agg(dict(url='http://example.com/'))
+
+        exp = []
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(errs == {'slower': 'timeout', 'slow': 'timeout'})
+
+    def test_timeout_skipping(self):
+        assert(self.sources['slow'].calls == 3)
+        assert(self.sources['slower'].calls == 3)
+
+        agg = GeventTimeoutAggregator(self.sources, timeout=0.40,
+                                      t_count=2, t_duration=1.0)
+
+        exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
+
+        res, errs = agg(dict(url='http://example.com/'))
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(self.sources['slow'].calls == 4)
+        assert(self.sources['slower'].calls == 4)
+        assert(errs == {'slower': 'timeout'})
+
+        res, errs = agg(dict(url='http://example.com/'))
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(self.sources['slow'].calls == 5)
+        assert(self.sources['slower'].calls == 5)
+        assert(errs == {'slower': 'timeout'})
+
+        res, errs = agg(dict(url='http://example.com/'))
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(self.sources['slow'].calls == 6)
+        assert(self.sources['slower'].calls == 5)
+        assert(errs == {})
+
+        res, errs = agg(dict(url='http://example.com/'))
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(self.sources['slow'].calls == 7)
+        assert(self.sources['slower'].calls == 5)
+        assert(errs == {})
+
+        time.sleep(1.5)
+
+        res, errs = agg(dict(url='http://example.com/'))
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(self.sources['slow'].calls == 8)
+        assert(self.sources['slower'].calls == 6)
+        assert(errs == {'slower': 'timeout'})
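
As the call counts suggest, t_count=2 with t_duration=1.0 appears to mean that a source which times out twice is skipped entirely (no call, no error entry) and is only retried once the one-second window has elapsed: 'slower' stops being called on the third and fourth rounds and is attempted again after the 1.5s sleep.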

View File

@@ -125,9 +125,3 @@ class TestWarcServer(TempDirTests, BaseTestClass):
         assert len(sources) == 1
         assert isinstance(sources['live'], LiveIndexSource)

View File

@@ -62,22 +62,23 @@ class FakeStrictRedisSharedPubSub(FakeStrictRedis):

 # ============================================================================
 class FakeRedisTests(object):
     @classmethod
-    def setup_class(cls):
+    def setup_class(cls, redis_url='redis://localhost:6379/2'):
         super(FakeRedisTests, cls).setup_class()
         cls.redismock = patch('redis.StrictRedis', FakeStrictRedisSharedPubSub)
         cls.redismock.start()
+        cls.redis = FakeStrictRedis.from_url(redis_url)

-    @staticmethod
-    def add_cdx_to_redis(filename, key, redis_url='redis://localhost:6379/2'):
-        r = FakeStrictRedis.from_url(redis_url)
+    @classmethod
+    def add_cdx_to_redis(cls, filename, key):
         with open(filename, 'rb') as fh:
             for line in fh:
-                r.zadd(key, 0, line.rstrip())
+                cls.redis.zadd(key, 0, line.rstrip())

     @classmethod
     def teardown_class(cls):
         super(FakeRedisTests, cls).teardown_class()
-        FakeStrictRedis().flushall()
+        cls.redis.flushall()
         cls.redismock.stop()
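
For reference, a minimal sketch of how a test module adopts these shared helpers after this change; the class name and Redis key here are illustrative only:

    from pywb.warcserver.test.testutils import FakeRedisTests, BaseTestClass, TEST_CDX_PATH

    class TestMySource(FakeRedisTests, BaseTestClass):
        @classmethod
        def setup_class(cls):
            super(TestMySource, cls).setup_class()
            # cls.redis is the shared FakeStrictRedis created by FakeRedisTests
            cls.add_cdx_to_redis(TEST_CDX_PATH + 'iana.cdxj', 'test:rediscdx')

Centralizing the FakeStrictRedis instance on cls.redis is what lets the aggregator test above seed a member-list set (cls.redis.sadd) without reaching into fakeredis directly.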