Mirror of https://github.com/webrecorder/pywb.git
misc test improvements:
- add tests for WBMementoIndexSource, member-list based RedisIndexSource
- convert redis aggregator and index source tests to use testutils BaseTestClass system
- rename configwarcserver -> warcserver
This commit is contained in:
parent 496defda42
commit c6d196c9fe
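The conversion replaces module-level parametrized test functions with test classes whose sources are built once in setup_class and handed to tests through parametrized pytest fixtures; one apparent motivation is that the redis-backed sources are then only constructed after FakeRedisTests.setup_class has patched redis.StrictRedis. A minimal, standalone sketch of that fixture pattern, with hypothetical names (the real class below maps the params to pywb index sources):

# Minimal sketch of the fixture-parametrization pattern the converted tests use.
# Names here are hypothetical; the real tests return pywb index sources.
import pytest


class TestSources(object):
    @classmethod
    def setup_class(cls):
        # built once per class, after any class-level patching has happened
        cls.all_sources = {'file': 'file-backed', 'redis': 'redis-backed'}

    @pytest.fixture(params=['file', 'redis'])
    def source(self, request):
        # each test that accepts this fixture runs once per param
        return self.all_sources[request.param]

    def test_source_known(self, source):
        assert source in ('file-backed', 'redis-backed')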
@@ -522,7 +522,7 @@ class WBMementoIndexSource(MementoIndexSource):

     def handle_timegate(self, params, timestamp):
         url = params['url']
-        load_url = self.timegate_url.format(url=url, timestamp=ts)
+        load_url = self.timegate_url.format(url=url, timestamp=timestamp)

         try:
             headers = self._get_headers(params)
@@ -1,219 +1,196 @@
 from pywb.warcserver.index.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource, RedisIndexSource
-from pywb.warcserver.index.indexsource import LiveIndexSource
+from pywb.warcserver.index.indexsource import LiveIndexSource, WBMementoIndexSource

 from pywb.warcserver.index.aggregator import SimpleAggregator

 from warcio.timeutils import timestamp_now

-from pywb.warcserver.test.testutils import key_ts_res, TEST_CDX_PATH
+from pywb.warcserver.test.testutils import key_ts_res, TEST_CDX_PATH, FakeRedisTests, BaseTestClass

 import pytest
 import os

-from fakeredis import FakeStrictRedis
-from mock import patch
-
-redismock = patch('redis.StrictRedis', FakeStrictRedis)
-redismock.start()
-
-def setup_module():
-    r = FakeStrictRedis.from_url('redis://localhost:6379/2')
-    r.delete('test:rediscdx')
-    with open(TEST_CDX_PATH + 'iana.cdxj', 'rb') as fh:
-        for line in fh:
-            r.zadd('test:rediscdx', 0, line.rstrip())
-
-
-def teardown_module():
-    redismock.stop()
-
-
-local_sources = [
-    FileIndexSource(TEST_CDX_PATH + 'iana.cdxj'),
-    RedisIndexSource('redis://localhost:6379/2/test:rediscdx')
-]
-
-
-remote_sources = [
-    RemoteIndexSource('http://webenact.rhizome.org/all-cdx?url={url}',
-                      'http://webenact.rhizome.org/all/{timestamp}id_/{url}'),
-
-    MementoIndexSource('http://webenact.rhizome.org/all/{url}',
-                       'http://webenact.rhizome.org/all/timemap/*/{url}',
-                       'http://webenact.rhizome.org/all/{timestamp}id_/{url}')
-]
-
-ait_source = RemoteIndexSource('http://wayback.archive-it.org/cdx?url={url}',
-                               'http://wayback.archive-it.org/all/{timestamp}id_/{url}')
-
-
-def query_single_source(source, params):
-    string = str(source)
-    return SimpleAggregator({'source': source})(params)
-
-
-# Url Match -- Local Loaders
-# ============================================================================
-@pytest.mark.parametrize("source", local_sources, ids=["file", "redis"])
-def test_local_cdxj_loader(source):
-    url = 'http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf'
-    res, errs = query_single_source(source, dict(url=url, limit=3))
-
-    expected = """\
+local_sources = ['file', 'redis']
+remote_sources = ['remote_cdx', 'memento']
+all_sources = local_sources + remote_sources
+
+
+class TestIndexSources(FakeRedisTests, BaseTestClass):
+    @classmethod
+    def setup_class(cls):
+        super(TestIndexSources, cls).setup_class()
+        cls.add_cdx_to_redis(TEST_CDX_PATH + 'iana.cdxj', 'test:rediscdx')
+
+        cls.all_sources = {
+            'file': FileIndexSource(TEST_CDX_PATH + 'iana.cdxj'),
+            'redis': RedisIndexSource('redis://localhost:6379/2/test:rediscdx'),
+            'remote_cdx': RemoteIndexSource('http://webenact.rhizome.org/all-cdx?url={url}',
+                                            'http://webenact.rhizome.org/all/{timestamp}id_/{url}'),
+
+            'memento': MementoIndexSource('http://webenact.rhizome.org/all/{url}',
+                                          'http://webenact.rhizome.org/all/timemap/*/{url}',
+                                          'http://webenact.rhizome.org/all/{timestamp}id_/{url}')
+        }
+
+    @pytest.fixture(params=local_sources)
+    def local_source(self, request):
+        return self.all_sources[request.param]
+
+    @pytest.fixture(params=remote_sources)
+    def remote_source(self, request):
+        return self.all_sources[request.param]
+
+    @pytest.fixture(params=all_sources)
+    def all_source(self, request):
+        return self.all_sources[request.param]
+
+    @staticmethod
+    def query_single_source(source, params):
+        string = str(source)
+        return SimpleAggregator({'source': source})(params)
+
+    # Url Match -- Local Loaders
+    def test_local_cdxj_loader(self, local_source):
+        url = 'http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf'
+        res, errs = self.query_single_source(local_source, dict(url=url, limit=3))
+
+        expected = """\
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200826 iana.warc.gz
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200912 iana.warc.gz
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200930 iana.warc.gz"""

-    assert(key_ts_res(res) == expected)
-    assert(errs == {})
+        assert(key_ts_res(res) == expected)
+        assert(errs == {})

-
-# Closest -- Local Loaders
-# ============================================================================
-@pytest.mark.parametrize("source", local_sources, ids=["file", "redis"])
-def test_local_closest_loader(source):
-    url = 'http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf'
-    res, errs = query_single_source(source, dict(url=url,
-                                                 closest='20140126200930',
-                                                 limit=3))
-
-    expected = """\
+    # Closest -- Local Loaders
+    def test_local_closest_loader(self, local_source):
+        url = 'http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf'
+        res, errs = self.query_single_source(local_source, dict(url=url,
+                                                                closest='20140126200930',
+                                                                limit=3))
+
+        expected = """\
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200930 iana.warc.gz
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200912 iana.warc.gz
 org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200826 iana.warc.gz"""

-    assert(key_ts_res(res) == expected)
-    assert(errs == {})
+        assert(key_ts_res(res) == expected)
+        assert(errs == {})

-
-# Prefix -- Local Loaders
-# ============================================================================
-@pytest.mark.parametrize("source", local_sources, ids=["file", "redis"])
-def test_file_prefix_loader(source):
-    res, errs = query_single_source(source, dict(url='http://iana.org/domains/root/*'))
-
-    expected = """\
+    # Prefix -- Local Loaders
+    def test_file_prefix_loader(self, local_source):
+        res, errs = self.query_single_source(local_source, dict(url='http://iana.org/domains/root/*'))
+
+        expected = """\
 org,iana)/domains/root/db 20140126200927 iana.warc.gz
 org,iana)/domains/root/db 20140126200928 iana.warc.gz
 org,iana)/domains/root/servers 20140126201227 iana.warc.gz"""

-    assert(key_ts_res(res) == expected)
-    assert(errs == {})
+        assert(key_ts_res(res) == expected)
+        assert(errs == {})

-
-# Url Match -- Remote Loaders
-# ============================================================================
-@pytest.mark.parametrize("source", remote_sources, ids=["remote_cdx", "memento"])
-def test_remote_loader(source):
-    url = 'http://instagram.com/amaliaulman'
-    res, errs = query_single_source(source, dict(url=url))
-
-    expected = """\
+    # Url Match -- Remote Loaders
+    def test_remote_loader(self, remote_source):
+        url = 'http://instagram.com/amaliaulman'
+        res, errs = self.query_single_source(remote_source, dict(url=url))
+
+        expected = """\
 com,instagram)/amaliaulman 20141014150552 http://webenact.rhizome.org/all/20141014150552id_/http://instagram.com/amaliaulman
 com,instagram)/amaliaulman 20141014155217 http://webenact.rhizome.org/all/20141014155217id_/http://instagram.com/amaliaulman
 com,instagram)/amaliaulman 20141014162333 http://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman
 com,instagram)/amaliaulman 20141014171636 http://webenact.rhizome.org/all/20141014171636id_/http://instagram.com/amaliaulman"""

-    assert(key_ts_res(res, 'load_url') == expected)
-    assert(errs == {})
+        assert(key_ts_res(res, 'load_url') == expected)
+        assert(errs == {})

-
-# Url Match -- Remote Loaders
-# ============================================================================
-@pytest.mark.parametrize("source", remote_sources, ids=["remote_cdx", "memento"])
-def test_remote_closest_loader(source):
-    url = 'http://instagram.com/amaliaulman'
-    res, errs = query_single_source(source, dict(url=url, closest='20141014162332', limit=1))
-
-    expected = """\
+    # Url Match -- Remote Loaders Closest
+    def test_remote_closest_loader(self, remote_source):
+        url = 'http://instagram.com/amaliaulman'
+        res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1))
+
+        expected = """\
 com,instagram)/amaliaulman 20141014162333 http://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""

-    assert(key_ts_res(res, 'load_url') == expected)
-    assert(errs == {})
+        assert(key_ts_res(res, 'load_url') == expected)
+        assert(errs == {})

-
-# Live Index -- No Load!
-# ============================================================================
-def test_live():
-    url = 'http://example.com/'
-    source = LiveIndexSource()
-    res, errs = query_single_source(source, dict(url=url))
-
-    expected = 'com,example)/ {0} http://example.com/'.format(timestamp_now())
-
-    assert(key_ts_res(res, 'load_url') == expected)
-    assert(errs == {})
+    # Url Match -- Wb Memento
+    def test_remote_closest_wb_memnto_loader(self):
+        replay = 'http://webenact.rhizome.org/all/{timestamp}id_/{url}'
+        source = WBMementoIndexSource(replay, '', replay)
+
+        url = 'http://instagram.com/amaliaulman'
+        res, errs = self.query_single_source(source, dict(url=url, closest='20141014162332', limit=1))
+
+        expected = """\
+com,instagram)/amaliaulman 20141014162333 http://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
+
+        assert(key_ts_res(res, 'load_url') == expected)
+        assert(errs == {})

-
-# Errors -- Not Found All
-# ============================================================================
-@pytest.mark.parametrize("source", local_sources + remote_sources, ids=["file", "redis", "remote_cdx", "memento"])
-def test_all_not_found(source):
-    url = 'http://x-not-found-x.notfound/'
-    res, errs = query_single_source(source, dict(url=url, limit=3))
-
-    expected = ''
-    assert(key_ts_res(res) == expected)
-    if source == remote_sources[0]:
-        assert('http%3A//x-not-found-x.notfound/' in errs['source'])
-    else:
-        assert(errs == {})
+    # Live Index -- No Load!
+    def test_live(self):
+        url = 'http://example.com/'
+        source = LiveIndexSource()
+        res, errs = self.query_single_source(source, dict(url=url))
+
+        expected = 'com,example)/ {0} http://example.com/'.format(timestamp_now())
+
+        assert(key_ts_res(res, 'load_url') == expected)
+        assert(errs == {})

-
-# ============================================================================
-def test_another_remote_not_found():
-    source = MementoIndexSource.from_timegate_url('http://webenact.rhizome.org/all/')
-    url = 'http://x-not-found-x.notfound/'
-    res, errs = query_single_source(source, dict(url=url, limit=3))
-
-    expected = ''
-    assert(key_ts_res(res) == expected)
-    assert(errs['source'] == "NotFoundException('http://webenact.rhizome.org/all/timemap/link/http://x-not-found-x.notfound/',)")
+    # Errors -- Not Found All
+    def test_all_not_found(self, all_source):
+        url = 'http://x-not-found-x.notfound/'
+        res, errs = self.query_single_source(all_source, dict(url=url, limit=3))
+
+        expected = ''
+        assert(key_ts_res(res) == expected)
+        if all_source == self.all_sources[remote_sources[0]]:
+            assert('http%3A//x-not-found-x.notfound/' in errs['source'])
+        else:
+            assert(errs == {})

-
-# ============================================================================
-def test_file_not_found():
-    source = FileIndexSource('testdata/not-found-x')
-    url = 'http://x-not-found-x.notfound/'
-    res, errs = query_single_source(source, dict(url=url, limit=3))
-
-    expected = ''
-    assert(key_ts_res(res) == expected)
-    assert(errs['source'] == "NotFoundException('testdata/not-found-x',)"), errs
+    def test_another_remote_not_found(self):
+        source = MementoIndexSource.from_timegate_url('http://webenact.rhizome.org/all/')
+        url = 'http://x-not-found-x.notfound/'
+        res, errs = self.query_single_source(source, dict(url=url, limit=3))
+
+        expected = ''
+        assert(key_ts_res(res) == expected)
+        assert(errs['source'] == "NotFoundException('http://webenact.rhizome.org/all/timemap/link/http://x-not-found-x.notfound/',)")

-
-# ============================================================================
-def test_ait_filters():
-    ait_source = RemoteIndexSource('http://wayback.archive-it.org/cdx/search/cdx?url={url}&filter=filename:ARCHIVEIT-({colls})-.*',
-                                   'http://wayback.archive-it.org/all/{timestamp}id_/{url}')
-
-    cdxlist, errs = query_single_source(ait_source, {'url': 'http://iana.org/', 'param.source.colls': '5610|933'})
-    filenames = [cdx['filename'] for cdx in cdxlist]
-
-    prefix = ('ARCHIVEIT-5610-', 'ARCHIVEIT-933-')
-
-    assert(all([x.startswith(prefix) for x in filenames]))
-
-    cdxlist, errs = query_single_source(ait_source, {'url': 'http://iana.org/', 'param.source.colls': '1883|366|905'})
-    filenames = [cdx['filename'] for cdx in cdxlist]
-
-    prefix = ('ARCHIVEIT-1883-', 'ARCHIVEIT-366-', 'ARCHIVEIT-905-')
-
-    assert(all([x.startswith(prefix) for x in filenames]))
+    def test_file_not_found(self):
+        source = FileIndexSource('testdata/not-found-x')
+        url = 'http://x-not-found-x.notfound/'
+        res, errs = self.query_single_source(source, dict(url=url, limit=3))
+
+        expected = ''
+        assert(key_ts_res(res) == expected)
+        assert(errs['source'] == "NotFoundException('testdata/not-found-x',)"), errs
+
+    def test_ait_filters(self):
+        ait_source = RemoteIndexSource('http://wayback.archive-it.org/cdx/search/cdx?url={url}&filter=filename:ARCHIVEIT-({colls})-.*',
+                                       'http://wayback.archive-it.org/all/{timestamp}id_/{url}')
+
+        cdxlist, errs = self.query_single_source(ait_source, {'url': 'http://iana.org/', 'param.source.colls': '5610|933'})
+        filenames = [cdx['filename'] for cdx in cdxlist]
+
+        prefix = ('ARCHIVEIT-5610-', 'ARCHIVEIT-933-')
+
+        assert(all([x.startswith(prefix) for x in filenames]))
+
+        cdxlist, errs = self.query_single_source(ait_source, {'url': 'http://iana.org/', 'param.source.colls': '1883|366|905'})
+        filenames = [cdx['filename'] for cdx in cdxlist]
+
+        prefix = ('ARCHIVEIT-1883-', 'ARCHIVEIT-366-', 'ARCHIVEIT-905-')
+
+        assert(all([x.startswith(prefix) for x in filenames]))
@@ -1,5 +1,6 @@
 from pywb.warcserver.index.aggregator import RedisMultiKeyIndexSource
 from pywb.warcserver.test.testutils import to_path, to_json_list, FakeRedisTests, BaseTestClass, TEST_CDX_PATH
+import pytest


 class TestRedisAgg(FakeRedisTests, BaseTestClass):
@@ -9,10 +10,24 @@ class TestRedisAgg(FakeRedisTests, BaseTestClass):
         cls.add_cdx_to_redis(TEST_CDX_PATH + 'example2.cdxj', 'FOO:example:cdxj')
         cls.add_cdx_to_redis(TEST_CDX_PATH + 'dupes.cdxj', 'FOO:dupes:cdxj')

-        cls.indexloader = RedisMultiKeyIndexSource('redis://localhost/2/{user}:{coll}:cdxj')
+        # scan loader
+        cls.scan_loader = RedisMultiKeyIndexSource('redis://localhost/2/{user}:{coll}:cdxj')
+
+        cls.redis.sadd('FOO:<all>:list', 'dupes')
+        cls.redis.sadd('FOO:<all>:list', 'example')
+
+        cls.member_list_loader = RedisMultiKeyIndexSource('redis://localhost/2/{user}:{coll}:cdxj',
+                                                          member_key_templ='FOO:<all>:list')
+
+    @pytest.fixture(params=['scan', 'member-list'])
+    def indexloader(self, request):
+        if request.param == 'scan':
+            return self.scan_loader
+        else:
+            return self.member_list_loader

-    def test_redis_agg_all(self):
-        res, errs = self.indexloader({'url': 'example.com/', 'param.user': 'FOO', 'param.coll': '*'})
+    def test_redis_agg_all(self, indexloader):
+        res, errs = indexloader({'url': 'example.com/', 'param.user': 'FOO', 'param.coll': '*'})

         exp = [
             {'source': 'FOO:dupes:cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
@@ -23,8 +38,8 @@ class TestRedisAgg(FakeRedisTests, BaseTestClass):
         assert(errs == {})
         assert(to_json_list(res) == exp)

-    def test_redis_agg_one(self):
-        res, errs = self.indexloader({'url': 'example.com/', 'param.user': 'FOO', 'param.coll': 'dupes'})
+    def test_redis_agg_one(self, indexloader):
+        res, errs = indexloader({'url': 'example.com/', 'param.user': 'FOO', 'param.coll': 'dupes'})

         exp = [
             {'source': 'FOO:dupes:cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
@@ -34,8 +49,8 @@ class TestRedisAgg(FakeRedisTests, BaseTestClass):
         assert(errs == {})
         assert(to_json_list(res) == exp)

-    def test_redis_not_found(self):
-        res, errs = self.indexloader({'url': 'example.com/'})
+    def test_redis_not_found(self, indexloader):
+        res, errs = indexloader({'url': 'example.com/'})

         exp = []

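The new fixture runs every aggregator test twice: once against a loader that expands the key template by scanning the keyspace, and once against a loader that reads collection names from the FOO:<all>:list set via member_key_templ. A rough sketch of the two lookup strategies at the redis level (keys are illustrative; this is not pywb's internal lookup code):

# Illustrative comparison of 'scan' vs 'member-list' key resolution.
from fakeredis import FakeStrictRedis

r = FakeStrictRedis()
# zadd(key, score, value) follows the redis-py 2.x call style used in this commit
r.zadd('FOO:example:cdxj', 0, 'line1')
r.zadd('FOO:dupes:cdxj', 0, 'line2')
r.sadd('FOO:<all>:list', 'example', 'dupes')

# 'scan' strategy: expand the {coll} wildcard against the actual keyspace
scan_keys = sorted(k.decode('utf-8') for k in r.keys('FOO:*:cdxj'))

# 'member-list' strategy: read collection names from a set, then build keys
members = sorted(m.decode('utf-8') for m in r.smembers('FOO:<all>:list'))
member_keys = ['FOO:{0}:cdxj'.format(m) for m in members]

# both strategies should surface the same cdxj keys
assert scan_keys == member_keys

The member-list form avoids a keyspace scan when the set of collections is already known, which is why the tests assert identical results from both loaders.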
@@ -20,99 +20,92 @@ class TimeoutFileSource(FileIndexSource):
         time.sleep(self.timeout)
         return super(TimeoutFileSource, self).load_index(params)

-TimeoutAggregator = GeventTimeoutAggregator
-
-
-def setup_module():
-    global sources
-    sources = {'slow': TimeoutFileSource(TEST_CDX_PATH + 'example2.cdxj', 0.2),
-               'slower': TimeoutFileSource(TEST_CDX_PATH + 'dupes.cdxj', 0.5)
-              }
-
-
-def test_timeout_long_all_pass():
-    agg = TimeoutAggregator(sources, timeout=1.0)
-
-    res, errs = agg(dict(url='http://example.com/'))
-
-    exp = [{'source': 'slower', 'timestamp': '20140127171200'},
-           {'source': 'slower', 'timestamp': '20140127171251'},
-           {'source': 'slow', 'timestamp': '20160225042329'}]
-
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-
-    assert(errs == {})
+class TestTimeouts(object):
+    @classmethod
+    def setup_class(cls):
+        cls.sources = {'slow': TimeoutFileSource(TEST_CDX_PATH + 'example2.cdxj', 0.2),
+                       'slower': TimeoutFileSource(TEST_CDX_PATH + 'dupes.cdxj', 0.5)
+                      }
+
+    def test_timeout_long_all_pass(self):
+        agg = GeventTimeoutAggregator(self.sources, timeout=1.0)
+
+        res, errs = agg(dict(url='http://example.com/'))
+
+        exp = [{'source': 'slower', 'timestamp': '20140127171200'},
+               {'source': 'slower', 'timestamp': '20140127171251'},
+               {'source': 'slow', 'timestamp': '20160225042329'}]
+
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+
+        assert(errs == {})

-
-def test_timeout_slower_skipped_1():
-    agg = GeventTimeoutAggregator(sources, timeout=0.40)
-
-    res, errs = agg(dict(url='http://example.com/'))
-
-    exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
-
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-
-    assert(errs == {'slower': 'timeout'})
+    def test_timeout_slower_skipped_1(self):
+        agg = GeventTimeoutAggregator(self.sources, timeout=0.40)
+
+        res, errs = agg(dict(url='http://example.com/'))
+
+        exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
+
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+
+        assert(errs == {'slower': 'timeout'})

-
-def test_timeout_slower_all_skipped():
-    agg = GeventTimeoutAggregator(sources, timeout=0.10)
-
-    res, errs = agg(dict(url='http://example.com/'))
-
-    exp = []
-
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-
-    assert(errs == {'slower': 'timeout', 'slow': 'timeout'})
+    def test_timeout_slower_all_skipped(self):
+        agg = GeventTimeoutAggregator(self.sources, timeout=0.10)
+
+        res, errs = agg(dict(url='http://example.com/'))
+
+        exp = []
+
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+
+        assert(errs == {'slower': 'timeout', 'slow': 'timeout'})

-
-def test_timeout_skipping():
-    assert(sources['slow'].calls == 3)
-    assert(sources['slower'].calls == 3)
-
-    agg = GeventTimeoutAggregator(sources, timeout=0.40,
-                                  t_count=2, t_duration=1.0)
-
-    exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
-
-    res, errs = agg(dict(url='http://example.com/'))
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(sources['slow'].calls == 4)
-    assert(sources['slower'].calls == 4)
-
-    assert(errs == {'slower': 'timeout'})
-
-    res, errs = agg(dict(url='http://example.com/'))
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(sources['slow'].calls == 5)
-    assert(sources['slower'].calls == 5)
-
-    assert(errs == {'slower': 'timeout'})
-
-    res, errs = agg(dict(url='http://example.com/'))
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(sources['slow'].calls == 6)
-    assert(sources['slower'].calls == 5)
-
-    assert(errs == {})
-
-    res, errs = agg(dict(url='http://example.com/'))
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(sources['slow'].calls == 7)
-    assert(sources['slower'].calls == 5)
-
-    assert(errs == {})
-
-    time.sleep(1.5)
-
-    res, errs = agg(dict(url='http://example.com/'))
-    assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
-    assert(sources['slow'].calls == 8)
-    assert(sources['slower'].calls == 6)
-
-    assert(errs == {'slower': 'timeout'})
+    def test_timeout_skipping(self):
+        assert(self.sources['slow'].calls == 3)
+        assert(self.sources['slower'].calls == 3)
+
+        agg = GeventTimeoutAggregator(self.sources, timeout=0.40,
+                                      t_count=2, t_duration=1.0)
+
+        exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
+
+        res, errs = agg(dict(url='http://example.com/'))
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(self.sources['slow'].calls == 4)
+        assert(self.sources['slower'].calls == 4)
+
+        assert(errs == {'slower': 'timeout'})
+
+        res, errs = agg(dict(url='http://example.com/'))
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(self.sources['slow'].calls == 5)
+        assert(self.sources['slower'].calls == 5)
+
+        assert(errs == {'slower': 'timeout'})
+
+        res, errs = agg(dict(url='http://example.com/'))
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(self.sources['slow'].calls == 6)
+        assert(self.sources['slower'].calls == 5)
+
+        assert(errs == {})
+
+        res, errs = agg(dict(url='http://example.com/'))
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(self.sources['slow'].calls == 7)
+        assert(self.sources['slower'].calls == 5)
+
+        assert(errs == {})
+
+        time.sleep(1.5)
+
+        res, errs = agg(dict(url='http://example.com/'))
+        assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
+        assert(self.sources['slow'].calls == 8)
+        assert(self.sources['slower'].calls == 6)
+
+        assert(errs == {'slower': 'timeout'})
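test_timeout_skipping pins down the t_count/t_duration behavior: once a source has timed out t_count times within t_duration seconds, the aggregator stops querying it until the window lapses. A minimal model of that bookkeeping, assuming nothing about pywb's actual implementation:

# Illustrative sketch only: skip a source after t_count timeouts
# inside a t_duration window, retry once the window expires.
import time


class SkipTracker(object):
    def __init__(self, t_count=2, t_duration=1.0):
        self.t_count = t_count
        self.t_duration = t_duration
        self.timeouts = []

    def record_timeout(self):
        self.timeouts.append(time.time())

    def should_skip(self):
        # keep only timeouts still inside the window, then compare to the threshold
        cutoff = time.time() - self.t_duration
        self.timeouts = [t for t in self.timeouts if t >= cutoff]
        return len(self.timeouts) >= self.t_count

Read against the test's counters: 'slower' times out on the first two queries (its calls advance to 4, then 5), is skipped for the next two queries (calls stay at 5 while 'slow' advances to 6 and 7), and is retried after the 1.5s sleep lets the window expire (calls reach 6, with the timeout reported again).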
@@ -125,9 +125,3 @@ class TestWarcServer(TempDirTests, BaseTestClass):
         assert len(sources) == 1
         assert isinstance(sources['live'], LiveIndexSource)

-
-
-
-
-
-
|
@ -62,22 +62,23 @@ class FakeStrictRedisSharedPubSub(FakeStrictRedis):
|
||||
# ============================================================================
|
||||
class FakeRedisTests(object):
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
def setup_class(cls, redis_url='redis://localhost:6379/2'):
|
||||
super(FakeRedisTests, cls).setup_class()
|
||||
cls.redismock = patch('redis.StrictRedis', FakeStrictRedisSharedPubSub)
|
||||
cls.redismock.start()
|
||||
|
||||
@staticmethod
|
||||
def add_cdx_to_redis(filename, key, redis_url='redis://localhost:6379/2'):
|
||||
r = FakeStrictRedis.from_url(redis_url)
|
||||
cls.redis = FakeStrictRedis.from_url(redis_url)
|
||||
|
||||
@classmethod
|
||||
def add_cdx_to_redis(cls, filename, key):
|
||||
with open(filename, 'rb') as fh:
|
||||
for line in fh:
|
||||
r.zadd(key, 0, line.rstrip())
|
||||
cls.redis.zadd(key, 0, line.rstrip())
|
||||
|
||||
@classmethod
|
||||
def teardown_class(cls):
|
||||
super(FakeRedisTests, cls).teardown_class()
|
||||
FakeStrictRedis().flushall()
|
||||
cls.redis.flushall()
|
||||
cls.redismock.stop()
|
||||
|
||||
|
||||
|
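With the shared client stored on the class, callers no longer pass a redis URL to add_cdx_to_redis; they seed data through the inherited helper and can reach the same FakeRedis instance as cls.redis. A sketch of a consuming test class (hypothetical name), mirroring the usage in the converted tests above:

# Hypothetical consumer of the updated FakeRedisTests helpers.
from pywb.warcserver.test.testutils import FakeRedisTests, BaseTestClass, TEST_CDX_PATH


class TestMyRedisSource(FakeRedisTests, BaseTestClass):
    @classmethod
    def setup_class(cls):
        super(TestMyRedisSource, cls).setup_class()
        # seed a cdx index through the shared helper (no redis url needed now)
        cls.add_cdx_to_redis(TEST_CDX_PATH + 'iana.cdxj', 'test:rediscdx')
        # the same patched client is available directly as cls.redis
        cls.redis.sadd('test:colls', 'iana')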