mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
tests: webagg test tweaks, create TempDirTests for sharing tests that require a temp dir
This commit is contained in:
parent
7b847311d5
commit
c309637a3a
@ -3,7 +3,7 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .testutils import to_path
|
from .testutils import to_path, to_json_list, TempDirTests
|
||||||
|
|
||||||
from mock import patch
|
from mock import patch
|
||||||
|
|
||||||
@ -12,202 +12,179 @@ from webagg.indexsource import MementoIndexSource
|
|||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
root_dir = None
|
|
||||||
orig_cwd = None
|
|
||||||
dir_loader = None
|
|
||||||
|
|
||||||
linkheader = """\
|
linkheader = """\
|
||||||
<http://example.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://example.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020120142510/http://example.com/>; rel="first memento"; datetime="Sun, 20 Jan 2002 14:25:10 GMT", <http://web.archive.org/web/20100501123414/http://example.com/>; rel="prev memento"; datetime="Sat, 01 May 2010 12:34:14 GMT", <http://web.archive.org/web/20100514231857/http://example.com/>; rel="memento"; datetime="Fri, 14 May 2010 23:18:57 GMT", <http://web.archive.org/web/20100519202418/http://example.com/>; rel="next memento"; datetime="Wed, 19 May 2010 20:24:18 GMT", <http://web.archive.org/web/20160307200619/http://example.com/>; rel="last memento"; datetime="Mon, 07 Mar 2016 20:06:19 GMT"\
|
<http://example.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://example.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020120142510/http://example.com/>; rel="first memento"; datetime="Sun, 20 Jan 2002 14:25:10 GMT", <http://web.archive.org/web/20100501123414/http://example.com/>; rel="prev memento"; datetime="Sat, 01 May 2010 12:34:14 GMT", <http://web.archive.org/web/20100514231857/http://example.com/>; rel="memento"; datetime="Fri, 14 May 2010 23:18:57 GMT", <http://web.archive.org/web/20100519202418/http://example.com/>; rel="next memento"; datetime="Wed, 19 May 2010 20:24:18 GMT", <http://web.archive.org/web/20160307200619/http://example.com/>; rel="last memento"; datetime="Mon, 07 Mar 2016 20:06:19 GMT"\
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def setup_module():
|
|
||||||
global root_dir
|
|
||||||
root_dir = tempfile.mkdtemp()
|
|
||||||
|
|
||||||
coll_A = to_path(root_dir + '/colls/A/indexes')
|
|
||||||
coll_B = to_path(root_dir + '/colls/B/indexes')
|
|
||||||
coll_C = to_path(root_dir + '/colls/C/indexes')
|
|
||||||
|
|
||||||
os.makedirs(coll_A)
|
|
||||||
os.makedirs(coll_B)
|
|
||||||
os.makedirs(coll_C)
|
|
||||||
|
|
||||||
dir_prefix = to_path(root_dir)
|
|
||||||
dir_path ='colls/{coll}/indexes'
|
|
||||||
|
|
||||||
shutil.copy(to_path('testdata/example.cdxj'), coll_A)
|
|
||||||
shutil.copy(to_path('testdata/iana.cdxj'), coll_B)
|
|
||||||
shutil.copy(to_path('testdata/dupes.cdxj'), coll_C)
|
|
||||||
|
|
||||||
with open(to_path(root_dir) + 'somefile', 'w') as fh:
|
|
||||||
fh.write('foo')
|
|
||||||
|
|
||||||
global dir_loader
|
|
||||||
dir_loader = DirectoryIndexSource(dir_prefix, dir_path)
|
|
||||||
|
|
||||||
#global orig_cwd
|
|
||||||
#orig_cwd = os.getcwd()
|
|
||||||
#os.chdir(root_dir)
|
|
||||||
|
|
||||||
# use actually set dir
|
|
||||||
#root_dir = os.getcwd()
|
|
||||||
|
|
||||||
def teardown_module():
|
|
||||||
#global orig_cwd
|
|
||||||
#os.chdir(orig_cwd)
|
|
||||||
|
|
||||||
global root_dir
|
|
||||||
shutil.rmtree(root_dir)
|
|
||||||
|
|
||||||
|
|
||||||
def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
|
|
||||||
return list([json.loads(cdx.to_json(fields)) for cdx in cdxlist])
|
|
||||||
|
|
||||||
|
|
||||||
def test_agg_no_coll_set():
|
|
||||||
res, errs = dir_loader(dict(url='example.com/'))
|
|
||||||
assert(to_json_list(res) == [])
|
|
||||||
assert(errs == {})
|
|
||||||
|
|
||||||
def test_agg_collA_found():
|
|
||||||
res, errs = dir_loader({'url': 'example.com/', 'param.coll': 'A'})
|
|
||||||
|
|
||||||
exp = [{'source': 'colls/A/indexes/example.cdxj', 'timestamp': '20160225042329', 'filename': 'example.warc.gz'}]
|
|
||||||
|
|
||||||
assert(to_json_list(res) == exp)
|
|
||||||
assert(errs == {})
|
|
||||||
|
|
||||||
def test_agg_collB():
|
|
||||||
res, errs = dir_loader({'url': 'example.com/', 'param.coll': 'B'})
|
|
||||||
|
|
||||||
exp = []
|
|
||||||
|
|
||||||
assert(to_json_list(res) == exp)
|
|
||||||
assert(errs == {})
|
|
||||||
|
|
||||||
def test_agg_collB_found():
|
|
||||||
res, errs = dir_loader({'url': 'iana.org/', 'param.coll': 'B'})
|
|
||||||
|
|
||||||
exp = [{'source': 'colls/B/indexes/iana.cdxj', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
|
|
||||||
|
|
||||||
assert(to_json_list(res) == exp)
|
|
||||||
assert(errs == {})
|
|
||||||
|
|
||||||
|
|
||||||
def test_extra_agg_collB():
|
|
||||||
agg_source = SimpleAggregator({'dir': dir_loader})
|
|
||||||
res, errs = agg_source({'url': 'iana.org/', 'param.coll': 'B'})
|
|
||||||
|
|
||||||
exp = [{'source': 'dir:colls/B/indexes/iana.cdxj', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
|
|
||||||
|
|
||||||
assert(to_json_list(res) == exp)
|
|
||||||
assert(errs == {})
|
|
||||||
|
|
||||||
|
|
||||||
def test_agg_all_found_1():
|
|
||||||
res, errs = dir_loader({'url': 'iana.org/', 'param.coll': '*'})
|
|
||||||
|
|
||||||
exp = [
|
|
||||||
{'source': 'colls/B/indexes/iana.cdxj', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'},
|
|
||||||
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171238', 'filename': 'dupes.warc.gz'},
|
|
||||||
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171238', 'filename': 'dupes.warc.gz'},
|
|
||||||
]
|
|
||||||
|
|
||||||
assert(to_json_list(res) == exp)
|
|
||||||
assert(errs == {})
|
|
||||||
|
|
||||||
|
|
||||||
def test_agg_all_found_2():
|
|
||||||
res, errs = dir_loader({'url': 'example.com/', 'param.coll': '*'})
|
|
||||||
|
|
||||||
exp = [
|
|
||||||
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
|
|
||||||
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171251', 'filename': 'dupes.warc.gz'},
|
|
||||||
{'source': 'colls/A/indexes/example.cdxj', 'timestamp': '20160225042329', 'filename': 'example.warc.gz'}
|
|
||||||
]
|
|
||||||
|
|
||||||
assert(to_json_list(res) == exp)
|
|
||||||
assert(errs == {})
|
|
||||||
|
|
||||||
|
|
||||||
def mock_link_header(*args, **kwargs):
|
def mock_link_header(*args, **kwargs):
|
||||||
return linkheader
|
return linkheader
|
||||||
|
|
||||||
@patch('webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header)
|
|
||||||
def test_agg_dir_and_memento():
|
|
||||||
sources = {'ia': MementoIndexSource.from_timegate_url('http://web.archive.org/web/'),
|
|
||||||
'local': dir_loader}
|
|
||||||
agg_source = SimpleAggregator(sources)
|
|
||||||
|
|
||||||
res, errs = agg_source({'url': 'example.com/', 'param.local.coll': '*', 'closest': '20100512', 'limit': 6})
|
class TestDirAgg(TempDirTests):
|
||||||
|
@classmethod
|
||||||
|
def setup_class(cls):
|
||||||
|
super(TestDirAgg, cls).setup_class()
|
||||||
|
coll_A = to_path(cls.root_dir + '/colls/A/indexes')
|
||||||
|
coll_B = to_path(cls.root_dir + '/colls/B/indexes')
|
||||||
|
coll_C = to_path(cls.root_dir + '/colls/C/indexes')
|
||||||
|
|
||||||
exp = [
|
os.makedirs(coll_A)
|
||||||
{'source': 'ia', 'timestamp': '20100514231857', 'load_url': 'http://web.archive.org/web/20100514231857id_/http://example.com/'},
|
os.makedirs(coll_B)
|
||||||
{'source': 'ia', 'timestamp': '20100519202418', 'load_url': 'http://web.archive.org/web/20100519202418id_/http://example.com/'},
|
os.makedirs(coll_C)
|
||||||
{'source': 'ia', 'timestamp': '20100501123414', 'load_url': 'http://web.archive.org/web/20100501123414id_/http://example.com/'},
|
|
||||||
{'source': 'local:colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
|
|
||||||
{'source': 'local:colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171251', 'filename': 'dupes.warc.gz'},
|
|
||||||
{'source': 'local:colls/A/indexes/example.cdxj', 'timestamp': '20160225042329', 'filename': 'example.warc.gz'}
|
|
||||||
]
|
|
||||||
|
|
||||||
assert(to_json_list(res) == exp)
|
dir_prefix = to_path(cls.root_dir)
|
||||||
assert(errs == {})
|
dir_path ='colls/{coll}/indexes'
|
||||||
|
|
||||||
|
shutil.copy(to_path('testdata/example.cdxj'), coll_A)
|
||||||
|
shutil.copy(to_path('testdata/iana.cdxj'), coll_B)
|
||||||
|
shutil.copy(to_path('testdata/dupes.cdxj'), coll_C)
|
||||||
|
|
||||||
|
with open(to_path(cls.root_dir) + 'somefile', 'w') as fh:
|
||||||
|
fh.write('foo')
|
||||||
|
|
||||||
|
cls.dir_loader = DirectoryIndexSource(dir_prefix, dir_path)
|
||||||
|
|
||||||
|
def test_agg_no_coll_set(self):
|
||||||
|
res, errs = self.dir_loader(dict(url='example.com/'))
|
||||||
|
assert(to_json_list(res) == [])
|
||||||
|
assert(errs == {})
|
||||||
|
|
||||||
|
def test_agg_collA_found(self):
|
||||||
|
res, errs = self.dir_loader({'url': 'example.com/', 'param.coll': 'A'})
|
||||||
|
|
||||||
|
exp = [{'source': 'colls/A/indexes/example.cdxj', 'timestamp': '20160225042329', 'filename': 'example.warc.gz'}]
|
||||||
|
|
||||||
|
assert(to_json_list(res) == exp)
|
||||||
|
assert(errs == {})
|
||||||
|
|
||||||
|
def test_agg_collB(self):
|
||||||
|
res, errs = self.dir_loader({'url': 'example.com/', 'param.coll': 'B'})
|
||||||
|
|
||||||
|
exp = []
|
||||||
|
|
||||||
|
assert(to_json_list(res) == exp)
|
||||||
|
assert(errs == {})
|
||||||
|
|
||||||
|
def test_agg_collB_found(self):
|
||||||
|
res, errs = self.dir_loader({'url': 'iana.org/', 'param.coll': 'B'})
|
||||||
|
|
||||||
|
exp = [{'source': 'colls/B/indexes/iana.cdxj', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
|
||||||
|
|
||||||
|
assert(to_json_list(res) == exp)
|
||||||
|
assert(errs == {})
|
||||||
|
|
||||||
|
|
||||||
def test_agg_no_dir_1():
|
def test_extra_agg_collB(self):
|
||||||
res, errs = dir_loader({'url': 'example.com/', 'param.coll': 'X'})
|
agg_source = SimpleAggregator({'dir': self.dir_loader})
|
||||||
|
res, errs = agg_source({'url': 'iana.org/', 'param.coll': 'B'})
|
||||||
|
|
||||||
exp = []
|
exp = [{'source': 'dir:colls/B/indexes/iana.cdxj', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
|
||||||
|
|
||||||
assert(to_json_list(res) == exp)
|
assert(to_json_list(res) == exp)
|
||||||
assert(errs == {})
|
assert(errs == {})
|
||||||
|
|
||||||
|
|
||||||
def test_agg_no_dir_2():
|
def test_agg_all_found_1(self):
|
||||||
loader = DirectoryIndexSource(root_dir, '')
|
res, errs = self.dir_loader({'url': 'iana.org/', 'param.coll': '*'})
|
||||||
res, errs = loader({'url': 'example.com/', 'param.coll': 'X'})
|
|
||||||
|
|
||||||
exp = []
|
exp = [
|
||||||
|
{'source': 'colls/B/indexes/iana.cdxj', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'},
|
||||||
|
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171238', 'filename': 'dupes.warc.gz'},
|
||||||
|
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171238', 'filename': 'dupes.warc.gz'},
|
||||||
|
]
|
||||||
|
|
||||||
assert(to_json_list(res) == exp)
|
assert(to_json_list(res) == exp)
|
||||||
assert(errs == {})
|
assert(errs == {})
|
||||||
|
|
||||||
|
|
||||||
def test_agg_dir_sources_1():
|
def test_agg_all_found_2(self):
|
||||||
res = dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'})
|
res, errs = self.dir_loader({'url': 'example.com/', 'param.coll': '*'})
|
||||||
exp = {'sources': {'colls/A/indexes/example.cdxj': 'file',
|
|
||||||
'colls/B/indexes/iana.cdxj': 'file',
|
|
||||||
'colls/C/indexes/dupes.cdxj': 'file'}
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(res == exp)
|
exp = [
|
||||||
|
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
|
||||||
|
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171251', 'filename': 'dupes.warc.gz'},
|
||||||
|
{'source': 'colls/A/indexes/example.cdxj', 'timestamp': '20160225042329', 'filename': 'example.warc.gz'}
|
||||||
|
]
|
||||||
|
|
||||||
|
assert(to_json_list(res) == exp)
|
||||||
|
assert(errs == {})
|
||||||
|
|
||||||
|
@patch('webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header)
|
||||||
|
def test_agg_dir_and_memento(self):
|
||||||
|
sources = {'ia': MementoIndexSource.from_timegate_url('http://web.archive.org/web/'),
|
||||||
|
'local': self.dir_loader}
|
||||||
|
agg_source = SimpleAggregator(sources)
|
||||||
|
|
||||||
|
res, errs = agg_source({'url': 'example.com/', 'param.local.coll': '*', 'closest': '20100512', 'limit': 6})
|
||||||
|
|
||||||
|
exp = [
|
||||||
|
{'source': 'ia', 'timestamp': '20100514231857', 'load_url': 'http://web.archive.org/web/20100514231857id_/http://example.com/'},
|
||||||
|
{'source': 'ia', 'timestamp': '20100519202418', 'load_url': 'http://web.archive.org/web/20100519202418id_/http://example.com/'},
|
||||||
|
{'source': 'ia', 'timestamp': '20100501123414', 'load_url': 'http://web.archive.org/web/20100501123414id_/http://example.com/'},
|
||||||
|
{'source': 'local:colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
|
||||||
|
{'source': 'local:colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171251', 'filename': 'dupes.warc.gz'},
|
||||||
|
{'source': 'local:colls/A/indexes/example.cdxj', 'timestamp': '20160225042329', 'filename': 'example.warc.gz'}
|
||||||
|
]
|
||||||
|
|
||||||
|
assert(to_json_list(res) == exp)
|
||||||
|
assert(errs == {})
|
||||||
|
|
||||||
|
|
||||||
def test_agg_dir_sources_2():
|
def test_agg_no_dir_1(self):
|
||||||
res = dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '[A,C]'})
|
res, errs = self.dir_loader({'url': 'example.com/', 'param.coll': 'X'})
|
||||||
exp = {'sources': {'colls/A/indexes/example.cdxj': 'file',
|
|
||||||
'colls/C/indexes/dupes.cdxj': 'file'}
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(res == exp)
|
exp = []
|
||||||
|
|
||||||
|
assert(to_json_list(res) == exp)
|
||||||
|
assert(errs == {})
|
||||||
|
|
||||||
|
|
||||||
def test_agg_dir_sources_single_dir():
|
def test_agg_no_dir_2(self):
|
||||||
loader = DirectoryIndexSource(os.path.join(root_dir, 'colls', 'A', 'indexes'), '')
|
loader = DirectoryIndexSource(self.root_dir, '')
|
||||||
res = loader.get_source_list({'url': 'example.com/'})
|
res, errs = loader({'url': 'example.com/', 'param.coll': 'X'})
|
||||||
|
|
||||||
exp = {'sources': {'example.cdxj': 'file'}}
|
exp = []
|
||||||
|
|
||||||
assert(res == exp)
|
assert(to_json_list(res) == exp)
|
||||||
|
assert(errs == {})
|
||||||
|
|
||||||
|
|
||||||
def test_agg_dir_sources_not_found_dir():
|
def test_agg_dir_sources_1(self):
|
||||||
loader = DirectoryIndexSource(os.path.join(root_dir, 'colls', 'Z', 'indexes'), '')
|
res = self.dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'})
|
||||||
res = loader.get_source_list({'url': 'example.com/'})
|
exp = {'sources': {'colls/A/indexes/example.cdxj': 'file',
|
||||||
|
'colls/B/indexes/iana.cdxj': 'file',
|
||||||
|
'colls/C/indexes/dupes.cdxj': 'file'}
|
||||||
|
}
|
||||||
|
|
||||||
exp = {'sources': {}}
|
assert(res == exp)
|
||||||
|
|
||||||
assert(res == exp)
|
|
||||||
|
def test_agg_dir_sources_2(self):
|
||||||
|
res = self.dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '[A,C]'})
|
||||||
|
exp = {'sources': {'colls/A/indexes/example.cdxj': 'file',
|
||||||
|
'colls/C/indexes/dupes.cdxj': 'file'}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(res == exp)
|
||||||
|
|
||||||
|
|
||||||
|
def test_agg_dir_sources_single_dir(self):
|
||||||
|
loader = DirectoryIndexSource(os.path.join(self.root_dir, 'colls', 'A', 'indexes'), '')
|
||||||
|
res = loader.get_source_list({'url': 'example.com/'})
|
||||||
|
|
||||||
|
exp = {'sources': {'example.cdxj': 'file'}}
|
||||||
|
|
||||||
|
assert(res == exp)
|
||||||
|
|
||||||
|
|
||||||
|
def test_agg_dir_sources_not_found_dir(self):
|
||||||
|
loader = DirectoryIndexSource(os.path.join(self.root_dir, 'colls', 'Z', 'indexes'), '')
|
||||||
|
res = loader.get_source_list({'url': 'example.com/'})
|
||||||
|
|
||||||
|
exp = {'sources': {}}
|
||||||
|
|
||||||
|
assert(res == exp)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ from webagg.aggregator import SimpleAggregator, GeventTimeoutAggregator
|
|||||||
from webagg.aggregator import BaseAggregator
|
from webagg.aggregator import BaseAggregator
|
||||||
|
|
||||||
from webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource
|
from webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource
|
||||||
from .testutils import json_list, to_path
|
from .testutils import to_json_list, to_path
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import pytest
|
import pytest
|
||||||
@ -48,7 +48,7 @@ def test_mem_agg_index_1(agg):
|
|||||||
{"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"}
|
{"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"}
|
||||||
]
|
]
|
||||||
|
|
||||||
assert(json_list(res) == exp)
|
assert(to_json_list(res) == exp)
|
||||||
assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
|
assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
|
||||||
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
|
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
|
||||||
|
|
||||||
@ -65,7 +65,7 @@ def test_mem_agg_index_2(agg):
|
|||||||
{"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"},
|
{"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"},
|
||||||
{"timestamp": "20100519202418", "load_url": "http://web.archive.org/web/20100519202418id_/http://example.com/", "source": "ia"}]
|
{"timestamp": "20100519202418", "load_url": "http://web.archive.org/web/20100519202418id_/http://example.com/", "source": "ia"}]
|
||||||
|
|
||||||
assert(json_list(res) == exp)
|
assert(to_json_list(res) == exp)
|
||||||
assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"})
|
assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"})
|
||||||
|
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ def test_mem_agg_index_3(agg):
|
|||||||
{"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"},
|
{"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"},
|
||||||
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
|
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
|
||||||
|
|
||||||
assert(json_list(res) == exp)
|
assert(to_json_list(res) == exp)
|
||||||
assert(errs == {})
|
assert(errs == {})
|
||||||
|
|
||||||
|
|
||||||
@ -92,7 +92,7 @@ def test_mem_agg_index_4(agg):
|
|||||||
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
|
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
|
||||||
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
|
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
|
||||||
|
|
||||||
assert(json_list(res) == exp)
|
assert(to_json_list(res) == exp)
|
||||||
assert(errs == {})
|
assert(errs == {})
|
||||||
|
|
||||||
|
|
||||||
@ -101,7 +101,7 @@ def test_mem_agg_not_found(agg):
|
|||||||
url = 'http://vvork.com/'
|
url = 'http://vvork.com/'
|
||||||
res, errs = agg(dict(url=url, closest='20141001', limit=2))
|
res, errs = agg(dict(url=url, closest='20141001', limit=2))
|
||||||
|
|
||||||
assert(json_list(res) == [])
|
assert(to_json_list(res) == [])
|
||||||
assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"})
|
assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"})
|
||||||
|
|
||||||
|
|
||||||
@ -118,7 +118,7 @@ def test_mem_agg_timeout(agg):
|
|||||||
res, errs = agg(dict(url=url, closest='20141001', limit=2))
|
res, errs = agg(dict(url=url, closest='20141001', limit=2))
|
||||||
BaseAggregator.load_child_source = orig_source
|
BaseAggregator.load_child_source = orig_source
|
||||||
|
|
||||||
assert(json_list(res) == [])
|
assert(to_json_list(res) == [])
|
||||||
assert(errs == {'local': 'timeout',
|
assert(errs == {'local': 'timeout',
|
||||||
'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
|
'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ from webagg.indexsource import FileIndexSource
|
|||||||
from webagg.aggregator import SimpleAggregator, TimeoutMixin
|
from webagg.aggregator import SimpleAggregator, TimeoutMixin
|
||||||
from webagg.aggregator import GeventTimeoutAggregator, GeventTimeoutAggregator
|
from webagg.aggregator import GeventTimeoutAggregator, GeventTimeoutAggregator
|
||||||
|
|
||||||
from .testutils import json_list
|
from .testutils import to_json_list
|
||||||
|
|
||||||
|
|
||||||
class TimeoutFileSource(FileIndexSource):
|
class TimeoutFileSource(FileIndexSource):
|
||||||
@ -41,7 +41,7 @@ def test_timeout_long_all_pass():
|
|||||||
{'source': 'slower', 'timestamp': '20140127171251'},
|
{'source': 'slower', 'timestamp': '20140127171251'},
|
||||||
{'source': 'slow', 'timestamp': '20160225042329'}]
|
{'source': 'slow', 'timestamp': '20160225042329'}]
|
||||||
|
|
||||||
assert(json_list(res, fields=['source', 'timestamp']) == exp)
|
assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
|
||||||
|
|
||||||
assert(errs == {})
|
assert(errs == {})
|
||||||
|
|
||||||
@ -53,7 +53,7 @@ def test_timeout_slower_skipped_1():
|
|||||||
|
|
||||||
exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
|
exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
|
||||||
|
|
||||||
assert(json_list(res, fields=['source', 'timestamp']) == exp)
|
assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
|
||||||
|
|
||||||
assert(errs == {'slower': 'timeout'})
|
assert(errs == {'slower': 'timeout'})
|
||||||
|
|
||||||
@ -65,7 +65,7 @@ def test_timeout_slower_skipped_2():
|
|||||||
|
|
||||||
exp = []
|
exp = []
|
||||||
|
|
||||||
assert(json_list(res, fields=['source', 'timestamp']) == exp)
|
assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
|
||||||
|
|
||||||
assert(errs == {'slower': 'timeout', 'slow': 'timeout'})
|
assert(errs == {'slower': 'timeout', 'slow': 'timeout'})
|
||||||
|
|
||||||
@ -80,28 +80,28 @@ def test_timeout_skipping():
|
|||||||
exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
|
exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
|
||||||
|
|
||||||
res, errs = agg(dict(url='http://example.com/'))
|
res, errs = agg(dict(url='http://example.com/'))
|
||||||
assert(json_list(res, fields=['source', 'timestamp']) == exp)
|
assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
|
||||||
assert(sources['slow'].calls == 4)
|
assert(sources['slow'].calls == 4)
|
||||||
assert(sources['slower'].calls == 4)
|
assert(sources['slower'].calls == 4)
|
||||||
|
|
||||||
assert(errs == {'slower': 'timeout'})
|
assert(errs == {'slower': 'timeout'})
|
||||||
|
|
||||||
res, errs = agg(dict(url='http://example.com/'))
|
res, errs = agg(dict(url='http://example.com/'))
|
||||||
assert(json_list(res, fields=['source', 'timestamp']) == exp)
|
assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
|
||||||
assert(sources['slow'].calls == 5)
|
assert(sources['slow'].calls == 5)
|
||||||
assert(sources['slower'].calls == 5)
|
assert(sources['slower'].calls == 5)
|
||||||
|
|
||||||
assert(errs == {'slower': 'timeout'})
|
assert(errs == {'slower': 'timeout'})
|
||||||
|
|
||||||
res, errs = agg(dict(url='http://example.com/'))
|
res, errs = agg(dict(url='http://example.com/'))
|
||||||
assert(json_list(res, fields=['source', 'timestamp']) == exp)
|
assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
|
||||||
assert(sources['slow'].calls == 6)
|
assert(sources['slow'].calls == 6)
|
||||||
assert(sources['slower'].calls == 5)
|
assert(sources['slower'].calls == 5)
|
||||||
|
|
||||||
assert(errs == {})
|
assert(errs == {})
|
||||||
|
|
||||||
res, errs = agg(dict(url='http://example.com/'))
|
res, errs = agg(dict(url='http://example.com/'))
|
||||||
assert(json_list(res, fields=['source', 'timestamp']) == exp)
|
assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
|
||||||
assert(sources['slow'].calls == 7)
|
assert(sources['slow'].calls == 7)
|
||||||
assert(sources['slower'].calls == 5)
|
assert(sources['slower'].calls == 5)
|
||||||
|
|
||||||
@ -110,7 +110,7 @@ def test_timeout_skipping():
|
|||||||
time.sleep(2.01)
|
time.sleep(2.01)
|
||||||
|
|
||||||
res, errs = agg(dict(url='http://example.com/'))
|
res, errs = agg(dict(url='http://example.com/'))
|
||||||
assert(json_list(res, fields=['source', 'timestamp']) == exp)
|
assert(to_json_list(res, fields=['source', 'timestamp']) == exp)
|
||||||
assert(sources['slow'].calls == 8)
|
assert(sources['slow'].calls == 8)
|
||||||
assert(sources['slower'].calls == 6)
|
assert(sources['slower'].calls == 6)
|
||||||
|
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
|
||||||
def json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
|
def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
|
||||||
return list([json.loads(cdx.to_json(fields)) for cdx in cdxlist])
|
return list([json.loads(cdx.to_json(fields)) for cdx in cdxlist])
|
||||||
|
|
||||||
def key_ts_res(cdxlist, extra='filename'):
|
def key_ts_res(cdxlist, extra='filename'):
|
||||||
@ -14,3 +16,12 @@ def to_path(path):
|
|||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
class TempDirTests(object):
|
||||||
|
@classmethod
|
||||||
|
def setup_class(cls):
|
||||||
|
cls.root_dir = tempfile.mkdtemp()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def teardown_class(cls):
|
||||||
|
shutil.rmtree(cls.root_dir)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user