1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

tests: add MementoOverrideTests as a reusable class, convert memento_agg tests to use the class

handlers: add saved link header data for memento tests for handlers
This commit is contained in:
Ilya Kreymer 2016-11-15 14:24:34 -08:00
parent c7fa8b711c
commit d24868db7a
4 changed files with 234 additions and 180 deletions

View File

@ -18,8 +18,9 @@ from six.moves.urllib.parse import urlencode
import webtest
from fakeredis import FakeStrictRedis
from mock import patch
from .testutils import to_path, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
from .testutils import to_path, MementoOverrideTests, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
import json
@ -31,7 +32,7 @@ sources = {
}
class TestResAgg(FakeRedisTests, BaseTestClass):
class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
def setup_class(cls):
super(TestResAgg, cls).setup_class()
@ -162,6 +163,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_1'))
def test_agg_select_mem_1(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001')
@ -176,6 +178,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_2'))
def test_agg_select_mem_2(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231')
@ -190,6 +193,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live'))
def test_agg_select_live(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=2016')
@ -202,6 +206,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local'))
def test_agg_select_local(self):
resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624')
@ -214,6 +219,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_postreq'))
def test_agg_select_local_postreq(self):
req_data = """\
GET / HTTP/1.1
@ -233,6 +239,7 @@ Host: iana.org
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live_postreq'))
def test_agg_live_postreq(self):
req_data = """\
GET /get?foo=bar HTTP/1.1
@ -416,6 +423,7 @@ host: www.youtube.com\
assert resp.text == resp.headers['ResErrors']
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_revisit'))
def test_agg_local_revisit(self):
resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local')
@ -442,6 +450,7 @@ host: www.youtube.com\
assert resp.json == {'message': 'output=foobar not supported'}
assert resp.text == resp.headers['ResErrors']
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_not_found'))
def test_error_local_not_found(self):
resp = self.testapp.get('/many/resource?url=http://not-found.error/&sources=local', status=404)

View File

@ -4,21 +4,17 @@ from pywb.webagg.aggregator import SimpleAggregator, GeventTimeoutAggregator
from pywb.webagg.aggregator import BaseAggregator
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource
from .testutils import to_json_list, to_path, TEST_CDX_PATH
from .testutils import to_json_list, to_path, TEST_CDX_PATH, MementoOverrideTests, BaseTestClass
import json
import pytest
import time
import six
import yaml
from mock import patch
from pywb.webagg.handlers import IndexHandler
from pywb import get_test_dir
from pywb.utils.wbexception import NotFoundException
# Aggregator Mappings
sources = {
@ -40,239 +36,213 @@ agg_nf = {'simple': SimpleAggregator(nf),
'gevent': GeventTimeoutAggregator(nf, timeout=5.0),
}
# Load expected link headers
link_header_data = None
def setup_module():
global link_header_data
with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
link_header_data = yaml.load(fh)
# ============================================================================
class TestMemAgg(MementoOverrideTests, BaseTestClass):
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_1'))
def test_mem_agg_index_1(self, agg):
url = 'http://iana.org/'
res, errs = agg(dict(url=url, closest='20140126000000', limit=5))
exp = [{"timestamp": "20140126093743", "load_url": "http://web.archive.org/web/20140126093743id_/http://iana.org/", "source": "ia"},
{"timestamp": "20140126200624", "filename": "iana.warc.gz", "source": "local"},
{"timestamp": "20140123034755", "load_url": "http://web.archive.org/web/20140123034755id_/http://iana.org/", "source": "ia"},
{"timestamp": "20140129175203", "load_url": "http://web.archive.org/web/20140129175203id_/http://iana.org/", "source": "ia"},
{"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"}
]
assert(to_json_list(res) == exp)
assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
orig_get_timegate_links = MementoIndexSource.get_timegate_links
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_2'))
def test_mem_agg_index_2(self, agg):
url = 'http://example.com/'
res, errs = agg(dict(url=url, closest='20100512', limit=6))
def mock_link_header(test_name, load=False):
def mock_func(self, params, closest):
if load:
res = orig_get_timegate_links(self, params, closest)
print("'{0}': '{1}'".format(self.timegate_url, res))
return res
exp = [{"timestamp": "20100513010014", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100513010014id_/http://example.com/", "source": "bl"},
{"timestamp": "20100512204410", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100512204410id_/http://example.com/", "source": "bl"},
{"timestamp": "20100513224108", "load_url": "http://web.archive.org/web/20100513224108id_/http://example.com/", "source": "ia"},
{"timestamp": "20100511201151", 'load_url': "http://wayback.archive-it.org/all/20100511201151id_/http://example.com/", "source": "ait"},
{"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"},
{"timestamp": "20100514231857", "load_url": "http://web.archive.org/web/20100514231857id_/http://example.com/", "source": "ia"},
]
try:
res = link_header_data[test_name][self.timegate_url]
time.sleep(0.2)
except:
msg = self.timegate_url.format(url=params['url'])
raise NotFoundException(msg)
return res
return mock_func
assert(to_json_list(res) == exp)
assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"})
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_1'))
def test_mem_agg_index_1(agg):
url = 'http://iana.org/'
res, errs = agg(dict(url=url, closest='20140126000000', limit=5))
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_3'))
def test_mem_agg_index_3(self, agg):
url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=5))
exp = [{"timestamp": "20140126093743", "load_url": "http://web.archive.org/web/20140126093743id_/http://iana.org/", "source": "ia"},
{"timestamp": "20140126200624", "filename": "iana.warc.gz", "source": "local"},
{"timestamp": "20140123034755", "load_url": "http://web.archive.org/web/20140123034755id_/http://iana.org/", "source": "ia"},
{"timestamp": "20140129175203", "load_url": "http://web.archive.org/web/20140129175203id_/http://iana.org/", "source": "ia"},
{"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"}
]
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
{"timestamp": "20141018133107", "load_url": "http://web.archive.org/web/20141018133107id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20141020161243", "load_url": "http://web.archive.org/web/20141020161243id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
assert(to_json_list(res) == exp)
assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_2'))
def test_mem_agg_index_2(agg):
url = 'http://example.com/'
res, errs = agg(dict(url=url, closest='20100512', limit=6))
exp = [{"timestamp": "20100513010014", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100513010014id_/http://example.com/", "source": "bl"},
{"timestamp": "20100512204410", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100512204410id_/http://example.com/", "source": "bl"},
{"timestamp": "20100513224108", "load_url": "http://web.archive.org/web/20100513224108id_/http://example.com/", "source": "ia"},
{"timestamp": "20100511201151", 'load_url': "http://wayback.archive-it.org/all/20100511201151id_/http://example.com/", "source": "ait"},
{"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"},
{"timestamp": "20100514231857", "load_url": "http://web.archive.org/web/20100514231857id_/http://example.com/", "source": "ia"},
]
assert(to_json_list(res) == exp)
assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"})
assert(to_json_list(res) == exp)
assert(errs == {})
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_3'))
def test_mem_agg_index_3(agg):
url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=5))
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_4'))
def test_mem_agg_index_4(self, agg):
url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
{"timestamp": "20141018133107", "load_url": "http://web.archive.org/web/20141018133107id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20141020161243", "load_url": "http://web.archive.org/web/20141020161243id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
assert(to_json_list(res) == exp)
assert(errs == {})
assert(to_json_list(res) == exp)
assert(errs == {})
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_4'))
def test_mem_agg_index_4(agg):
url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
@pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys()))
def test_mem_agg_not_found(self, agg):
url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=2))
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
assert(to_json_list(res) == exp)
assert(errs == {})
assert(to_json_list(res) == [])
assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"})
@pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys()))
def test_mem_agg_not_found(agg):
url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=2))
@pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys()))
def test_mem_agg_timeout(self, agg):
url = 'http://vvork.com/'
assert(to_json_list(res) == [])
assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"})
orig_source = BaseAggregator.load_child_source
def load_child_source(self, name, source, params):
time.sleep(0.1)
return orig_source(self, name, source, params)
BaseAggregator.load_child_source = load_child_source
res, errs = agg(dict(url=url, closest='20141001', limit=2))
BaseAggregator.load_child_source = orig_source
assert(to_json_list(res) == [])
assert(errs == {'local': 'timeout',
'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
@pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys()))
def test_mem_agg_timeout(agg):
url = 'http://vvork.com/'
def test_handler_output_cdxj(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
orig_source = BaseAggregator.load_child_source
def load_child_source(self, name, source, params):
time.sleep(0.1)
return orig_source(name, source, params)
BaseAggregator.load_child_source = load_child_source
res, errs = agg(dict(url=url, closest='20141001', limit=2))
BaseAggregator.load_child_source = orig_source
assert(to_json_list(res) == [])
assert(errs == {'local': 'timeout',
'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
def test_handler_output_cdxj():
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
exp = b"""\
exp = b"""\
com,vvork)/ 20141006184357 {"url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
com,vvork)/ 20131004231540 {"url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}
"""
assert(headers['Content-Type'] == 'text/x-cdxj')
assert(b''.join(res) == exp)
assert(errs == {})
assert(headers['Content-Type'] == 'text/x-cdxj')
assert(b''.join(res) == exp)
assert(errs == {})
def test_handler_output_json():
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json'))
def test_handler_output_json(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json'))
exp = b"""\
exp = b"""\
{"urlkey": "com,vvork)/", "timestamp": "20141006184357", "url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
{"urlkey": "com,vvork)/", "timestamp": "20131004231540", "url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}
"""
assert(headers['Content-Type'] == 'application/x-ndjson')
assert(b''.join(res) == exp)
assert(errs == {})
assert(headers['Content-Type'] == 'application/x-ndjson')
assert(b''.join(res) == exp)
assert(errs == {})
def test_handler_output_link():
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link'))
def test_handler_output_link(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link'))
exp = b"""\
exp = b"""\
<http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT"; src="rhiz",
<http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/>; rel="memento"; datetime="Fri, 04 Oct 2013 23:15:40 GMT"; src="ait"
"""
assert(headers['Content-Type'] == 'application/link')
assert(b''.join(res) == exp)
assert(errs == {})
assert(headers['Content-Type'] == 'application/link')
assert(b''.join(res) == exp)
assert(errs == {})
def test_handler_output_link_2():
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://iana.org/'
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
def test_handler_output_link_2(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://iana.org/'
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
exp = b"""\
exp = b"""\
<http://web.archive.org/web/20140126093743id_/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT"; src="ia",
<file://iana.warc.gz:334:2258>; rel="memento"; datetime="Sun, 26 Jan 2014 20:06:24 GMT"; src="local",
<http://web.archive.org/web/20140123034755id_/http://iana.org/>; rel="memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT"; src="ia",
<http://web.archive.org/web/20140129175203id_/http://iana.org/>; rel="memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT"; src="ia",
<http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/>; rel="memento"; datetime="Tue, 07 Jan 2014 04:05:52 GMT"; src="ait"
"""
assert(headers['Content-Type'] == 'application/link')
assert(b''.join(res) == exp)
assert(headers['Content-Type'] == 'application/link')
assert(b''.join(res) == exp)
exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
assert(errs == exp_errs)
assert(errs == exp_errs)
def test_handler_output_link_3(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://foo.bar.non-existent'
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
def test_handler_output_link_3():
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://foo.bar.non-existent'
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
exp = b''
exp = b''
assert(headers['Content-Type'] == 'application/link')
assert(b''.join(res) == exp)
assert(headers['Content-Type'] == 'application/link')
assert(b''.join(res) == exp)
exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)",
'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)",
'ia': "NotFoundException('http://web.archive.org/web/http://foo.bar.non-existent',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://foo.bar.non-existent',)"}
exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)",
'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)",
'ia': "NotFoundException('http://web.archive.org/web/http://foo.bar.non-existent',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://foo.bar.non-existent',)"}
assert(errs == exp_errs)
assert(errs == exp_errs)
def test_handler_output_text(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))
def test_handler_output_text():
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))
exp = b"""\
exp = b"""\
com,vvork)/ 20141006184357 http://www.vvork.com/ memento http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/ http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/ rhiz
com,vvork)/ 20131004231540 http://vvork.com/ last memento http://wayback.archive-it.org/all/20131004231540/http://vvork.com/ http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/ ait
"""
assert(headers['Content-Type'] == 'text/plain')
assert(b''.join(res) == exp)
assert(errs == {})
assert(headers['Content-Type'] == 'text/plain')
assert(b''.join(res) == exp)
assert(errs == {})
def test_handler_list_sources():
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
headers, res, errs = handler(dict(mode='list_sources'))
def test_handler_list_sources(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
headers, res, errs = handler(dict(mode='list_sources'))
assert(headers == {})
assert(res == {'sources': {'bl': 'memento',
'ait': 'memento',
'ia': 'memento',
'rhiz': 'memento',
'local': 'file'}})
assert(errs == {})
assert(headers == {})
assert(res == {'sources': {'bl': 'memento',
'ait': 'memento',
'ia': 'memento',
'rhiz': 'memento',
'local': 'file'}})
assert(errs == {})

View File

@ -2,6 +2,8 @@ import json
import os
import tempfile
import shutil
import yaml
import time
from multiprocessing import Process
@ -13,9 +15,11 @@ from wsgiref.simple_server import make_server
from pywb.webagg.aggregator import SimpleAggregator
from pywb.webagg.app import ResAggApp
from pywb.webagg.handlers import DefaultResourceHandler
from pywb.webagg.indexsource import LiveIndexSource
from pywb.webagg.indexsource import LiveIndexSource, MementoIndexSource
from pywb import get_test_dir
from pywb.utils.wbexception import NotFoundException
# ============================================================================
def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
@ -91,6 +95,44 @@ class TempDirTests(object):
shutil.rmtree(cls.root_dir)
# ============================================================================
class MementoOverrideTests(object):
    """Test mixin that replays saved Memento timegate ``Link`` headers.

    ``setup_class`` loads ``text_content/link_headers.yaml`` from the test
    directory and remembers the real
    ``MementoIndexSource.get_timegate_links``. ``mock_link_header`` builds
    a replacement for that method which answers from the loaded data.
    """

    # Parsed contents of link_headers.yaml; looked up below as
    # link_header_data[test_name][timegate_url].
    link_header_data = None

    # The real MementoIndexSource.get_timegate_links, used when load=True.
    orig_get_timegate_links = None

    @classmethod
    def setup_class(cls):
        """Load the saved link headers and capture the real timegate lookup."""
        super(MementoOverrideTests, cls).setup_class()

        # Clear any previously loaded data before (re)loading.
        MementoOverrideTests.link_header_data = None
        with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
            # safe_load: the fixture is plain mappings of strings, and
            # yaml.load() without an explicit Loader is deprecated (and an
            # error on newer PyYAML releases).
            MementoOverrideTests.link_header_data = yaml.safe_load(fh)

        MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links

    @classmethod
    def mock_link_header(cls, test_name, load=False):
        """Return a stand-in for ``MementoIndexSource.get_timegate_links``.

        :param test_name: top-level key in the saved link header data.
        :param load: if true, the returned function calls the real
            implementation instead, prints ``test_name`` and the
            ``timegate_url``/result pair, and returns the real result.
        :returns: a function with the signature ``(self, params, closest)``.
            It returns the saved value for ``self.timegate_url`` under
            ``test_name`` and raises ``NotFoundException`` when no such
            entry exists.
        """
        def mock_func(self, params, closest):
            if load:
                res = cls.orig_get_timegate_links(self, params, closest)
                print(test_name + ': ')
                print("    '{0}': '{1}'".format(self.timegate_url, res))
                return res

            try:
                res = cls.link_header_data[test_name][self.timegate_url]
            except (KeyError, TypeError) as e:
                # KeyError: no saved entry for this test/timegate;
                # TypeError: data not loaded (still None) or not a mapping.
                print(e)
                msg = self.timegate_url.format(url=params['url'])
                raise NotFoundException(msg)

            # NOTE(review): presumably simulates remote latency -- confirm.
            time.sleep(0.2)
            return res

        return mock_func
# ============================================================================
class LiveServerTests(object):
@classmethod

View File

@ -28,4 +28,37 @@ agg_test_4:
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
select_mem_1:
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20140806161228/http://vvork.com/>; rel="prev memento"; datetime="Wed, 06 Aug 2014 16:12:28 GMT", <http://web.archive.org/web/20141018133107/http://vvork.com/>; rel="memento"; datetime="Sat, 18 Oct 2014 13:31:07 GMT", <http://web.archive.org/web/20141020161243/http://vvork.com/>; rel="next memento"; datetime="Mon, 20 Oct 2014 16:12:43 GMT", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"'
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
select_mem_2:
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20151105012627/http://vvork.com/>; rel="prev memento"; datetime="Thu, 05 Nov 2015 01:26:27 GMT", <http://web.archive.org/web/20160110134855/http://vvork.com/>; rel="memento"; datetime="Sun, 10 Jan 2016 13:48:55 GMT", <http://web.archive.org/web/20160112032847/http://vvork.com/>; rel="next memento"; datetime="Tue, 12 Jan 2016 03:28:47 GMT", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"'
select_live:
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20161011164443/http://vvork.com/>; rel="prev memento"; datetime="Tue, 11 Oct 2016 16:44:43 GMT"'
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
select_local:
'http://web.archive.org/web/{url}': '<http://iana.org/>; rel="original", <http://web.archive.org/web/timemap/link/http://iana.org/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/19971210061738/http://iana.org/>; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", <http://web.archive.org/web/20140123034755/http://iana.org/>; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", <http://web.archive.org/web/20140126093743/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", <http://web.archive.org/web/20140129175203/http://iana.org/>; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", <http://web.archive.org/web/20161114190210/http://iana.org/>; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"'
select_local_postreq:
'http://web.archive.org/web/{url}': '<http://iana.org/>; rel="original", <http://web.archive.org/web/timemap/link/http://iana.org/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/19971210061738/http://iana.org/>; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", <http://web.archive.org/web/20140123034755/http://iana.org/>; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", <http://web.archive.org/web/20140126093743/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", <http://web.archive.org/web/20140129175203/http://iana.org/>; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", <http://web.archive.org/web/20161114190210/http://iana.org/>; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"'
select_live_postreq:
'http://web.archive.org/web/{url}': '<http://httpbin.org/get?foo=bar>; rel="original", <http://web.archive.org/web/timemap/link/http://httpbin.org/get?foo=bar>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20151022094449/http://httpbin.org/get?foo=bar>; rel="first last memento"; datetime="Thu, 22 Oct 2015 09:44:49 GMT"'
select_local_revisit:
'http://web.archive.org/web/{url}': '<http://example.com>; rel="original", <http://web.archive.org/web/timemap/link/http://example.com>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020120142510/http://example.com>; rel="first memento"; datetime="Sun, 20 Jan 2002 14:25:10 GMT", <http://web.archive.org/web/20140127153250/http://example.com>; rel="prev memento"; datetime="Mon, 27 Jan 2014 15:32:50 GMT", <http://web.archive.org/web/20140127182713/http://example.com>; rel="memento"; datetime="Mon, 27 Jan 2014 18:27:13 GMT", <http://web.archive.org/web/20140127201610/http://example.com>; rel="next memento"; datetime="Mon, 27 Jan 2014 20:16:10 GMT", <http://web.archive.org/web/20161115101437/http://example.com>; rel="last memento"; datetime="Tue, 15 Nov 2016 10:14:37 GMT"'