diff --git a/pywb/webagg/test/test_handlers.py b/pywb/webagg/test/test_handlers.py index ced433f1..d71bd26e 100644 --- a/pywb/webagg/test/test_handlers.py +++ b/pywb/webagg/test/test_handlers.py @@ -18,8 +18,9 @@ from six.moves.urllib.parse import urlencode import webtest from fakeredis import FakeStrictRedis +from mock import patch -from .testutils import to_path, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH +from .testutils import to_path, MementoOverrideTests, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH import json @@ -31,7 +32,7 @@ sources = { } -class TestResAgg(FakeRedisTests, BaseTestClass): +class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass): def setup_class(cls): super(TestResAgg, cls).setup_class() @@ -162,6 +163,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass): assert 'ResErrors' not in resp.headers + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_1')) def test_agg_select_mem_1(self): resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001') @@ -176,6 +178,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass): assert 'ResErrors' not in resp.headers + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_2')) def test_agg_select_mem_2(self): resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231') @@ -190,6 +193,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass): assert 'ResErrors' not in resp.headers + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live')) def test_agg_select_live(self): resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=2016') @@ -202,6 +206,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass): assert 'ResErrors' not in resp.headers + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local')) def test_agg_select_local(self): resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624') @@ -214,6 +219,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass): assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"} + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_postreq')) def test_agg_select_local_postreq(self): req_data = """\ GET / HTTP/1.1 @@ -233,6 +239,7 @@ Host: iana.org assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"} + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live_postreq')) def test_agg_live_postreq(self): req_data = """\ GET /get?foo=bar HTTP/1.1 @@ -416,6 +423,7 @@ host: www.youtube.com\ assert resp.text == resp.headers['ResErrors'] + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_revisit')) def test_agg_local_revisit(self): resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local') @@ -442,6 +450,7 @@ host: www.youtube.com\ assert resp.json == {'message': 'output=foobar not supported'} assert resp.text == resp.headers['ResErrors'] + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_not_found')) def test_error_local_not_found(self): resp = self.testapp.get('/many/resource?url=http://not-found.error/&sources=local', status=404) diff --git a/pywb/webagg/test/test_memento_agg.py b/pywb/webagg/test/test_memento_agg.py index 94d4aa91..43553fe1 100644 --- a/pywb/webagg/test/test_memento_agg.py +++ b/pywb/webagg/test/test_memento_agg.py @@ -4,21 +4,17 @@ from pywb.webagg.aggregator import SimpleAggregator, GeventTimeoutAggregator from pywb.webagg.aggregator import BaseAggregator from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource -from .testutils import to_json_list, to_path, TEST_CDX_PATH +from .testutils import to_json_list, to_path, TEST_CDX_PATH, MementoOverrideTests, BaseTestClass import json import pytest import time import six -import yaml from mock import patch from pywb.webagg.handlers import IndexHandler -from pywb import get_test_dir -from pywb.utils.wbexception import NotFoundException - # Aggregator Mappings sources = { @@ -40,239 +36,213 @@ agg_nf = {'simple': SimpleAggregator(nf), 'gevent': GeventTimeoutAggregator(nf, timeout=5.0), } -# Load expected link headers -link_header_data = None -def setup_module(): - global link_header_data - with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh: - link_header_data = yaml.load(fh) + +# ============================================================================ +class TestMemAgg(MementoOverrideTests, BaseTestClass): + @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_1')) + def test_mem_agg_index_1(self, agg): + url = 'http://iana.org/' + res, errs = agg(dict(url=url, closest='20140126000000', limit=5)) + + exp = [{"timestamp": "20140126093743", "load_url": "http://web.archive.org/web/20140126093743id_/http://iana.org/", "source": "ia"}, + {"timestamp": "20140126200624", "filename": "iana.warc.gz", "source": "local"}, + {"timestamp": "20140123034755", "load_url": "http://web.archive.org/web/20140123034755id_/http://iana.org/", "source": "ia"}, + {"timestamp": "20140129175203", "load_url": "http://web.archive.org/web/20140129175203id_/http://iana.org/", "source": "ia"}, + {"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"} + ] + + assert(to_json_list(res) == exp) + assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)", + 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}) -orig_get_timegate_links = MementoIndexSource.get_timegate_links + @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_2')) + def test_mem_agg_index_2(self, agg): + url = 'http://example.com/' + res, errs = agg(dict(url=url, closest='20100512', limit=6)) -def mock_link_header(test_name, load=False): - def mock_func(self, params, closest): - if load: - res = orig_get_timegate_links(self, params, closest) - print("'{0}': '{1}'".format(self.timegate_url, res)) - return res + exp = [{"timestamp": "20100513010014", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100513010014id_/http://example.com/", "source": "bl"}, + {"timestamp": "20100512204410", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100512204410id_/http://example.com/", "source": "bl"}, + {"timestamp": "20100513224108", "load_url": "http://web.archive.org/web/20100513224108id_/http://example.com/", "source": "ia"}, + {"timestamp": "20100511201151", 'load_url': "http://wayback.archive-it.org/all/20100511201151id_/http://example.com/", "source": "ait"}, + {"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"}, + {"timestamp": "20100514231857", "load_url": "http://web.archive.org/web/20100514231857id_/http://example.com/", "source": "ia"}, + ] - try: - res = link_header_data[test_name][self.timegate_url] - time.sleep(0.2) - except: - msg = self.timegate_url.format(url=params['url']) - raise NotFoundException(msg) - - return res - - return mock_func + assert(to_json_list(res) == exp) + assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"}) -@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) -@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_1')) -def test_mem_agg_index_1(agg): - url = 'http://iana.org/' - res, errs = agg(dict(url=url, closest='20140126000000', limit=5)) + @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_3')) + def test_mem_agg_index_3(self, agg): + url = 'http://vvork.com/' + res, errs = agg(dict(url=url, closest='20141001', limit=5)) - exp = [{"timestamp": "20140126093743", "load_url": "http://web.archive.org/web/20140126093743id_/http://iana.org/", "source": "ia"}, - {"timestamp": "20140126200624", "filename": "iana.warc.gz", "source": "local"}, - {"timestamp": "20140123034755", "load_url": "http://web.archive.org/web/20140123034755id_/http://iana.org/", "source": "ia"}, - {"timestamp": "20140129175203", "load_url": "http://web.archive.org/web/20140129175203id_/http://iana.org/", "source": "ia"}, - {"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"} - ] + exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}, + {"timestamp": "20141018133107", "load_url": "http://web.archive.org/web/20141018133107id_/http://vvork.com/", "source": "ia"}, + {"timestamp": "20141020161243", "load_url": "http://web.archive.org/web/20141020161243id_/http://vvork.com/", "source": "ia"}, + {"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"}, + {"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}] - assert(to_json_list(res) == exp) - assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)", - 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}) - -@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) -@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_2')) -def test_mem_agg_index_2(agg): - url = 'http://example.com/' - res, errs = agg(dict(url=url, closest='20100512', limit=6)) - - exp = [{"timestamp": "20100513010014", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100513010014id_/http://example.com/", "source": "bl"}, - {"timestamp": "20100512204410", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100512204410id_/http://example.com/", "source": "bl"}, - {"timestamp": "20100513224108", "load_url": "http://web.archive.org/web/20100513224108id_/http://example.com/", "source": "ia"}, - {"timestamp": "20100511201151", 'load_url': "http://wayback.archive-it.org/all/20100511201151id_/http://example.com/", "source": "ait"}, - {"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"}, - {"timestamp": "20100514231857", "load_url": "http://web.archive.org/web/20100514231857id_/http://example.com/", "source": "ia"}, - ] - - assert(to_json_list(res) == exp) - assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"}) + assert(to_json_list(res) == exp) + assert(errs == {}) -@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) -@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_3')) -def test_mem_agg_index_3(agg): - url = 'http://vvork.com/' - res, errs = agg(dict(url=url, closest='20141001', limit=5)) + @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) + @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_4')) + def test_mem_agg_index_4(self, agg): + url = 'http://vvork.com/' + res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait')) - exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}, - {"timestamp": "20141018133107", "load_url": "http://web.archive.org/web/20141018133107id_/http://vvork.com/", "source": "ia"}, - {"timestamp": "20141020161243", "load_url": "http://web.archive.org/web/20141020161243id_/http://vvork.com/", "source": "ia"}, - {"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"}, - {"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}] + exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}, + {"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}] - assert(to_json_list(res) == exp) - assert(errs == {}) + assert(to_json_list(res) == exp) + assert(errs == {}) -@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) -@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_4')) -def test_mem_agg_index_4(agg): - url = 'http://vvork.com/' - res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait')) + @pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys())) + def test_mem_agg_not_found(self, agg): + url = 'http://vvork.com/' + res, errs = agg(dict(url=url, closest='20141001', limit=2)) - exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}, - {"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}] - - assert(to_json_list(res) == exp) - assert(errs == {}) + assert(to_json_list(res) == []) + assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"}) -@pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys())) -def test_mem_agg_not_found(agg): - url = 'http://vvork.com/' - res, errs = agg(dict(url=url, closest='20141001', limit=2)) + @pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys())) + def test_mem_agg_timeout(self, agg): + url = 'http://vvork.com/' - assert(to_json_list(res) == []) - assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"}) + orig_source = BaseAggregator.load_child_source + def load_child_source(self, name, source, params): + time.sleep(0.1) + return orig_source(self, name, source, params) + + BaseAggregator.load_child_source = load_child_source + res, errs = agg(dict(url=url, closest='20141001', limit=2)) + BaseAggregator.load_child_source = orig_source + + assert(to_json_list(res) == []) + assert(errs == {'local': 'timeout', + 'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'}) -@pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys())) -def test_mem_agg_timeout(agg): - url = 'http://vvork.com/' + def test_handler_output_cdxj(self): + agg = GeventTimeoutAggregator(sources, timeout=5.0) + handler = IndexHandler(agg) + url = 'http://vvork.com/' + headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait')) - orig_source = BaseAggregator.load_child_source - def load_child_source(self, name, source, params): - time.sleep(0.1) - return orig_source(name, source, params) - - BaseAggregator.load_child_source = load_child_source - res, errs = agg(dict(url=url, closest='20141001', limit=2)) - BaseAggregator.load_child_source = orig_source - - assert(to_json_list(res) == []) - assert(errs == {'local': 'timeout', - 'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'}) - - -def test_handler_output_cdxj(): - agg = GeventTimeoutAggregator(sources, timeout=5.0) - handler = IndexHandler(agg) - url = 'http://vvork.com/' - headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait')) - - exp = b"""\ + exp = b"""\ com,vvork)/ 20141006184357 {"url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"} com,vvork)/ 20131004231540 {"url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"} """ - assert(headers['Content-Type'] == 'text/x-cdxj') - assert(b''.join(res) == exp) - assert(errs == {}) + assert(headers['Content-Type'] == 'text/x-cdxj') + assert(b''.join(res) == exp) + assert(errs == {}) -def test_handler_output_json(): - agg = GeventTimeoutAggregator(sources, timeout=5.0) - handler = IndexHandler(agg) - url = 'http://vvork.com/' - headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json')) + def test_handler_output_json(self): + agg = GeventTimeoutAggregator(sources, timeout=5.0) + handler = IndexHandler(agg) + url = 'http://vvork.com/' + headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json')) - exp = b"""\ + exp = b"""\ {"urlkey": "com,vvork)/", "timestamp": "20141006184357", "url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"} {"urlkey": "com,vvork)/", "timestamp": "20131004231540", "url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"} """ - assert(headers['Content-Type'] == 'application/x-ndjson') - assert(b''.join(res) == exp) - assert(errs == {}) + assert(headers['Content-Type'] == 'application/x-ndjson') + assert(b''.join(res) == exp) + assert(errs == {}) -def test_handler_output_link(): - agg = GeventTimeoutAggregator(sources, timeout=5.0) - handler = IndexHandler(agg) - url = 'http://vvork.com/' - headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link')) + def test_handler_output_link(self): + agg = GeventTimeoutAggregator(sources, timeout=5.0) + handler = IndexHandler(agg) + url = 'http://vvork.com/' + headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link')) - exp = b"""\ + exp = b"""\ ; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT"; src="rhiz", ; rel="memento"; datetime="Fri, 04 Oct 2013 23:15:40 GMT"; src="ait" """ - assert(headers['Content-Type'] == 'application/link') - assert(b''.join(res) == exp) - assert(errs == {}) + assert(headers['Content-Type'] == 'application/link') + assert(b''.join(res) == exp) + assert(errs == {}) -def test_handler_output_link_2(): - agg = GeventTimeoutAggregator(sources, timeout=5.0) - handler = IndexHandler(agg) - url = 'http://iana.org/' - headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link')) + def test_handler_output_link_2(self): + agg = GeventTimeoutAggregator(sources, timeout=5.0) + handler = IndexHandler(agg) + url = 'http://iana.org/' + headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link')) - exp = b"""\ + exp = b"""\ ; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT"; src="ia", ; rel="memento"; datetime="Sun, 26 Jan 2014 20:06:24 GMT"; src="local", ; rel="memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT"; src="ia", ; rel="memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT"; src="ia", ; rel="memento"; datetime="Tue, 07 Jan 2014 04:05:52 GMT"; src="ait" """ - assert(headers['Content-Type'] == 'application/link') - assert(b''.join(res) == exp) + assert(headers['Content-Type'] == 'application/link') + assert(b''.join(res) == exp) - exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)", - 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"} + exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)", + 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"} - assert(errs == exp_errs) + assert(errs == exp_errs) + def test_handler_output_link_3(self): + agg = GeventTimeoutAggregator(sources, timeout=5.0) + handler = IndexHandler(agg) + url = 'http://foo.bar.non-existent' + headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link')) -def test_handler_output_link_3(): - agg = GeventTimeoutAggregator(sources, timeout=5.0) - handler = IndexHandler(agg) - url = 'http://foo.bar.non-existent' - headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link')) + exp = b'' - exp = b'' + assert(headers['Content-Type'] == 'application/link') + assert(b''.join(res) == exp) - assert(headers['Content-Type'] == 'application/link') - assert(b''.join(res) == exp) + exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)", + 'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)", + 'ia': "NotFoundException('http://web.archive.org/web/http://foo.bar.non-existent',)", + 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://foo.bar.non-existent',)"} - exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)", - 'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)", - 'ia': "NotFoundException('http://web.archive.org/web/http://foo.bar.non-existent',)", - 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://foo.bar.non-existent',)"} + assert(errs == exp_errs) - assert(errs == exp_errs) + def test_handler_output_text(self): + agg = GeventTimeoutAggregator(sources, timeout=5.0) + handler = IndexHandler(agg) + url = 'http://vvork.com/' + headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text')) -def test_handler_output_text(): - agg = GeventTimeoutAggregator(sources, timeout=5.0) - handler = IndexHandler(agg) - url = 'http://vvork.com/' - headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text')) - - exp = b"""\ + exp = b"""\ com,vvork)/ 20141006184357 http://www.vvork.com/ memento http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/ http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/ rhiz com,vvork)/ 20131004231540 http://vvork.com/ last memento http://wayback.archive-it.org/all/20131004231540/http://vvork.com/ http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/ ait """ - assert(headers['Content-Type'] == 'text/plain') - assert(b''.join(res) == exp) - assert(errs == {}) + assert(headers['Content-Type'] == 'text/plain') + assert(b''.join(res) == exp) + assert(errs == {}) -def test_handler_list_sources(): - agg = GeventTimeoutAggregator(sources, timeout=5.0) - handler = IndexHandler(agg) - headers, res, errs = handler(dict(mode='list_sources')) + def test_handler_list_sources(self): + agg = GeventTimeoutAggregator(sources, timeout=5.0) + handler = IndexHandler(agg) + headers, res, errs = handler(dict(mode='list_sources')) - assert(headers == {}) - assert(res == {'sources': {'bl': 'memento', - 'ait': 'memento', - 'ia': 'memento', - 'rhiz': 'memento', - 'local': 'file'}}) - assert(errs == {}) + assert(headers == {}) + assert(res == {'sources': {'bl': 'memento', + 'ait': 'memento', + 'ia': 'memento', + 'rhiz': 'memento', + 'local': 'file'}}) + assert(errs == {}) diff --git a/pywb/webagg/test/testutils.py b/pywb/webagg/test/testutils.py index 63bde954..632cdd8c 100644 --- a/pywb/webagg/test/testutils.py +++ b/pywb/webagg/test/testutils.py @@ -2,6 +2,8 @@ import json import os import tempfile import shutil +import yaml +import time from multiprocessing import Process @@ -13,9 +15,11 @@ from wsgiref.simple_server import make_server from pywb.webagg.aggregator import SimpleAggregator from pywb.webagg.app import ResAggApp from pywb.webagg.handlers import DefaultResourceHandler -from pywb.webagg.indexsource import LiveIndexSource +from pywb.webagg.indexsource import LiveIndexSource, MementoIndexSource from pywb import get_test_dir +from pywb.utils.wbexception import NotFoundException + # ============================================================================ def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']): @@ -91,6 +95,44 @@ class TempDirTests(object): shutil.rmtree(cls.root_dir) +# ============================================================================ +class MementoOverrideTests(object): + link_header_data = None + orig_get_timegate_links = None + + @classmethod + def setup_class(cls): + super(MementoOverrideTests, cls).setup_class() + + # Load expected link headers + MementoOverrideTests.link_header_data = None + with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh: + MementoOverrideTests.link_header_data = yaml.load(fh) + + MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links + + @classmethod + def mock_link_header(cls, test_name, load=False): + def mock_func(self, params, closest): + if load: + res = cls.orig_get_timegate_links(self, params, closest) + print(test_name + ': ') + print(" '{0}': '{1}'".format(self.timegate_url, res)) + return res + + try: + res = cls.link_header_data[test_name][self.timegate_url] + time.sleep(0.2) + except Exception as e: + print(e) + msg = self.timegate_url.format(url=params['url']) + raise NotFoundException(msg) + + return res + + return mock_func + + # ============================================================================ class LiveServerTests(object): @classmethod diff --git a/sample_archive/text_content/link_headers.yaml b/sample_archive/text_content/link_headers.yaml index 376a63f7..c629f587 100644 --- a/sample_archive/text_content/link_headers.yaml +++ b/sample_archive/text_content/link_headers.yaml @@ -28,4 +28,37 @@ agg_test_4: 'http://webenact.rhizome.org/vvork/{url}': '; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", ; rel="original", ; rel="timemap"; type="application/link-format"' +select_mem_1: + 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", ; rel="prev memento"; datetime="Wed, 06 Aug 2014 16:12:28 GMT", ; rel="memento"; datetime="Sat, 18 Oct 2014 13:31:07 GMT", ; rel="next memento"; datetime="Mon, 20 Oct 2014 16:12:43 GMT", ; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"' + + 'http://webenact.rhizome.org/vvork/{url}': '; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", ; rel="original", ; rel="timemap"; type="application/link-format"' + + +select_mem_2: + 'http://webenact.rhizome.org/vvork/{url}': '; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", ; rel="original", ; rel="timemap"; type="application/link-format"' + + 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", ; rel="prev memento"; datetime="Thu, 05 Nov 2015 01:26:27 GMT", ; rel="memento"; datetime="Sun, 10 Jan 2016 13:48:55 GMT", ; rel="next memento"; datetime="Tue, 12 Jan 2016 03:28:47 GMT", ; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"' + + +select_live: + 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT", ; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", ; rel="prev memento"; datetime="Tue, 11 Oct 2016 16:44:43 GMT"' + + 'http://webenact.rhizome.org/vvork/{url}': '; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", ; rel="original", ; rel="timemap"; type="application/link-format"' + +select_local: + 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", ; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", ; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", ; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", ; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"' + + +select_local_postreq: + 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", ; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", ; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", ; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", ; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"' + + +select_live_postreq: + 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first last memento"; datetime="Thu, 22 Oct 2015 09:44:49 GMT"' + + +select_local_revisit: + 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first memento"; datetime="Sun, 20 Jan 2002 14:25:10 GMT", ; rel="prev memento"; datetime="Mon, 27 Jan 2014 15:32:50 GMT", ; rel="memento"; datetime="Mon, 27 Jan 2014 18:27:13 GMT", ; rel="next memento"; datetime="Mon, 27 Jan 2014 20:16:10 GMT", ; rel="last memento"; datetime="Tue, 15 Nov 2016 10:14:37 GMT"' + +