diff --git a/pywb/webagg/test/test_handlers.py b/pywb/webagg/test/test_handlers.py
index ced433f1..d71bd26e 100644
--- a/pywb/webagg/test/test_handlers.py
+++ b/pywb/webagg/test/test_handlers.py
@@ -18,8 +18,9 @@ from six.moves.urllib.parse import urlencode
import webtest
from fakeredis import FakeStrictRedis
+from mock import patch
-from .testutils import to_path, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
+from .testutils import to_path, MementoOverrideTests, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
import json
@@ -31,7 +32,7 @@ sources = {
}
-class TestResAgg(FakeRedisTests, BaseTestClass):
+class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
def setup_class(cls):
super(TestResAgg, cls).setup_class()
@@ -162,6 +163,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_1'))
def test_agg_select_mem_1(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001')
@@ -176,6 +178,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_2'))
def test_agg_select_mem_2(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231')
@@ -190,6 +193,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live'))
def test_agg_select_live(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=2016')
@@ -202,6 +206,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local'))
def test_agg_select_local(self):
resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624')
@@ -214,6 +219,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_postreq'))
def test_agg_select_local_postreq(self):
req_data = """\
GET / HTTP/1.1
@@ -233,6 +239,7 @@ Host: iana.org
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live_postreq'))
def test_agg_live_postreq(self):
req_data = """\
GET /get?foo=bar HTTP/1.1
@@ -416,6 +423,7 @@ host: www.youtube.com\
assert resp.text == resp.headers['ResErrors']
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_revisit'))
def test_agg_local_revisit(self):
resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local')
@@ -442,6 +450,7 @@ host: www.youtube.com\
assert resp.json == {'message': 'output=foobar not supported'}
assert resp.text == resp.headers['ResErrors']
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_not_found'))
def test_error_local_not_found(self):
resp = self.testapp.get('/many/resource?url=http://not-found.error/&sources=local', status=404)
diff --git a/pywb/webagg/test/test_memento_agg.py b/pywb/webagg/test/test_memento_agg.py
index 94d4aa91..43553fe1 100644
--- a/pywb/webagg/test/test_memento_agg.py
+++ b/pywb/webagg/test/test_memento_agg.py
@@ -4,21 +4,17 @@ from pywb.webagg.aggregator import SimpleAggregator, GeventTimeoutAggregator
from pywb.webagg.aggregator import BaseAggregator
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource
-from .testutils import to_json_list, to_path, TEST_CDX_PATH
+from .testutils import to_json_list, to_path, TEST_CDX_PATH, MementoOverrideTests, BaseTestClass
import json
import pytest
import time
import six
-import yaml
from mock import patch
from pywb.webagg.handlers import IndexHandler
-from pywb import get_test_dir
-from pywb.utils.wbexception import NotFoundException
-
# Aggregator Mappings
sources = {
@@ -40,239 +36,213 @@ agg_nf = {'simple': SimpleAggregator(nf),
'gevent': GeventTimeoutAggregator(nf, timeout=5.0),
}
-# Load expected link headers
-link_header_data = None
-def setup_module():
- global link_header_data
- with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
- link_header_data = yaml.load(fh)
+
+# ============================================================================
+class TestMemAgg(MementoOverrideTests, BaseTestClass):
+ @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_1'))
+ def test_mem_agg_index_1(self, agg):
+ url = 'http://iana.org/'
+ res, errs = agg(dict(url=url, closest='20140126000000', limit=5))
+
+ exp = [{"timestamp": "20140126093743", "load_url": "http://web.archive.org/web/20140126093743id_/http://iana.org/", "source": "ia"},
+ {"timestamp": "20140126200624", "filename": "iana.warc.gz", "source": "local"},
+ {"timestamp": "20140123034755", "load_url": "http://web.archive.org/web/20140123034755id_/http://iana.org/", "source": "ia"},
+ {"timestamp": "20140129175203", "load_url": "http://web.archive.org/web/20140129175203id_/http://iana.org/", "source": "ia"},
+ {"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"}
+ ]
+
+ assert(to_json_list(res) == exp)
+ assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
+ 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
-orig_get_timegate_links = MementoIndexSource.get_timegate_links
+ @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_2'))
+ def test_mem_agg_index_2(self, agg):
+ url = 'http://example.com/'
+ res, errs = agg(dict(url=url, closest='20100512', limit=6))
-def mock_link_header(test_name, load=False):
- def mock_func(self, params, closest):
- if load:
- res = orig_get_timegate_links(self, params, closest)
- print("'{0}': '{1}'".format(self.timegate_url, res))
- return res
+ exp = [{"timestamp": "20100513010014", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100513010014id_/http://example.com/", "source": "bl"},
+ {"timestamp": "20100512204410", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100512204410id_/http://example.com/", "source": "bl"},
+ {"timestamp": "20100513224108", "load_url": "http://web.archive.org/web/20100513224108id_/http://example.com/", "source": "ia"},
+ {"timestamp": "20100511201151", 'load_url': "http://wayback.archive-it.org/all/20100511201151id_/http://example.com/", "source": "ait"},
+ {"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"},
+ {"timestamp": "20100514231857", "load_url": "http://web.archive.org/web/20100514231857id_/http://example.com/", "source": "ia"},
+ ]
- try:
- res = link_header_data[test_name][self.timegate_url]
- time.sleep(0.2)
- except:
- msg = self.timegate_url.format(url=params['url'])
- raise NotFoundException(msg)
-
- return res
-
- return mock_func
+ assert(to_json_list(res) == exp)
+ assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"})
-@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
-@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_1'))
-def test_mem_agg_index_1(agg):
- url = 'http://iana.org/'
- res, errs = agg(dict(url=url, closest='20140126000000', limit=5))
+ @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_3'))
+ def test_mem_agg_index_3(self, agg):
+ url = 'http://vvork.com/'
+ res, errs = agg(dict(url=url, closest='20141001', limit=5))
- exp = [{"timestamp": "20140126093743", "load_url": "http://web.archive.org/web/20140126093743id_/http://iana.org/", "source": "ia"},
- {"timestamp": "20140126200624", "filename": "iana.warc.gz", "source": "local"},
- {"timestamp": "20140123034755", "load_url": "http://web.archive.org/web/20140123034755id_/http://iana.org/", "source": "ia"},
- {"timestamp": "20140129175203", "load_url": "http://web.archive.org/web/20140129175203id_/http://iana.org/", "source": "ia"},
- {"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"}
- ]
+ exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
+ {"timestamp": "20141018133107", "load_url": "http://web.archive.org/web/20141018133107id_/http://vvork.com/", "source": "ia"},
+ {"timestamp": "20141020161243", "load_url": "http://web.archive.org/web/20141020161243id_/http://vvork.com/", "source": "ia"},
+ {"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"},
+ {"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
- assert(to_json_list(res) == exp)
- assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
- 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
-
-@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
-@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_2'))
-def test_mem_agg_index_2(agg):
- url = 'http://example.com/'
- res, errs = agg(dict(url=url, closest='20100512', limit=6))
-
- exp = [{"timestamp": "20100513010014", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100513010014id_/http://example.com/", "source": "bl"},
- {"timestamp": "20100512204410", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100512204410id_/http://example.com/", "source": "bl"},
- {"timestamp": "20100513224108", "load_url": "http://web.archive.org/web/20100513224108id_/http://example.com/", "source": "ia"},
- {"timestamp": "20100511201151", 'load_url': "http://wayback.archive-it.org/all/20100511201151id_/http://example.com/", "source": "ait"},
- {"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"},
- {"timestamp": "20100514231857", "load_url": "http://web.archive.org/web/20100514231857id_/http://example.com/", "source": "ia"},
- ]
-
- assert(to_json_list(res) == exp)
- assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"})
+ assert(to_json_list(res) == exp)
+ assert(errs == {})
-@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
-@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_3'))
-def test_mem_agg_index_3(agg):
- url = 'http://vvork.com/'
- res, errs = agg(dict(url=url, closest='20141001', limit=5))
+ @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
+ @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_4'))
+ def test_mem_agg_index_4(self, agg):
+ url = 'http://vvork.com/'
+ res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
- exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
- {"timestamp": "20141018133107", "load_url": "http://web.archive.org/web/20141018133107id_/http://vvork.com/", "source": "ia"},
- {"timestamp": "20141020161243", "load_url": "http://web.archive.org/web/20141020161243id_/http://vvork.com/", "source": "ia"},
- {"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"},
- {"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
+ exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
+ {"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
- assert(to_json_list(res) == exp)
- assert(errs == {})
+ assert(to_json_list(res) == exp)
+ assert(errs == {})
-@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
-@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_4'))
-def test_mem_agg_index_4(agg):
- url = 'http://vvork.com/'
- res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
+ @pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys()))
+ def test_mem_agg_not_found(self, agg):
+ url = 'http://vvork.com/'
+ res, errs = agg(dict(url=url, closest='20141001', limit=2))
- exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
- {"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
-
- assert(to_json_list(res) == exp)
- assert(errs == {})
+ assert(to_json_list(res) == [])
+ assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"})
-@pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys()))
-def test_mem_agg_not_found(agg):
- url = 'http://vvork.com/'
- res, errs = agg(dict(url=url, closest='20141001', limit=2))
+    @pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys()))
+    def test_mem_agg_timeout(self, agg):
+        """Check that every source is reported as 'timeout' when child loads are delayed."""
+        url = 'http://vvork.com/'
- assert(to_json_list(res) == [])
- assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"})
+        orig_source = BaseAggregator.load_child_source
+        def load_child_source(self, name, source, params):
+            # Delay every child load before delegating to the real loader
+            # NOTE(review): presumably 0.1s exceeds the timeout configured for
+            # agg_tm, which is defined outside this hunk -- confirm
+            time.sleep(0.1)
+            return orig_source(self, name, source, params)
+
+        BaseAggregator.load_child_source = load_child_source
+        try:
+            res, errs = agg(dict(url=url, closest='20141001', limit=2))
+        finally:
+            # Always undo the class-level monkeypatch, even if agg() raises,
+            # so the delayed loader cannot leak into later tests
+            BaseAggregator.load_child_source = orig_source
+
+        assert(to_json_list(res) == [])
+        assert(errs == {'local': 'timeout',
+                        'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
-@pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys()))
-def test_mem_agg_timeout(agg):
- url = 'http://vvork.com/'
+ def test_handler_output_cdxj(self):
+ agg = GeventTimeoutAggregator(sources, timeout=5.0)
+ handler = IndexHandler(agg)
+ url = 'http://vvork.com/'
+ headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
- orig_source = BaseAggregator.load_child_source
- def load_child_source(self, name, source, params):
- time.sleep(0.1)
- return orig_source(name, source, params)
-
- BaseAggregator.load_child_source = load_child_source
- res, errs = agg(dict(url=url, closest='20141001', limit=2))
- BaseAggregator.load_child_source = orig_source
-
- assert(to_json_list(res) == [])
- assert(errs == {'local': 'timeout',
- 'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
-
-
-def test_handler_output_cdxj():
- agg = GeventTimeoutAggregator(sources, timeout=5.0)
- handler = IndexHandler(agg)
- url = 'http://vvork.com/'
- headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
-
- exp = b"""\
+ exp = b"""\
com,vvork)/ 20141006184357 {"url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
com,vvork)/ 20131004231540 {"url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}
"""
- assert(headers['Content-Type'] == 'text/x-cdxj')
- assert(b''.join(res) == exp)
- assert(errs == {})
+ assert(headers['Content-Type'] == 'text/x-cdxj')
+ assert(b''.join(res) == exp)
+ assert(errs == {})
-def test_handler_output_json():
- agg = GeventTimeoutAggregator(sources, timeout=5.0)
- handler = IndexHandler(agg)
- url = 'http://vvork.com/'
- headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json'))
+ def test_handler_output_json(self):
+ agg = GeventTimeoutAggregator(sources, timeout=5.0)
+ handler = IndexHandler(agg)
+ url = 'http://vvork.com/'
+ headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json'))
- exp = b"""\
+ exp = b"""\
{"urlkey": "com,vvork)/", "timestamp": "20141006184357", "url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
{"urlkey": "com,vvork)/", "timestamp": "20131004231540", "url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}
"""
- assert(headers['Content-Type'] == 'application/x-ndjson')
- assert(b''.join(res) == exp)
- assert(errs == {})
+ assert(headers['Content-Type'] == 'application/x-ndjson')
+ assert(b''.join(res) == exp)
+ assert(errs == {})
-def test_handler_output_link():
- agg = GeventTimeoutAggregator(sources, timeout=5.0)
- handler = IndexHandler(agg)
- url = 'http://vvork.com/'
- headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link'))
+ def test_handler_output_link(self):
+ agg = GeventTimeoutAggregator(sources, timeout=5.0)
+ handler = IndexHandler(agg)
+ url = 'http://vvork.com/'
+ headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link'))
- exp = b"""\
+ exp = b"""\
; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT"; src="rhiz",
; rel="memento"; datetime="Fri, 04 Oct 2013 23:15:40 GMT"; src="ait"
"""
- assert(headers['Content-Type'] == 'application/link')
- assert(b''.join(res) == exp)
- assert(errs == {})
+ assert(headers['Content-Type'] == 'application/link')
+ assert(b''.join(res) == exp)
+ assert(errs == {})
-def test_handler_output_link_2():
- agg = GeventTimeoutAggregator(sources, timeout=5.0)
- handler = IndexHandler(agg)
- url = 'http://iana.org/'
- headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
+ def test_handler_output_link_2(self):
+ agg = GeventTimeoutAggregator(sources, timeout=5.0)
+ handler = IndexHandler(agg)
+ url = 'http://iana.org/'
+ headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
- exp = b"""\
+ exp = b"""\
; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT"; src="ia",
; rel="memento"; datetime="Sun, 26 Jan 2014 20:06:24 GMT"; src="local",
; rel="memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT"; src="ia",
; rel="memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT"; src="ia",
; rel="memento"; datetime="Tue, 07 Jan 2014 04:05:52 GMT"; src="ait"
"""
- assert(headers['Content-Type'] == 'application/link')
- assert(b''.join(res) == exp)
+ assert(headers['Content-Type'] == 'application/link')
+ assert(b''.join(res) == exp)
- exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
- 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
+ exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
+ 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
- assert(errs == exp_errs)
+ assert(errs == exp_errs)
+ def test_handler_output_link_3(self):
+ agg = GeventTimeoutAggregator(sources, timeout=5.0)
+ handler = IndexHandler(agg)
+ url = 'http://foo.bar.non-existent'
+ headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
-def test_handler_output_link_3():
- agg = GeventTimeoutAggregator(sources, timeout=5.0)
- handler = IndexHandler(agg)
- url = 'http://foo.bar.non-existent'
- headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
+ exp = b''
- exp = b''
+ assert(headers['Content-Type'] == 'application/link')
+ assert(b''.join(res) == exp)
- assert(headers['Content-Type'] == 'application/link')
- assert(b''.join(res) == exp)
+ exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)",
+ 'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)",
+ 'ia': "NotFoundException('http://web.archive.org/web/http://foo.bar.non-existent',)",
+ 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://foo.bar.non-existent',)"}
- exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)",
- 'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)",
- 'ia': "NotFoundException('http://web.archive.org/web/http://foo.bar.non-existent',)",
- 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://foo.bar.non-existent',)"}
+ assert(errs == exp_errs)
- assert(errs == exp_errs)
+ def test_handler_output_text(self):
+ agg = GeventTimeoutAggregator(sources, timeout=5.0)
+ handler = IndexHandler(agg)
+ url = 'http://vvork.com/'
+ headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))
-def test_handler_output_text():
- agg = GeventTimeoutAggregator(sources, timeout=5.0)
- handler = IndexHandler(agg)
- url = 'http://vvork.com/'
- headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))
-
- exp = b"""\
+ exp = b"""\
com,vvork)/ 20141006184357 http://www.vvork.com/ memento http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/ http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/ rhiz
com,vvork)/ 20131004231540 http://vvork.com/ last memento http://wayback.archive-it.org/all/20131004231540/http://vvork.com/ http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/ ait
"""
- assert(headers['Content-Type'] == 'text/plain')
- assert(b''.join(res) == exp)
- assert(errs == {})
+ assert(headers['Content-Type'] == 'text/plain')
+ assert(b''.join(res) == exp)
+ assert(errs == {})
-def test_handler_list_sources():
- agg = GeventTimeoutAggregator(sources, timeout=5.0)
- handler = IndexHandler(agg)
- headers, res, errs = handler(dict(mode='list_sources'))
+ def test_handler_list_sources(self):
+ agg = GeventTimeoutAggregator(sources, timeout=5.0)
+ handler = IndexHandler(agg)
+ headers, res, errs = handler(dict(mode='list_sources'))
- assert(headers == {})
- assert(res == {'sources': {'bl': 'memento',
- 'ait': 'memento',
- 'ia': 'memento',
- 'rhiz': 'memento',
- 'local': 'file'}})
- assert(errs == {})
+ assert(headers == {})
+ assert(res == {'sources': {'bl': 'memento',
+ 'ait': 'memento',
+ 'ia': 'memento',
+ 'rhiz': 'memento',
+ 'local': 'file'}})
+ assert(errs == {})
diff --git a/pywb/webagg/test/testutils.py b/pywb/webagg/test/testutils.py
index 63bde954..632cdd8c 100644
--- a/pywb/webagg/test/testutils.py
+++ b/pywb/webagg/test/testutils.py
@@ -2,6 +2,8 @@ import json
import os
import tempfile
import shutil
+import yaml
+import time
from multiprocessing import Process
@@ -13,9 +15,11 @@ from wsgiref.simple_server import make_server
from pywb.webagg.aggregator import SimpleAggregator
from pywb.webagg.app import ResAggApp
from pywb.webagg.handlers import DefaultResourceHandler
-from pywb.webagg.indexsource import LiveIndexSource
+from pywb.webagg.indexsource import LiveIndexSource, MementoIndexSource
from pywb import get_test_dir
+from pywb.utils.wbexception import NotFoundException
+
# ============================================================================
def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
@@ -91,6 +95,44 @@ class TempDirTests(object):
shutil.rmtree(cls.root_dir)
+# ============================================================================
+class MementoOverrideTests(object):
+    """Test mixin that serves canned Memento Link headers instead of live lookups.
+
+    ``setup_class`` loads ``text_content/link_headers.yaml`` from the test dir
+    and records the real ``MementoIndexSource.get_timegate_links``;
+    ``mock_link_header`` builds a replacement for that method which is meant
+    to be installed with ``mock.patch``.
+    """
+
+    # Parsed contents of link_headers.yaml, looked up as [test_name][timegate_url]
+    link_header_data = None
+
+    # The unpatched MementoIndexSource.get_timegate_links, used when load=True
+    orig_get_timegate_links = None
+
+    @classmethod
+    def setup_class(cls):
+        """Load the canned link headers and remember the original timegate method."""
+        # Cooperative call: relies on another class in the MRO of the concrete
+        # test class providing setup_class
+        super(MementoOverrideTests, cls).setup_class()
+
+        # Load expected link headers
+        MementoOverrideTests.link_header_data = None
+        with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
+            # safe_load is sufficient for a fixture of plain mappings/strings;
+            # yaml.load() without an explicit Loader is unsafe and deprecated
+            MementoOverrideTests.link_header_data = yaml.safe_load(fh)
+
+        MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links
+
+    @classmethod
+    def mock_link_header(cls, test_name, load=False):
+        """Return a function to patch in for ``MementoIndexSource.get_timegate_links``.
+
+        :param test_name: top-level key of the canned link header data to use
+        :param load: if true, the returned function calls the original method
+                     and prints the timegate url and result instead of using
+                     the canned data
+        :return: ``mock_func(self, params, closest)``; it raises
+                 ``NotFoundException`` when no canned entry exists for
+                 ``test_name`` and the source's ``timegate_url``
+        """
+        def mock_func(self, params, closest):
+            if load:
+                res = cls.orig_get_timegate_links(self, params, closest)
+                print(test_name + ': ')
+                print(" '{0}': '{1}'".format(self.timegate_url, res))
+                return res
+
+            try:
+                res = cls.link_header_data[test_name][self.timegate_url]
+            except (KeyError, TypeError) as e:
+                # KeyError: no entry for this test name or timegate url
+                # TypeError: data not loaded yet, or the test entry is empty
+                print(e)
+                msg = self.timegate_url.format(url=params['url'])
+                raise NotFoundException(msg)
+
+            # Only successful lookups are delayed, as before
+            time.sleep(0.2)
+            return res
+
+        return mock_func
+
+
# ============================================================================
class LiveServerTests(object):
@classmethod
diff --git a/sample_archive/text_content/link_headers.yaml b/sample_archive/text_content/link_headers.yaml
index 376a63f7..c629f587 100644
--- a/sample_archive/text_content/link_headers.yaml
+++ b/sample_archive/text_content/link_headers.yaml
@@ -28,4 +28,37 @@ agg_test_4:
'http://webenact.rhizome.org/vvork/{url}': '; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", ; rel="original", ; rel="timemap"; type="application/link-format"'
+select_mem_1:
+ 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", ; rel="prev memento"; datetime="Wed, 06 Aug 2014 16:12:28 GMT", ; rel="memento"; datetime="Sat, 18 Oct 2014 13:31:07 GMT", ; rel="next memento"; datetime="Mon, 20 Oct 2014 16:12:43 GMT", ; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"'
+
+ 'http://webenact.rhizome.org/vvork/{url}': '; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", ; rel="original", ; rel="timemap"; type="application/link-format"'
+
+
+select_mem_2:
+ 'http://webenact.rhizome.org/vvork/{url}': '; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", ; rel="original", ; rel="timemap"; type="application/link-format"'
+
+ 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", ; rel="prev memento"; datetime="Thu, 05 Nov 2015 01:26:27 GMT", ; rel="memento"; datetime="Sun, 10 Jan 2016 13:48:55 GMT", ; rel="next memento"; datetime="Tue, 12 Jan 2016 03:28:47 GMT", ; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"'
+
+
+select_live:
+ 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT", ; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", ; rel="prev memento"; datetime="Tue, 11 Oct 2016 16:44:43 GMT"'
+
+ 'http://webenact.rhizome.org/vvork/{url}': '; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", ; rel="original", ; rel="timemap"; type="application/link-format"'
+
+select_local:
+ 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", ; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", ; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", ; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", ; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"'
+
+
+select_local_postreq:
+ 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", ; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", ; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", ; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", ; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"'
+
+
+select_live_postreq:
+ 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first last memento"; datetime="Thu, 22 Oct 2015 09:44:49 GMT"'
+
+
+select_local_revisit:
+ 'http://web.archive.org/web/{url}': '; rel="original", ; rel="timemap"; type="application/link-format", ; rel="first memento"; datetime="Sun, 20 Jan 2002 14:25:10 GMT", ; rel="prev memento"; datetime="Mon, 27 Jan 2014 15:32:50 GMT", ; rel="memento"; datetime="Mon, 27 Jan 2014 18:27:13 GMT", ; rel="next memento"; datetime="Mon, 27 Jan 2014 20:16:10 GMT", ; rel="last memento"; datetime="Tue, 15 Nov 2016 10:14:37 GMT"'
+
+