1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

tests: add MementoOverrideTests as a reusable class and convert the memento_agg tests to use it

handlers: add saved link header data for memento tests for handlers
This commit is contained in:
Ilya Kreymer 2016-11-15 14:24:34 -08:00
parent c7fa8b711c
commit d24868db7a
4 changed files with 234 additions and 180 deletions

View File

@ -18,8 +18,9 @@ from six.moves.urllib.parse import urlencode
import webtest import webtest
from fakeredis import FakeStrictRedis from fakeredis import FakeStrictRedis
from mock import patch
from .testutils import to_path, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH from .testutils import to_path, MementoOverrideTests, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
import json import json
@ -31,7 +32,7 @@ sources = {
} }
class TestResAgg(FakeRedisTests, BaseTestClass): class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
def setup_class(cls): def setup_class(cls):
super(TestResAgg, cls).setup_class() super(TestResAgg, cls).setup_class()
@ -162,6 +163,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_1'))
def test_agg_select_mem_1(self): def test_agg_select_mem_1(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001') resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001')
@ -176,6 +178,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_2'))
def test_agg_select_mem_2(self): def test_agg_select_mem_2(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231') resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231')
@ -190,6 +193,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live'))
def test_agg_select_live(self): def test_agg_select_live(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=2016') resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=2016')
@ -202,6 +206,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local'))
def test_agg_select_local(self): def test_agg_select_local(self):
resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624') resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624')
@ -214,6 +219,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"} assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_postreq'))
def test_agg_select_local_postreq(self): def test_agg_select_local_postreq(self):
req_data = """\ req_data = """\
GET / HTTP/1.1 GET / HTTP/1.1
@ -233,6 +239,7 @@ Host: iana.org
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"} assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live_postreq'))
def test_agg_live_postreq(self): def test_agg_live_postreq(self):
req_data = """\ req_data = """\
GET /get?foo=bar HTTP/1.1 GET /get?foo=bar HTTP/1.1
@ -416,6 +423,7 @@ host: www.youtube.com\
assert resp.text == resp.headers['ResErrors'] assert resp.text == resp.headers['ResErrors']
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_revisit'))
def test_agg_local_revisit(self): def test_agg_local_revisit(self):
resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local') resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local')
@ -442,6 +450,7 @@ host: www.youtube.com\
assert resp.json == {'message': 'output=foobar not supported'} assert resp.json == {'message': 'output=foobar not supported'}
assert resp.text == resp.headers['ResErrors'] assert resp.text == resp.headers['ResErrors']
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_not_found'))
def test_error_local_not_found(self): def test_error_local_not_found(self):
resp = self.testapp.get('/many/resource?url=http://not-found.error/&sources=local', status=404) resp = self.testapp.get('/many/resource?url=http://not-found.error/&sources=local', status=404)

View File

@ -4,21 +4,17 @@ from pywb.webagg.aggregator import SimpleAggregator, GeventTimeoutAggregator
from pywb.webagg.aggregator import BaseAggregator from pywb.webagg.aggregator import BaseAggregator
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource
from .testutils import to_json_list, to_path, TEST_CDX_PATH from .testutils import to_json_list, to_path, TEST_CDX_PATH, MementoOverrideTests, BaseTestClass
import json import json
import pytest import pytest
import time import time
import six import six
import yaml
from mock import patch from mock import patch
from pywb.webagg.handlers import IndexHandler from pywb.webagg.handlers import IndexHandler
from pywb import get_test_dir
from pywb.utils.wbexception import NotFoundException
# Aggregator Mappings # Aggregator Mappings
sources = { sources = {
@ -40,239 +36,213 @@ agg_nf = {'simple': SimpleAggregator(nf),
'gevent': GeventTimeoutAggregator(nf, timeout=5.0), 'gevent': GeventTimeoutAggregator(nf, timeout=5.0),
} }
# Load expected link headers
link_header_data = None # ============================================================================
def setup_module(): class TestMemAgg(MementoOverrideTests, BaseTestClass):
global link_header_data @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh: @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_1'))
link_header_data = yaml.load(fh) def test_mem_agg_index_1(self, agg):
url = 'http://iana.org/'
res, errs = agg(dict(url=url, closest='20140126000000', limit=5))
exp = [{"timestamp": "20140126093743", "load_url": "http://web.archive.org/web/20140126093743id_/http://iana.org/", "source": "ia"},
{"timestamp": "20140126200624", "filename": "iana.warc.gz", "source": "local"},
{"timestamp": "20140123034755", "load_url": "http://web.archive.org/web/20140123034755id_/http://iana.org/", "source": "ia"},
{"timestamp": "20140129175203", "load_url": "http://web.archive.org/web/20140129175203id_/http://iana.org/", "source": "ia"},
{"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"}
]
assert(to_json_list(res) == exp)
assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
orig_get_timegate_links = MementoIndexSource.get_timegate_links @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_2'))
def test_mem_agg_index_2(self, agg):
url = 'http://example.com/'
res, errs = agg(dict(url=url, closest='20100512', limit=6))
def mock_link_header(test_name, load=False): exp = [{"timestamp": "20100513010014", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100513010014id_/http://example.com/", "source": "bl"},
def mock_func(self, params, closest): {"timestamp": "20100512204410", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100512204410id_/http://example.com/", "source": "bl"},
if load: {"timestamp": "20100513224108", "load_url": "http://web.archive.org/web/20100513224108id_/http://example.com/", "source": "ia"},
res = orig_get_timegate_links(self, params, closest) {"timestamp": "20100511201151", 'load_url': "http://wayback.archive-it.org/all/20100511201151id_/http://example.com/", "source": "ait"},
print("'{0}': '{1}'".format(self.timegate_url, res)) {"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"},
return res {"timestamp": "20100514231857", "load_url": "http://web.archive.org/web/20100514231857id_/http://example.com/", "source": "ia"},
]
try: assert(to_json_list(res) == exp)
res = link_header_data[test_name][self.timegate_url] assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"})
time.sleep(0.2)
except:
msg = self.timegate_url.format(url=params['url'])
raise NotFoundException(msg)
return res
return mock_func
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_1')) @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_3'))
def test_mem_agg_index_1(agg): def test_mem_agg_index_3(self, agg):
url = 'http://iana.org/' url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20140126000000', limit=5)) res, errs = agg(dict(url=url, closest='20141001', limit=5))
exp = [{"timestamp": "20140126093743", "load_url": "http://web.archive.org/web/20140126093743id_/http://iana.org/", "source": "ia"}, exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
{"timestamp": "20140126200624", "filename": "iana.warc.gz", "source": "local"}, {"timestamp": "20141018133107", "load_url": "http://web.archive.org/web/20141018133107id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20140123034755", "load_url": "http://web.archive.org/web/20140123034755id_/http://iana.org/", "source": "ia"}, {"timestamp": "20141020161243", "load_url": "http://web.archive.org/web/20141020161243id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20140129175203", "load_url": "http://web.archive.org/web/20140129175203id_/http://iana.org/", "source": "ia"}, {"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"} {"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
]
assert(to_json_list(res) == exp) assert(to_json_list(res) == exp)
assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)", assert(errs == {})
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_2'))
def test_mem_agg_index_2(agg):
url = 'http://example.com/'
res, errs = agg(dict(url=url, closest='20100512', limit=6))
exp = [{"timestamp": "20100513010014", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100513010014id_/http://example.com/", "source": "bl"},
{"timestamp": "20100512204410", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100512204410id_/http://example.com/", "source": "bl"},
{"timestamp": "20100513224108", "load_url": "http://web.archive.org/web/20100513224108id_/http://example.com/", "source": "ia"},
{"timestamp": "20100511201151", 'load_url': "http://wayback.archive-it.org/all/20100511201151id_/http://example.com/", "source": "ait"},
{"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"},
{"timestamp": "20100514231857", "load_url": "http://web.archive.org/web/20100514231857id_/http://example.com/", "source": "ia"},
]
assert(to_json_list(res) == exp)
assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"})
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_3')) @patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_4'))
def test_mem_agg_index_3(agg): def test_mem_agg_index_4(self, agg):
url = 'http://vvork.com/' url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=5)) res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}, exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
{"timestamp": "20141018133107", "load_url": "http://web.archive.org/web/20141018133107id_/http://vvork.com/", "source": "ia"}, {"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
{"timestamp": "20141020161243", "load_url": "http://web.archive.org/web/20141020161243id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"},
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
assert(to_json_list(res) == exp) assert(to_json_list(res) == exp)
assert(errs == {}) assert(errs == {})
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) @pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_4')) def test_mem_agg_not_found(self, agg):
def test_mem_agg_index_4(agg): url = 'http://vvork.com/'
url = 'http://vvork.com/' res, errs = agg(dict(url=url, closest='20141001', limit=2))
res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}, assert(to_json_list(res) == [])
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}] assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"})
assert(to_json_list(res) == exp)
assert(errs == {})
@pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys())) @pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys()))
def test_mem_agg_not_found(agg): def test_mem_agg_timeout(self, agg):
url = 'http://vvork.com/' url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=2))
assert(to_json_list(res) == []) orig_source = BaseAggregator.load_child_source
assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"}) def load_child_source(self, name, source, params):
time.sleep(0.1)
return orig_source(self, name, source, params)
BaseAggregator.load_child_source = load_child_source
res, errs = agg(dict(url=url, closest='20141001', limit=2))
BaseAggregator.load_child_source = orig_source
assert(to_json_list(res) == [])
assert(errs == {'local': 'timeout',
'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
@pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys())) def test_handler_output_cdxj(self):
def test_mem_agg_timeout(agg): agg = GeventTimeoutAggregator(sources, timeout=5.0)
url = 'http://vvork.com/' handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
orig_source = BaseAggregator.load_child_source exp = b"""\
def load_child_source(self, name, source, params):
time.sleep(0.1)
return orig_source(name, source, params)
BaseAggregator.load_child_source = load_child_source
res, errs = agg(dict(url=url, closest='20141001', limit=2))
BaseAggregator.load_child_source = orig_source
assert(to_json_list(res) == [])
assert(errs == {'local': 'timeout',
'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
def test_handler_output_cdxj():
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
exp = b"""\
com,vvork)/ 20141006184357 {"url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"} com,vvork)/ 20141006184357 {"url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
com,vvork)/ 20131004231540 {"url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"} com,vvork)/ 20131004231540 {"url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}
""" """
assert(headers['Content-Type'] == 'text/x-cdxj') assert(headers['Content-Type'] == 'text/x-cdxj')
assert(b''.join(res) == exp) assert(b''.join(res) == exp)
assert(errs == {}) assert(errs == {})
def test_handler_output_json(): def test_handler_output_json(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0) agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg) handler = IndexHandler(agg)
url = 'http://vvork.com/' url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json')) headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json'))
exp = b"""\ exp = b"""\
{"urlkey": "com,vvork)/", "timestamp": "20141006184357", "url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"} {"urlkey": "com,vvork)/", "timestamp": "20141006184357", "url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
{"urlkey": "com,vvork)/", "timestamp": "20131004231540", "url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"} {"urlkey": "com,vvork)/", "timestamp": "20131004231540", "url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}
""" """
assert(headers['Content-Type'] == 'application/x-ndjson') assert(headers['Content-Type'] == 'application/x-ndjson')
assert(b''.join(res) == exp) assert(b''.join(res) == exp)
assert(errs == {}) assert(errs == {})
def test_handler_output_link(): def test_handler_output_link(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0) agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg) handler = IndexHandler(agg)
url = 'http://vvork.com/' url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link')) headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link'))
exp = b"""\ exp = b"""\
<http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT"; src="rhiz", <http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT"; src="rhiz",
<http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/>; rel="memento"; datetime="Fri, 04 Oct 2013 23:15:40 GMT"; src="ait" <http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/>; rel="memento"; datetime="Fri, 04 Oct 2013 23:15:40 GMT"; src="ait"
""" """
assert(headers['Content-Type'] == 'application/link') assert(headers['Content-Type'] == 'application/link')
assert(b''.join(res) == exp) assert(b''.join(res) == exp)
assert(errs == {}) assert(errs == {})
def test_handler_output_link_2(): def test_handler_output_link_2(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0) agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg) handler = IndexHandler(agg)
url = 'http://iana.org/' url = 'http://iana.org/'
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link')) headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
exp = b"""\ exp = b"""\
<http://web.archive.org/web/20140126093743id_/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT"; src="ia", <http://web.archive.org/web/20140126093743id_/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT"; src="ia",
<file://iana.warc.gz:334:2258>; rel="memento"; datetime="Sun, 26 Jan 2014 20:06:24 GMT"; src="local", <file://iana.warc.gz:334:2258>; rel="memento"; datetime="Sun, 26 Jan 2014 20:06:24 GMT"; src="local",
<http://web.archive.org/web/20140123034755id_/http://iana.org/>; rel="memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT"; src="ia", <http://web.archive.org/web/20140123034755id_/http://iana.org/>; rel="memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT"; src="ia",
<http://web.archive.org/web/20140129175203id_/http://iana.org/>; rel="memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT"; src="ia", <http://web.archive.org/web/20140129175203id_/http://iana.org/>; rel="memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT"; src="ia",
<http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/>; rel="memento"; datetime="Tue, 07 Jan 2014 04:05:52 GMT"; src="ait" <http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/>; rel="memento"; datetime="Tue, 07 Jan 2014 04:05:52 GMT"; src="ait"
""" """
assert(headers['Content-Type'] == 'application/link') assert(headers['Content-Type'] == 'application/link')
assert(b''.join(res) == exp) assert(b''.join(res) == exp)
exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)", exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"} 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
assert(errs == exp_errs) assert(errs == exp_errs)
def test_handler_output_link_3(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://foo.bar.non-existent'
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
def test_handler_output_link_3(): exp = b''
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://foo.bar.non-existent'
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
exp = b'' assert(headers['Content-Type'] == 'application/link')
assert(b''.join(res) == exp)
assert(headers['Content-Type'] == 'application/link') exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)",
assert(b''.join(res) == exp) 'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)",
'ia': "NotFoundException('http://web.archive.org/web/http://foo.bar.non-existent',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://foo.bar.non-existent',)"}
exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)", assert(errs == exp_errs)
'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)",
'ia': "NotFoundException('http://web.archive.org/web/http://foo.bar.non-existent',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://foo.bar.non-existent',)"}
assert(errs == exp_errs) def test_handler_output_text(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))
def test_handler_output_text(): exp = b"""\
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))
exp = b"""\
com,vvork)/ 20141006184357 http://www.vvork.com/ memento http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/ http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/ rhiz com,vvork)/ 20141006184357 http://www.vvork.com/ memento http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/ http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/ rhiz
com,vvork)/ 20131004231540 http://vvork.com/ last memento http://wayback.archive-it.org/all/20131004231540/http://vvork.com/ http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/ ait com,vvork)/ 20131004231540 http://vvork.com/ last memento http://wayback.archive-it.org/all/20131004231540/http://vvork.com/ http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/ ait
""" """
assert(headers['Content-Type'] == 'text/plain') assert(headers['Content-Type'] == 'text/plain')
assert(b''.join(res) == exp) assert(b''.join(res) == exp)
assert(errs == {}) assert(errs == {})
def test_handler_list_sources(): def test_handler_list_sources(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0) agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg) handler = IndexHandler(agg)
headers, res, errs = handler(dict(mode='list_sources')) headers, res, errs = handler(dict(mode='list_sources'))
assert(headers == {}) assert(headers == {})
assert(res == {'sources': {'bl': 'memento', assert(res == {'sources': {'bl': 'memento',
'ait': 'memento', 'ait': 'memento',
'ia': 'memento', 'ia': 'memento',
'rhiz': 'memento', 'rhiz': 'memento',
'local': 'file'}}) 'local': 'file'}})
assert(errs == {}) assert(errs == {})

View File

@ -2,6 +2,8 @@ import json
import os import os
import tempfile import tempfile
import shutil import shutil
import yaml
import time
from multiprocessing import Process from multiprocessing import Process
@ -13,9 +15,11 @@ from wsgiref.simple_server import make_server
from pywb.webagg.aggregator import SimpleAggregator from pywb.webagg.aggregator import SimpleAggregator
from pywb.webagg.app import ResAggApp from pywb.webagg.app import ResAggApp
from pywb.webagg.handlers import DefaultResourceHandler from pywb.webagg.handlers import DefaultResourceHandler
from pywb.webagg.indexsource import LiveIndexSource from pywb.webagg.indexsource import LiveIndexSource, MementoIndexSource
from pywb import get_test_dir from pywb import get_test_dir
from pywb.utils.wbexception import NotFoundException
# ============================================================================ # ============================================================================
def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']): def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
@ -91,6 +95,44 @@ class TempDirTests(object):
shutil.rmtree(cls.root_dir) shutil.rmtree(cls.root_dir)
# ============================================================================
class MementoOverrideTests(object):
    """Test mixin that replays saved Memento ``Link`` headers.

    ``setup_class`` loads ``text_content/link_headers.yaml`` from the test
    directory, and ``mock_link_header`` builds a replacement for
    ``MementoIndexSource.get_timegate_links`` that answers from that saved
    data instead of calling the original implementation.
    """

    # Parsed contents of link_headers.yaml; indexed as
    # link_header_data[test_name][timegate_url]. Set by setup_class().
    link_header_data = None

    # MementoIndexSource.get_timegate_links as captured by setup_class(),
    # used by the ``load=True`` pass-through mode of mock_link_header().
    orig_get_timegate_links = None

    @classmethod
    def setup_class(cls):
        """Load the saved link headers and remember the original method.

        Calls the next ``setup_class`` in the MRO first, so this class must
        be combined with a base class that defines one.
        """
        super(MementoOverrideTests, cls).setup_class()

        # Load expected link headers
        MementoOverrideTests.link_header_data = None
        with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
            # safe_load: the fixture only holds plain mappings of strings,
            # and yaml.load() without an explicit Loader is deprecated
            # (and an error in PyYAML >= 6).
            MementoOverrideTests.link_header_data = yaml.safe_load(fh)

        MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links

    @classmethod
    def mock_link_header(cls, test_name, load=False):
        """Return a stand-in for ``MementoIndexSource.get_timegate_links``.

        :param test_name: top-level key in the saved link header data that
            holds the headers for the calling test.
        :param load: when true, delegate to the original method captured by
            ``setup_class`` and print the result in the same
            ``'timegate_url': 'header'`` layout as the fixture file.
        :returns: a function taking ``(self, params, closest)``.

        The returned function raises ``NotFoundException`` when no header is
        saved for ``test_name`` / ``self.timegate_url``.
        """
        def mock_func(self, params, closest):
            if load:
                res = cls.orig_get_timegate_links(self, params, closest)
                print(test_name + ': ')
                print("    '{0}': '{1}'".format(self.timegate_url, res))
                return res

            try:
                # Data is looked up lazily (at call time), since this factory
                # runs at decoration time, before setup_class() has loaded it.
                res = cls.link_header_data[test_name][self.timegate_url]
                # NOTE(review): presumably simulates the latency of a real
                # timegate request -- confirm before changing.
                time.sleep(0.2)
            except (KeyError, TypeError) as e:
                # KeyError: no saved entry; TypeError: data (or the section
                # for this test) is None. Both mean "nothing saved".
                print(e)
                msg = self.timegate_url.format(url=params['url'])
                raise NotFoundException(msg)

            return res

        return mock_func
# ============================================================================ # ============================================================================
class LiveServerTests(object): class LiveServerTests(object):
@classmethod @classmethod

View File

@ -28,4 +28,37 @@ agg_test_4:
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"' 'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
select_mem_1:
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20140806161228/http://vvork.com/>; rel="prev memento"; datetime="Wed, 06 Aug 2014 16:12:28 GMT", <http://web.archive.org/web/20141018133107/http://vvork.com/>; rel="memento"; datetime="Sat, 18 Oct 2014 13:31:07 GMT", <http://web.archive.org/web/20141020161243/http://vvork.com/>; rel="next memento"; datetime="Mon, 20 Oct 2014 16:12:43 GMT", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"'
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
select_mem_2:
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20151105012627/http://vvork.com/>; rel="prev memento"; datetime="Thu, 05 Nov 2015 01:26:27 GMT", <http://web.archive.org/web/20160110134855/http://vvork.com/>; rel="memento"; datetime="Sun, 10 Jan 2016 13:48:55 GMT", <http://web.archive.org/web/20160112032847/http://vvork.com/>; rel="next memento"; datetime="Tue, 12 Jan 2016 03:28:47 GMT", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"'
select_live:
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20161011164443/http://vvork.com/>; rel="prev memento"; datetime="Tue, 11 Oct 2016 16:44:43 GMT"'
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
select_local:
'http://web.archive.org/web/{url}': '<http://iana.org/>; rel="original", <http://web.archive.org/web/timemap/link/http://iana.org/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/19971210061738/http://iana.org/>; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", <http://web.archive.org/web/20140123034755/http://iana.org/>; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", <http://web.archive.org/web/20140126093743/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", <http://web.archive.org/web/20140129175203/http://iana.org/>; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", <http://web.archive.org/web/20161114190210/http://iana.org/>; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"'
select_local_postreq:
'http://web.archive.org/web/{url}': '<http://iana.org/>; rel="original", <http://web.archive.org/web/timemap/link/http://iana.org/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/19971210061738/http://iana.org/>; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", <http://web.archive.org/web/20140123034755/http://iana.org/>; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", <http://web.archive.org/web/20140126093743/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", <http://web.archive.org/web/20140129175203/http://iana.org/>; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", <http://web.archive.org/web/20161114190210/http://iana.org/>; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"'
select_live_postreq:
'http://web.archive.org/web/{url}': '<http://httpbin.org/get?foo=bar>; rel="original", <http://web.archive.org/web/timemap/link/http://httpbin.org/get?foo=bar>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20151022094449/http://httpbin.org/get?foo=bar>; rel="first last memento"; datetime="Thu, 22 Oct 2015 09:44:49 GMT"'
select_local_revisit:
'http://web.archive.org/web/{url}': '<http://example.com>; rel="original", <http://web.archive.org/web/timemap/link/http://example.com>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020120142510/http://example.com>; rel="first memento"; datetime="Sun, 20 Jan 2002 14:25:10 GMT", <http://web.archive.org/web/20140127153250/http://example.com>; rel="prev memento"; datetime="Mon, 27 Jan 2014 15:32:50 GMT", <http://web.archive.org/web/20140127182713/http://example.com>; rel="memento"; datetime="Mon, 27 Jan 2014 18:27:13 GMT", <http://web.archive.org/web/20140127201610/http://example.com>; rel="next memento"; datetime="Mon, 27 Jan 2014 20:16:10 GMT", <http://web.archive.org/web/20161115101437/http://example.com>; rel="last memento"; datetime="Tue, 15 Nov 2016 10:14:37 GMT"'