mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
tests: add MementoOverrideTests as a reusable class, convert memento_agg tests to use class,
handlers: add saved link header data for memento tests for handlers
This commit is contained in:
parent
c7fa8b711c
commit
d24868db7a
@ -18,8 +18,9 @@ from six.moves.urllib.parse import urlencode
|
||||
|
||||
import webtest
|
||||
from fakeredis import FakeStrictRedis
|
||||
from mock import patch
|
||||
|
||||
from .testutils import to_path, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
|
||||
from .testutils import to_path, MementoOverrideTests, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
|
||||
|
||||
import json
|
||||
|
||||
@ -31,7 +32,7 @@ sources = {
|
||||
}
|
||||
|
||||
|
||||
class TestResAgg(FakeRedisTests, BaseTestClass):
|
||||
class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
|
||||
def setup_class(cls):
|
||||
super(TestResAgg, cls).setup_class()
|
||||
|
||||
@ -162,6 +163,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
|
||||
|
||||
assert 'ResErrors' not in resp.headers
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_1'))
|
||||
def test_agg_select_mem_1(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001')
|
||||
|
||||
@ -176,6 +178,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
|
||||
|
||||
assert 'ResErrors' not in resp.headers
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_2'))
|
||||
def test_agg_select_mem_2(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231')
|
||||
|
||||
@ -190,6 +193,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
|
||||
|
||||
assert 'ResErrors' not in resp.headers
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live'))
|
||||
def test_agg_select_live(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=2016')
|
||||
|
||||
@ -202,6 +206,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
|
||||
|
||||
assert 'ResErrors' not in resp.headers
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local'))
|
||||
def test_agg_select_local(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624')
|
||||
|
||||
@ -214,6 +219,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
|
||||
|
||||
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_postreq'))
|
||||
def test_agg_select_local_postreq(self):
|
||||
req_data = """\
|
||||
GET / HTTP/1.1
|
||||
@ -233,6 +239,7 @@ Host: iana.org
|
||||
|
||||
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live_postreq'))
|
||||
def test_agg_live_postreq(self):
|
||||
req_data = """\
|
||||
GET /get?foo=bar HTTP/1.1
|
||||
@ -416,6 +423,7 @@ host: www.youtube.com\
|
||||
|
||||
assert resp.text == resp.headers['ResErrors']
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_revisit'))
|
||||
def test_agg_local_revisit(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local')
|
||||
|
||||
@ -442,6 +450,7 @@ host: www.youtube.com\
|
||||
assert resp.json == {'message': 'output=foobar not supported'}
|
||||
assert resp.text == resp.headers['ResErrors']
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_not_found'))
|
||||
def test_error_local_not_found(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://not-found.error/&sources=local', status=404)
|
||||
|
||||
|
@ -4,21 +4,17 @@ from pywb.webagg.aggregator import SimpleAggregator, GeventTimeoutAggregator
|
||||
from pywb.webagg.aggregator import BaseAggregator
|
||||
|
||||
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource
|
||||
from .testutils import to_json_list, to_path, TEST_CDX_PATH
|
||||
from .testutils import to_json_list, to_path, TEST_CDX_PATH, MementoOverrideTests, BaseTestClass
|
||||
|
||||
import json
|
||||
import pytest
|
||||
import time
|
||||
import six
|
||||
import yaml
|
||||
|
||||
from mock import patch
|
||||
|
||||
from pywb.webagg.handlers import IndexHandler
|
||||
|
||||
from pywb import get_test_dir
|
||||
from pywb.utils.wbexception import NotFoundException
|
||||
|
||||
|
||||
# Aggregator Mappings
|
||||
sources = {
|
||||
@ -40,239 +36,213 @@ agg_nf = {'simple': SimpleAggregator(nf),
|
||||
'gevent': GeventTimeoutAggregator(nf, timeout=5.0),
|
||||
}
|
||||
|
||||
# Load expected link headers
|
||||
link_header_data = None
|
||||
def setup_module():
|
||||
global link_header_data
|
||||
with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
|
||||
link_header_data = yaml.load(fh)
|
||||
|
||||
# ============================================================================
|
||||
class TestMemAgg(MementoOverrideTests, BaseTestClass):
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_1'))
|
||||
def test_mem_agg_index_1(self, agg):
|
||||
url = 'http://iana.org/'
|
||||
res, errs = agg(dict(url=url, closest='20140126000000', limit=5))
|
||||
|
||||
exp = [{"timestamp": "20140126093743", "load_url": "http://web.archive.org/web/20140126093743id_/http://iana.org/", "source": "ia"},
|
||||
{"timestamp": "20140126200624", "filename": "iana.warc.gz", "source": "local"},
|
||||
{"timestamp": "20140123034755", "load_url": "http://web.archive.org/web/20140123034755id_/http://iana.org/", "source": "ia"},
|
||||
{"timestamp": "20140129175203", "load_url": "http://web.archive.org/web/20140129175203id_/http://iana.org/", "source": "ia"},
|
||||
{"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"}
|
||||
]
|
||||
|
||||
assert(to_json_list(res) == exp)
|
||||
assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
|
||||
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
|
||||
|
||||
|
||||
orig_get_timegate_links = MementoIndexSource.get_timegate_links
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_2'))
|
||||
def test_mem_agg_index_2(self, agg):
|
||||
url = 'http://example.com/'
|
||||
res, errs = agg(dict(url=url, closest='20100512', limit=6))
|
||||
|
||||
def mock_link_header(test_name, load=False):
|
||||
def mock_func(self, params, closest):
|
||||
if load:
|
||||
res = orig_get_timegate_links(self, params, closest)
|
||||
print("'{0}': '{1}'".format(self.timegate_url, res))
|
||||
return res
|
||||
exp = [{"timestamp": "20100513010014", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100513010014id_/http://example.com/", "source": "bl"},
|
||||
{"timestamp": "20100512204410", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100512204410id_/http://example.com/", "source": "bl"},
|
||||
{"timestamp": "20100513224108", "load_url": "http://web.archive.org/web/20100513224108id_/http://example.com/", "source": "ia"},
|
||||
{"timestamp": "20100511201151", 'load_url': "http://wayback.archive-it.org/all/20100511201151id_/http://example.com/", "source": "ait"},
|
||||
{"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"},
|
||||
{"timestamp": "20100514231857", "load_url": "http://web.archive.org/web/20100514231857id_/http://example.com/", "source": "ia"},
|
||||
]
|
||||
|
||||
try:
|
||||
res = link_header_data[test_name][self.timegate_url]
|
||||
time.sleep(0.2)
|
||||
except:
|
||||
msg = self.timegate_url.format(url=params['url'])
|
||||
raise NotFoundException(msg)
|
||||
|
||||
return res
|
||||
|
||||
return mock_func
|
||||
assert(to_json_list(res) == exp)
|
||||
assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_1'))
|
||||
def test_mem_agg_index_1(agg):
|
||||
url = 'http://iana.org/'
|
||||
res, errs = agg(dict(url=url, closest='20140126000000', limit=5))
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_3'))
|
||||
def test_mem_agg_index_3(self, agg):
|
||||
url = 'http://vvork.com/'
|
||||
res, errs = agg(dict(url=url, closest='20141001', limit=5))
|
||||
|
||||
exp = [{"timestamp": "20140126093743", "load_url": "http://web.archive.org/web/20140126093743id_/http://iana.org/", "source": "ia"},
|
||||
{"timestamp": "20140126200624", "filename": "iana.warc.gz", "source": "local"},
|
||||
{"timestamp": "20140123034755", "load_url": "http://web.archive.org/web/20140123034755id_/http://iana.org/", "source": "ia"},
|
||||
{"timestamp": "20140129175203", "load_url": "http://web.archive.org/web/20140129175203id_/http://iana.org/", "source": "ia"},
|
||||
{"timestamp": "20140107040552", "load_url": "http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/", "source": "ait"}
|
||||
]
|
||||
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
|
||||
{"timestamp": "20141018133107", "load_url": "http://web.archive.org/web/20141018133107id_/http://vvork.com/", "source": "ia"},
|
||||
{"timestamp": "20141020161243", "load_url": "http://web.archive.org/web/20141020161243id_/http://vvork.com/", "source": "ia"},
|
||||
{"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"},
|
||||
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
|
||||
|
||||
assert(to_json_list(res) == exp)
|
||||
assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
|
||||
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
|
||||
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_2'))
|
||||
def test_mem_agg_index_2(agg):
|
||||
url = 'http://example.com/'
|
||||
res, errs = agg(dict(url=url, closest='20100512', limit=6))
|
||||
|
||||
exp = [{"timestamp": "20100513010014", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100513010014id_/http://example.com/", "source": "bl"},
|
||||
{"timestamp": "20100512204410", "load_url": "http://www.webarchive.org.uk/wayback/archive/20100512204410id_/http://example.com/", "source": "bl"},
|
||||
{"timestamp": "20100513224108", "load_url": "http://web.archive.org/web/20100513224108id_/http://example.com/", "source": "ia"},
|
||||
{"timestamp": "20100511201151", 'load_url': "http://wayback.archive-it.org/all/20100511201151id_/http://example.com/", "source": "ait"},
|
||||
{"timestamp": "20100514231857", "load_url": "http://wayback.archive-it.org/all/20100514231857id_/http://example.com/", "source": "ait"},
|
||||
{"timestamp": "20100514231857", "load_url": "http://web.archive.org/web/20100514231857id_/http://example.com/", "source": "ia"},
|
||||
]
|
||||
|
||||
assert(to_json_list(res) == exp)
|
||||
assert(errs == {'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://example.com/',)"})
|
||||
assert(to_json_list(res) == exp)
|
||||
assert(errs == {})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_3'))
|
||||
def test_mem_agg_index_3(agg):
|
||||
url = 'http://vvork.com/'
|
||||
res, errs = agg(dict(url=url, closest='20141001', limit=5))
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_4'))
|
||||
def test_mem_agg_index_4(self, agg):
|
||||
url = 'http://vvork.com/'
|
||||
res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
|
||||
|
||||
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
|
||||
{"timestamp": "20141018133107", "load_url": "http://web.archive.org/web/20141018133107id_/http://vvork.com/", "source": "ia"},
|
||||
{"timestamp": "20141020161243", "load_url": "http://web.archive.org/web/20141020161243id_/http://vvork.com/", "source": "ia"},
|
||||
{"timestamp": "20140806161228", "load_url": "http://web.archive.org/web/20140806161228id_/http://vvork.com/", "source": "ia"},
|
||||
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
|
||||
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
|
||||
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
|
||||
|
||||
assert(to_json_list(res) == exp)
|
||||
assert(errs == {})
|
||||
assert(to_json_list(res) == exp)
|
||||
assert(errs == {})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_4'))
|
||||
def test_mem_agg_index_4(agg):
|
||||
url = 'http://vvork.com/'
|
||||
res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
|
||||
@pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys()))
|
||||
def test_mem_agg_not_found(self, agg):
|
||||
url = 'http://vvork.com/'
|
||||
res, errs = agg(dict(url=url, closest='20141001', limit=2))
|
||||
|
||||
exp = [{"timestamp": "20141006184357", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"},
|
||||
{"timestamp": "20131004231540", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}]
|
||||
|
||||
assert(to_json_list(res) == exp)
|
||||
assert(errs == {})
|
||||
assert(to_json_list(res) == [])
|
||||
assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys()))
|
||||
def test_mem_agg_not_found(agg):
|
||||
url = 'http://vvork.com/'
|
||||
res, errs = agg(dict(url=url, closest='20141001', limit=2))
|
||||
@pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys()))
|
||||
def test_mem_agg_timeout(self, agg):
|
||||
url = 'http://vvork.com/'
|
||||
|
||||
assert(to_json_list(res) == [])
|
||||
assert(errs == {'notfound': "NotFoundException('testdata/not-found-x',)"})
|
||||
orig_source = BaseAggregator.load_child_source
|
||||
def load_child_source(self, name, source, params):
|
||||
time.sleep(0.1)
|
||||
return orig_source(self, name, source, params)
|
||||
|
||||
BaseAggregator.load_child_source = load_child_source
|
||||
res, errs = agg(dict(url=url, closest='20141001', limit=2))
|
||||
BaseAggregator.load_child_source = orig_source
|
||||
|
||||
assert(to_json_list(res) == [])
|
||||
assert(errs == {'local': 'timeout',
|
||||
'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys()))
|
||||
def test_mem_agg_timeout(agg):
|
||||
url = 'http://vvork.com/'
|
||||
def test_handler_output_cdxj(self):
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://vvork.com/'
|
||||
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
|
||||
|
||||
orig_source = BaseAggregator.load_child_source
|
||||
def load_child_source(self, name, source, params):
|
||||
time.sleep(0.1)
|
||||
return orig_source(name, source, params)
|
||||
|
||||
BaseAggregator.load_child_source = load_child_source
|
||||
res, errs = agg(dict(url=url, closest='20141001', limit=2))
|
||||
BaseAggregator.load_child_source = orig_source
|
||||
|
||||
assert(to_json_list(res) == [])
|
||||
assert(errs == {'local': 'timeout',
|
||||
'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
|
||||
|
||||
|
||||
def test_handler_output_cdxj():
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://vvork.com/'
|
||||
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
|
||||
|
||||
exp = b"""\
|
||||
exp = b"""\
|
||||
com,vvork)/ 20141006184357 {"url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
|
||||
com,vvork)/ 20131004231540 {"url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}
|
||||
"""
|
||||
|
||||
assert(headers['Content-Type'] == 'text/x-cdxj')
|
||||
assert(b''.join(res) == exp)
|
||||
assert(errs == {})
|
||||
assert(headers['Content-Type'] == 'text/x-cdxj')
|
||||
assert(b''.join(res) == exp)
|
||||
assert(errs == {})
|
||||
|
||||
|
||||
def test_handler_output_json():
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://vvork.com/'
|
||||
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json'))
|
||||
def test_handler_output_json(self):
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://vvork.com/'
|
||||
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json'))
|
||||
|
||||
exp = b"""\
|
||||
exp = b"""\
|
||||
{"urlkey": "com,vvork)/", "timestamp": "20141006184357", "url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
|
||||
{"urlkey": "com,vvork)/", "timestamp": "20131004231540", "url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}
|
||||
"""
|
||||
|
||||
assert(headers['Content-Type'] == 'application/x-ndjson')
|
||||
assert(b''.join(res) == exp)
|
||||
assert(errs == {})
|
||||
assert(headers['Content-Type'] == 'application/x-ndjson')
|
||||
assert(b''.join(res) == exp)
|
||||
assert(errs == {})
|
||||
|
||||
def test_handler_output_link():
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://vvork.com/'
|
||||
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link'))
|
||||
def test_handler_output_link(self):
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://vvork.com/'
|
||||
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link'))
|
||||
|
||||
exp = b"""\
|
||||
exp = b"""\
|
||||
<http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT"; src="rhiz",
|
||||
<http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/>; rel="memento"; datetime="Fri, 04 Oct 2013 23:15:40 GMT"; src="ait"
|
||||
"""
|
||||
assert(headers['Content-Type'] == 'application/link')
|
||||
assert(b''.join(res) == exp)
|
||||
assert(errs == {})
|
||||
assert(headers['Content-Type'] == 'application/link')
|
||||
assert(b''.join(res) == exp)
|
||||
assert(errs == {})
|
||||
|
||||
|
||||
def test_handler_output_link_2():
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://iana.org/'
|
||||
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
|
||||
def test_handler_output_link_2(self):
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://iana.org/'
|
||||
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
|
||||
|
||||
exp = b"""\
|
||||
exp = b"""\
|
||||
<http://web.archive.org/web/20140126093743id_/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT"; src="ia",
|
||||
<file://iana.warc.gz:334:2258>; rel="memento"; datetime="Sun, 26 Jan 2014 20:06:24 GMT"; src="local",
|
||||
<http://web.archive.org/web/20140123034755id_/http://iana.org/>; rel="memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT"; src="ia",
|
||||
<http://web.archive.org/web/20140129175203id_/http://iana.org/>; rel="memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT"; src="ia",
|
||||
<http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/>; rel="memento"; datetime="Tue, 07 Jan 2014 04:05:52 GMT"; src="ait"
|
||||
"""
|
||||
assert(headers['Content-Type'] == 'application/link')
|
||||
assert(b''.join(res) == exp)
|
||||
assert(headers['Content-Type'] == 'application/link')
|
||||
assert(b''.join(res) == exp)
|
||||
|
||||
exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
|
||||
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
|
||||
exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
|
||||
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
|
||||
|
||||
assert(errs == exp_errs)
|
||||
assert(errs == exp_errs)
|
||||
|
||||
|
||||
def test_handler_output_link_3(self):
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://foo.bar.non-existent'
|
||||
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
|
||||
|
||||
def test_handler_output_link_3():
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://foo.bar.non-existent'
|
||||
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
|
||||
exp = b''
|
||||
|
||||
exp = b''
|
||||
assert(headers['Content-Type'] == 'application/link')
|
||||
assert(b''.join(res) == exp)
|
||||
|
||||
assert(headers['Content-Type'] == 'application/link')
|
||||
assert(b''.join(res) == exp)
|
||||
exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)",
|
||||
'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)",
|
||||
'ia': "NotFoundException('http://web.archive.org/web/http://foo.bar.non-existent',)",
|
||||
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://foo.bar.non-existent',)"}
|
||||
|
||||
exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)",
|
||||
'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)",
|
||||
'ia': "NotFoundException('http://web.archive.org/web/http://foo.bar.non-existent',)",
|
||||
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://foo.bar.non-existent',)"}
|
||||
assert(errs == exp_errs)
|
||||
|
||||
assert(errs == exp_errs)
|
||||
def test_handler_output_text(self):
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://vvork.com/'
|
||||
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))
|
||||
|
||||
def test_handler_output_text():
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
url = 'http://vvork.com/'
|
||||
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))
|
||||
|
||||
exp = b"""\
|
||||
exp = b"""\
|
||||
com,vvork)/ 20141006184357 http://www.vvork.com/ memento http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/ http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/ rhiz
|
||||
com,vvork)/ 20131004231540 http://vvork.com/ last memento http://wayback.archive-it.org/all/20131004231540/http://vvork.com/ http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/ ait
|
||||
"""
|
||||
assert(headers['Content-Type'] == 'text/plain')
|
||||
assert(b''.join(res) == exp)
|
||||
assert(errs == {})
|
||||
assert(headers['Content-Type'] == 'text/plain')
|
||||
assert(b''.join(res) == exp)
|
||||
assert(errs == {})
|
||||
|
||||
|
||||
def test_handler_list_sources():
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
headers, res, errs = handler(dict(mode='list_sources'))
|
||||
def test_handler_list_sources(self):
|
||||
agg = GeventTimeoutAggregator(sources, timeout=5.0)
|
||||
handler = IndexHandler(agg)
|
||||
headers, res, errs = handler(dict(mode='list_sources'))
|
||||
|
||||
assert(headers == {})
|
||||
assert(res == {'sources': {'bl': 'memento',
|
||||
'ait': 'memento',
|
||||
'ia': 'memento',
|
||||
'rhiz': 'memento',
|
||||
'local': 'file'}})
|
||||
assert(errs == {})
|
||||
assert(headers == {})
|
||||
assert(res == {'sources': {'bl': 'memento',
|
||||
'ait': 'memento',
|
||||
'ia': 'memento',
|
||||
'rhiz': 'memento',
|
||||
'local': 'file'}})
|
||||
assert(errs == {})
|
||||
|
||||
|
||||
|
@ -2,6 +2,8 @@ import json
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
import yaml
|
||||
import time
|
||||
|
||||
from multiprocessing import Process
|
||||
|
||||
@ -13,9 +15,11 @@ from wsgiref.simple_server import make_server
|
||||
from pywb.webagg.aggregator import SimpleAggregator
|
||||
from pywb.webagg.app import ResAggApp
|
||||
from pywb.webagg.handlers import DefaultResourceHandler
|
||||
from pywb.webagg.indexsource import LiveIndexSource
|
||||
from pywb.webagg.indexsource import LiveIndexSource, MementoIndexSource
|
||||
|
||||
from pywb import get_test_dir
|
||||
from pywb.utils.wbexception import NotFoundException
|
||||
|
||||
|
||||
# ============================================================================
|
||||
def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
|
||||
@ -91,6 +95,44 @@ class TempDirTests(object):
|
||||
shutil.rmtree(cls.root_dir)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class MementoOverrideTests(object):
    """Test mixin that replays saved Memento link headers.

    ``setup_class`` loads ``text_content/link_headers.yaml`` from the test
    directory and remembers the current ``MementoIndexSource.get_timegate_links``.
    ``mock_link_header`` then builds a drop-in replacement for that method,
    suitable for use with ``mock.patch``, which answers from the loaded data.

    The class calls ``super().setup_class()``, so it must be combined with a
    base class that defines ``setup_class``.
    """

    # Parsed contents of link_headers.yaml, indexed as
    # link_header_data[test_name][timegate_url] -> saved link header value.
    link_header_data = None

    # MementoIndexSource.get_timegate_links as it was when setup_class ran;
    # used by the load=True mode of mock_link_header.
    orig_get_timegate_links = None

    @classmethod
    def setup_class(cls):
        """Load the saved link headers and record the original method."""
        super(MementoOverrideTests, cls).setup_class()

        # Load expected link headers
        MementoOverrideTests.link_header_data = None
        with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
            # safe_load: the fixture is plain mappings/strings, and a bare
            # yaml.load() without a Loader is rejected by current PyYAML.
            MementoOverrideTests.link_header_data = yaml.safe_load(fh)

        MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links

    @classmethod
    def mock_link_header(cls, test_name, load=False):
        """Return a replacement for ``MementoIndexSource.get_timegate_links``.

        :param test_name: top-level key in the loaded link header data that
            holds the saved headers for one test.
        :param load: when true, the replacement calls the recorded original
            method and prints ``test_name`` plus the timegate url and result,
            then returns that result instead of using saved data.
        :returns: a function ``(self, params, closest)``; it returns the saved
            value for ``self.timegate_url`` or raises ``NotFoundException``
            when no saved value can be looked up.
        """
        def mock_func(self, params, closest):
            if load:
                res = cls.orig_get_timegate_links(self, params, closest)
                print(test_name + ': ')
                print("    '{0}': '{1}'".format(self.timegate_url, res))
                return res

            try:
                res = cls.link_header_data[test_name][self.timegate_url]
            except (KeyError, TypeError) as e:
                # KeyError: no entry for this test name / timegate url.
                # TypeError: the data (or this test's section) is None.
                print(e)
                msg = self.timegate_url.format(url=params['url'])
                raise NotFoundException(msg)

            # Pause only after a successful lookup
            # NOTE(review): presumably stands in for remote latency -- confirm
            time.sleep(0.2)
            return res

        return mock_func
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class LiveServerTests(object):
|
||||
@classmethod
|
||||
|
@ -28,4 +28,37 @@ agg_test_4:
|
||||
|
||||
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
|
||||
|
||||
select_mem_1:
|
||||
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20140806161228/http://vvork.com/>; rel="prev memento"; datetime="Wed, 06 Aug 2014 16:12:28 GMT", <http://web.archive.org/web/20141018133107/http://vvork.com/>; rel="memento"; datetime="Sat, 18 Oct 2014 13:31:07 GMT", <http://web.archive.org/web/20141020161243/http://vvork.com/>; rel="next memento"; datetime="Mon, 20 Oct 2014 16:12:43 GMT", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"'
|
||||
|
||||
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
|
||||
|
||||
|
||||
select_mem_2:
|
||||
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
|
||||
|
||||
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20151105012627/http://vvork.com/>; rel="prev memento"; datetime="Thu, 05 Nov 2015 01:26:27 GMT", <http://web.archive.org/web/20160110134855/http://vvork.com/>; rel="memento"; datetime="Sun, 10 Jan 2016 13:48:55 GMT", <http://web.archive.org/web/20160112032847/http://vvork.com/>; rel="next memento"; datetime="Tue, 12 Jan 2016 03:28:47 GMT", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"'
|
||||
|
||||
|
||||
select_live:
|
||||
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20161011164443/http://vvork.com/>; rel="prev memento"; datetime="Tue, 11 Oct 2016 16:44:43 GMT"'
|
||||
|
||||
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
|
||||
|
||||
select_local:
|
||||
'http://web.archive.org/web/{url}': '<http://iana.org/>; rel="original", <http://web.archive.org/web/timemap/link/http://iana.org/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/19971210061738/http://iana.org/>; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", <http://web.archive.org/web/20140123034755/http://iana.org/>; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", <http://web.archive.org/web/20140126093743/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", <http://web.archive.org/web/20140129175203/http://iana.org/>; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", <http://web.archive.org/web/20161114190210/http://iana.org/>; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"'
|
||||
|
||||
|
||||
select_local_postreq:
|
||||
'http://web.archive.org/web/{url}': '<http://iana.org/>; rel="original", <http://web.archive.org/web/timemap/link/http://iana.org/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/19971210061738/http://iana.org/>; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", <http://web.archive.org/web/20140123034755/http://iana.org/>; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", <http://web.archive.org/web/20140126093743/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", <http://web.archive.org/web/20140129175203/http://iana.org/>; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", <http://web.archive.org/web/20161114190210/http://iana.org/>; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"'
|
||||
|
||||
|
||||
select_live_postreq:
|
||||
'http://web.archive.org/web/{url}': '<http://httpbin.org/get?foo=bar>; rel="original", <http://web.archive.org/web/timemap/link/http://httpbin.org/get?foo=bar>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20151022094449/http://httpbin.org/get?foo=bar>; rel="first last memento"; datetime="Thu, 22 Oct 2015 09:44:49 GMT"'
|
||||
|
||||
|
||||
select_local_revisit:
|
||||
'http://web.archive.org/web/{url}': '<http://example.com>; rel="original", <http://web.archive.org/web/timemap/link/http://example.com>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020120142510/http://example.com>; rel="first memento"; datetime="Sun, 20 Jan 2002 14:25:10 GMT", <http://web.archive.org/web/20140127153250/http://example.com>; rel="prev memento"; datetime="Mon, 27 Jan 2014 15:32:50 GMT", <http://web.archive.org/web/20140127182713/http://example.com>; rel="memento"; datetime="Mon, 27 Jan 2014 18:27:13 GMT", <http://web.archive.org/web/20140127201610/http://example.com>; rel="next memento"; datetime="Mon, 27 Jan 2014 20:16:10 GMT", <http://web.archive.org/web/20161115101437/http://example.com>; rel="last memento"; datetime="Tue, 15 Nov 2016 10:14:37 GMT"'
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user