1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

tests: add MementoOverrideTests as a reusable class and convert the memento_agg tests to use it

handlers: add saved link header data for the handler memento tests
This commit is contained in:
Ilya Kreymer 2016-11-15 14:24:34 -08:00
parent c7fa8b711c
commit d24868db7a
4 changed files with 234 additions and 180 deletions

View File

@ -18,8 +18,9 @@ from six.moves.urllib.parse import urlencode
import webtest
from fakeredis import FakeStrictRedis
from mock import patch
from .testutils import to_path, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
from .testutils import to_path, MementoOverrideTests, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
import json
@ -31,7 +32,7 @@ sources = {
}
class TestResAgg(FakeRedisTests, BaseTestClass):
class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
def setup_class(cls):
super(TestResAgg, cls).setup_class()
@ -162,6 +163,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_1'))
def test_agg_select_mem_1(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001')
@ -176,6 +178,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_2'))
def test_agg_select_mem_2(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231')
@ -190,6 +193,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live'))
def test_agg_select_live(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=2016')
@ -202,6 +206,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local'))
def test_agg_select_local(self):
resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624')
@ -214,6 +219,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_postreq'))
def test_agg_select_local_postreq(self):
req_data = """\
GET / HTTP/1.1
@ -233,6 +239,7 @@ Host: iana.org
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live_postreq'))
def test_agg_live_postreq(self):
req_data = """\
GET /get?foo=bar HTTP/1.1
@ -416,6 +423,7 @@ host: www.youtube.com\
assert resp.text == resp.headers['ResErrors']
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_revisit'))
def test_agg_local_revisit(self):
resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local')
@ -442,6 +450,7 @@ host: www.youtube.com\
assert resp.json == {'message': 'output=foobar not supported'}
assert resp.text == resp.headers['ResErrors']
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_not_found'))
def test_error_local_not_found(self):
resp = self.testapp.get('/many/resource?url=http://not-found.error/&sources=local', status=404)

View File

@ -4,21 +4,17 @@ from pywb.webagg.aggregator import SimpleAggregator, GeventTimeoutAggregator
from pywb.webagg.aggregator import BaseAggregator
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource
from .testutils import to_json_list, to_path, TEST_CDX_PATH
from .testutils import to_json_list, to_path, TEST_CDX_PATH, MementoOverrideTests, BaseTestClass
import json
import pytest
import time
import six
import yaml
from mock import patch
from pywb.webagg.handlers import IndexHandler
from pywb import get_test_dir
from pywb.utils.wbexception import NotFoundException
# Aggregator Mappings
sources = {
@ -40,38 +36,12 @@ agg_nf = {'simple': SimpleAggregator(nf),
'gevent': GeventTimeoutAggregator(nf, timeout=5.0),
}
# Load expected link headers
link_header_data = None


def setup_module():
    """Populate the module-level ``link_header_data`` from the yaml fixture.

    Reads ``text_content/link_headers.yaml`` under the pywb test directory
    and stores the parsed result in the ``link_header_data`` global, where
    ``mock_link_header`` looks entries up.
    """
    global link_header_data
    with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
        # safe_load instead of load: calling yaml.load() without an explicit
        # Loader is deprecated (and rejected by newer PyYAML releases), and a
        # test fixture never needs arbitrary object construction.
        link_header_data = yaml.safe_load(fh)
# Keep a reference to the real implementation so that ``load=True`` can still
# reach it while the attribute is patched.
orig_get_timegate_links = MementoIndexSource.get_timegate_links


def mock_link_header(test_name, load=False):
    """Build a replacement for ``MementoIndexSource.get_timegate_links``.

    :param test_name: top-level key into ``link_header_data`` selecting the
        saved headers for one test
    :param load: when true, the replacement calls the original
        ``get_timegate_links`` and prints the timegate url and result instead
        of using the saved data
    :return: a function taking ``(self, params, closest)``

    The returned function raises ``NotFoundException`` when no saved header
    is found for ``test_name`` / ``self.timegate_url``.
    """
    def mock_func(self, params, closest):
        if load:
            res = orig_get_timegate_links(self, params, closest)
            print("'{0}': '{1}'".format(self.timegate_url, res))
            return res

        try:
            res = link_header_data[test_name][self.timegate_url]
            # NOTE(review): presumably simulates remote latency -- confirm
            time.sleep(0.2)
        except Exception:
            # ``except Exception`` rather than a bare ``except:`` so that
            # KeyboardInterrupt / SystemExit are not turned into a
            # NotFoundException.
            msg = self.timegate_url.format(url=params['url'])
            raise NotFoundException(msg)

        return res

    return mock_func
# ============================================================================
class TestMemAgg(MementoOverrideTests, BaseTestClass):
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_1'))
def test_mem_agg_index_1(agg):
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_1'))
def test_mem_agg_index_1(self, agg):
url = 'http://iana.org/'
res, errs = agg(dict(url=url, closest='20140126000000', limit=5))
@ -86,9 +56,10 @@ def test_mem_agg_index_1(agg):
assert(errs == {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"})
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_2'))
def test_mem_agg_index_2(agg):
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_2'))
def test_mem_agg_index_2(self, agg):
url = 'http://example.com/'
res, errs = agg(dict(url=url, closest='20100512', limit=6))
@ -105,8 +76,8 @@ def test_mem_agg_index_2(agg):
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_3'))
def test_mem_agg_index_3(agg):
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_3'))
def test_mem_agg_index_3(self, agg):
url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=5))
@ -121,8 +92,8 @@ def test_mem_agg_index_3(agg):
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header('agg_test_4'))
def test_mem_agg_index_4(agg):
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_4'))
def test_mem_agg_index_4(self, agg):
url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
@ -134,7 +105,7 @@ def test_mem_agg_index_4(agg):
@pytest.mark.parametrize("agg", list(agg_nf.values()), ids=list(agg_nf.keys()))
def test_mem_agg_not_found(agg):
def test_mem_agg_not_found(self, agg):
url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=2))
@ -143,13 +114,13 @@ def test_mem_agg_not_found(agg):
@pytest.mark.parametrize("agg", list(agg_tm.values()), ids=list(agg_tm.keys()))
def test_mem_agg_timeout(agg):
def test_mem_agg_timeout(self, agg):
url = 'http://vvork.com/'
orig_source = BaseAggregator.load_child_source
def load_child_source(self, name, source, params):
time.sleep(0.1)
return orig_source(name, source, params)
return orig_source(self, name, source, params)
BaseAggregator.load_child_source = load_child_source
res, errs = agg(dict(url=url, closest='20141001', limit=2))
@ -160,7 +131,7 @@ def test_mem_agg_timeout(agg):
'ait': 'timeout', 'bl': 'timeout', 'ia': 'timeout', 'rhiz': 'timeout'})
def test_handler_output_cdxj():
def test_handler_output_cdxj(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
@ -176,7 +147,7 @@ com,vvork)/ 20131004231540 {"url": "http://vvork.com/", "mem_rel": "last memento
assert(errs == {})
def test_handler_output_json():
def test_handler_output_json(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
@ -191,7 +162,7 @@ def test_handler_output_json():
assert(b''.join(res) == exp)
assert(errs == {})
def test_handler_output_link():
def test_handler_output_link(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
@ -206,7 +177,7 @@ def test_handler_output_link():
assert(errs == {})
def test_handler_output_link_2():
def test_handler_output_link_2(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://iana.org/'
@ -228,8 +199,7 @@ def test_handler_output_link_2():
assert(errs == exp_errs)
def test_handler_output_link_3():
def test_handler_output_link_3(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://foo.bar.non-existent'
@ -247,7 +217,7 @@ def test_handler_output_link_3():
assert(errs == exp_errs)
def test_handler_output_text():
def test_handler_output_text(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
url = 'http://vvork.com/'
@ -262,7 +232,7 @@ com,vvork)/ 20131004231540 http://vvork.com/ last memento http://wayback.archive
assert(errs == {})
def test_handler_list_sources():
def test_handler_list_sources(self):
agg = GeventTimeoutAggregator(sources, timeout=5.0)
handler = IndexHandler(agg)
headers, res, errs = handler(dict(mode='list_sources'))

View File

@ -2,6 +2,8 @@ import json
import os
import tempfile
import shutil
import yaml
import time
from multiprocessing import Process
@ -13,9 +15,11 @@ from wsgiref.simple_server import make_server
from pywb.webagg.aggregator import SimpleAggregator
from pywb.webagg.app import ResAggApp
from pywb.webagg.handlers import DefaultResourceHandler
from pywb.webagg.indexsource import LiveIndexSource
from pywb.webagg.indexsource import LiveIndexSource, MementoIndexSource
from pywb import get_test_dir
from pywb.utils.wbexception import NotFoundException
# ============================================================================
def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
@ -91,6 +95,44 @@ class TempDirTests(object):
shutil.rmtree(cls.root_dir)
# ============================================================================
class MementoOverrideTests(object):
    """Test mixin that serves saved Link headers instead of live lookups.

    ``setup_class`` loads ``text_content/link_headers.yaml`` and remembers
    the current ``MementoIndexSource.get_timegate_links``.
    ``mock_link_header`` builds the per-test replacement function.
    """

    # Parsed contents of link_headers.yaml; set by setup_class().
    link_header_data = None

    # MementoIndexSource.get_timegate_links as it was when setup_class() ran.
    orig_get_timegate_links = None

    @classmethod
    def setup_class(cls):
        """Load the saved link headers and capture the original lookup.

        The next ``setup_class`` in the MRO is called first, so this class
        has to be combined with a base class that defines ``setup_class``.
        """
        super(MementoOverrideTests, cls).setup_class()

        # Load expected link headers
        MementoOverrideTests.link_header_data = None
        with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
            # safe_load instead of load: calling yaml.load() without an
            # explicit Loader is deprecated (and rejected by newer PyYAML
            # releases), and a test fixture never needs arbitrary object
            # construction.
            MementoOverrideTests.link_header_data = yaml.safe_load(fh)

        MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links

    @classmethod
    def mock_link_header(cls, test_name, load=False):
        """Build a replacement for ``MementoIndexSource.get_timegate_links``.

        :param test_name: top-level key into ``link_header_data`` selecting
            the saved headers for one test
        :param load: when true, the replacement calls the captured original
            ``get_timegate_links`` and prints the test name, timegate url and
            result instead of using the saved data
        :return: a function taking ``(self, params, closest)``

        The returned function raises ``NotFoundException`` when no saved
        header is found for ``test_name`` / ``self.timegate_url``.
        """
        def mock_func(self, params, closest):
            if load:
                res = cls.orig_get_timegate_links(self, params, closest)
                print(test_name + ': ')
                print(" '{0}': '{1}'".format(self.timegate_url, res))
                return res

            try:
                res = cls.link_header_data[test_name][self.timegate_url]
                # NOTE(review): presumably simulates remote latency -- confirm
                time.sleep(0.2)
            except Exception as e:
                # Any lookup failure (unknown test name, unknown timegate,
                # data not loaded) is reported as a not-found for this source.
                print(e)
                msg = self.timegate_url.format(url=params['url'])
                raise NotFoundException(msg)

            return res

        return mock_func
# ============================================================================
class LiveServerTests(object):
@classmethod

View File

@ -28,4 +28,37 @@ agg_test_4:
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
select_mem_1:
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20140806161228/http://vvork.com/>; rel="prev memento"; datetime="Wed, 06 Aug 2014 16:12:28 GMT", <http://web.archive.org/web/20141018133107/http://vvork.com/>; rel="memento"; datetime="Sat, 18 Oct 2014 13:31:07 GMT", <http://web.archive.org/web/20141020161243/http://vvork.com/>; rel="next memento"; datetime="Mon, 20 Oct 2014 16:12:43 GMT", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"'
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
select_mem_2:
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20151105012627/http://vvork.com/>; rel="prev memento"; datetime="Thu, 05 Nov 2015 01:26:27 GMT", <http://web.archive.org/web/20160110134855/http://vvork.com/>; rel="memento"; datetime="Sun, 10 Jan 2016 13:48:55 GMT", <http://web.archive.org/web/20160112032847/http://vvork.com/>; rel="next memento"; datetime="Tue, 12 Jan 2016 03:28:47 GMT", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT"'
select_live:
'http://web.archive.org/web/{url}': '<http://vvork.com/>; rel="original", <http://web.archive.org/web/timemap/link/http://vvork.com/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20161027001353/http://vvork.com/>; rel="last memento"; datetime="Thu, 27 Oct 2016 00:13:53 GMT", <http://web.archive.org/web/20020727091331/http://vvork.com/>; rel="first memento"; datetime="Sat, 27 Jul 2002 09:13:31 GMT", <http://web.archive.org/web/20161011164443/http://vvork.com/>; rel="prev memento"; datetime="Tue, 11 Oct 2016 16:44:43 GMT"'
'http://webenact.rhizome.org/vvork/{url}': '<http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT", <http://www.vvork.com/>; rel="original", <http://webenact.rhizome.org/vvork/timemap/*/http://www.vvork.com/>; rel="timemap"; type="application/link-format"'
select_local:
'http://web.archive.org/web/{url}': '<http://iana.org/>; rel="original", <http://web.archive.org/web/timemap/link/http://iana.org/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/19971210061738/http://iana.org/>; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", <http://web.archive.org/web/20140123034755/http://iana.org/>; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", <http://web.archive.org/web/20140126093743/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", <http://web.archive.org/web/20140129175203/http://iana.org/>; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", <http://web.archive.org/web/20161114190210/http://iana.org/>; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"'
select_local_postreq:
'http://web.archive.org/web/{url}': '<http://iana.org/>; rel="original", <http://web.archive.org/web/timemap/link/http://iana.org/>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/19971210061738/http://iana.org/>; rel="first memento"; datetime="Wed, 10 Dec 1997 06:17:38 GMT", <http://web.archive.org/web/20140123034755/http://iana.org/>; rel="prev memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT", <http://web.archive.org/web/20140126093743/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT", <http://web.archive.org/web/20140129175203/http://iana.org/>; rel="next memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT", <http://web.archive.org/web/20161114190210/http://iana.org/>; rel="last memento"; datetime="Mon, 14 Nov 2016 19:02:10 GMT"'
select_live_postreq:
'http://web.archive.org/web/{url}': '<http://httpbin.org/get?foo=bar>; rel="original", <http://web.archive.org/web/timemap/link/http://httpbin.org/get?foo=bar>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20151022094449/http://httpbin.org/get?foo=bar>; rel="first last memento"; datetime="Thu, 22 Oct 2015 09:44:49 GMT"'
select_local_revisit:
'http://web.archive.org/web/{url}': '<http://example.com>; rel="original", <http://web.archive.org/web/timemap/link/http://example.com>; rel="timemap"; type="application/link-format", <http://web.archive.org/web/20020120142510/http://example.com>; rel="first memento"; datetime="Sun, 20 Jan 2002 14:25:10 GMT", <http://web.archive.org/web/20140127153250/http://example.com>; rel="prev memento"; datetime="Mon, 27 Jan 2014 15:32:50 GMT", <http://web.archive.org/web/20140127182713/http://example.com>; rel="memento"; datetime="Mon, 27 Jan 2014 18:27:13 GMT", <http://web.archive.org/web/20140127201610/http://example.com>; rel="next memento"; datetime="Mon, 27 Jan 2014 20:16:10 GMT", <http://web.archive.org/web/20161115101437/http://example.com>; rel="last memento"; datetime="Tue, 15 Nov 2016 10:14:37 GMT"'