mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
webagg: Fix loading of url-lookup (url agnostic) revisits, ensure all params passed to cdx lookup, add tests for url-agnostic revisit lookup
This commit is contained in:
parent
20b161bf90
commit
c93d7ecafc
BIN
testdata/example-url-agnostic-orig.warc.gz
vendored
Normal file
BIN
testdata/example-url-agnostic-orig.warc.gz
vendored
Normal file
Binary file not shown.
BIN
testdata/example-url-agnostic-revisit.warc.gz
vendored
Normal file
BIN
testdata/example-url-agnostic-revisit.warc.gz
vendored
Normal file
Binary file not shown.
2
testdata/url-agnost-example.cdxj
vendored
Normal file
2
testdata/url-agnost-example.cdxj
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
com,example)/ 20130729195151 {"url": "http://test@example.com/", "mime": "warc/revisit", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "length": "591", "offset": "355", "filename": "example-url-agnostic-revisit.warc.gz"}
|
||||||
|
org,iana,example)/ 20130702195402 {"url": "http://example.iana.org/", "mime": "text/html", "status": "200", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "length": "1001", "offset": "353", "filename": "example-url-agnostic-orig.warc.gz"}
|
@ -166,10 +166,6 @@ class WARCPathLoader(BaseLoader):
|
|||||||
|
|
||||||
self.cdx_source = cdx_source
|
self.cdx_source = cdx_source
|
||||||
|
|
||||||
def cdx_index_source(self, *args, **kwargs):
|
|
||||||
cdx_iter, errs = self.cdx_source(*args, **kwargs)
|
|
||||||
return cdx_iter
|
|
||||||
|
|
||||||
def _make_resolver(self, path):
|
def _make_resolver(self, path):
|
||||||
if hasattr(path, '__call__'):
|
if hasattr(path, '__call__'):
|
||||||
return path
|
return path
|
||||||
@ -188,13 +184,26 @@ class WARCPathLoader(BaseLoader):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
orig_source = cdx.get('source', '').split(':')[0]
|
orig_source = cdx.get('source', '').split(':')[0]
|
||||||
cdx._formatter = ParamFormatter(params, orig_source)
|
formatter = ParamFormatter(params, orig_source)
|
||||||
|
cdx._formatter = formatter
|
||||||
|
|
||||||
|
def local_index_query(local_params):
|
||||||
|
for n, v in six.iteritems(params):
|
||||||
|
if n.startswith('param.'):
|
||||||
|
local_params[n] = v
|
||||||
|
|
||||||
|
cdx_iter, errs = self.cdx_source(local_params)
|
||||||
|
for cdx in cdx_iter:
|
||||||
|
cdx._formatter = formatter
|
||||||
|
yield cdx
|
||||||
|
|
||||||
|
return cdx_iter
|
||||||
|
|
||||||
failed_files = []
|
failed_files = []
|
||||||
headers, payload = (self.resolve_loader.
|
headers, payload = (self.resolve_loader.
|
||||||
load_headers_and_payload(cdx,
|
load_headers_and_payload(cdx,
|
||||||
failed_files,
|
failed_files,
|
||||||
self.cdx_index_source))
|
local_index_query))
|
||||||
|
|
||||||
if cdx.get('status', '').startswith('3'):
|
if cdx.get('status', '').startswith('3'):
|
||||||
status_headers = self.headers_parser.parse(payload.stream)
|
status_headers = self.headers_parser.parse(payload.stream)
|
||||||
|
@ -63,6 +63,9 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
|
|||||||
app.add_route('/empty', HandlerSeq([]))
|
app.add_route('/empty', HandlerSeq([]))
|
||||||
app.add_route('/invalid', DefaultResourceHandler([SimpleAggregator({'invalid': 'should not be a callable'})]))
|
app.add_route('/invalid', DefaultResourceHandler([SimpleAggregator({'invalid': 'should not be a callable'})]))
|
||||||
|
|
||||||
|
url_agnost = SimpleAggregator({'url-agnost': FileIndexSource(to_path('testdata/url-agnost-example.cdxj'))})
|
||||||
|
app.add_route('/urlagnost', DefaultResourceHandler(url_agnost, 'redis://localhost/2/test:{arg}:warc'))
|
||||||
|
|
||||||
cls.testapp = webtest.TestApp(app)
|
cls.testapp = webtest.TestApp(app)
|
||||||
|
|
||||||
def _check_uri_date(self, resp, uri, dt):
|
def _check_uri_date(self, resp, uri, dt):
|
||||||
@ -85,6 +88,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
|
|||||||
'/posttest', '/posttest/postreq',
|
'/posttest', '/posttest/postreq',
|
||||||
'/seq', '/seq/postreq',
|
'/seq', '/seq/postreq',
|
||||||
'/allredis', '/allredis/postreq',
|
'/allredis', '/allredis/postreq',
|
||||||
|
'/urlagnost', '/urlagnost/postreq',
|
||||||
'/invalid', '/invalid/postreq'])
|
'/invalid', '/invalid/postreq'])
|
||||||
|
|
||||||
assert res['/fallback'] == {'modes': ['list_sources', 'index', 'resource']}
|
assert res['/fallback'] == {'modes': ['list_sources', 'index', 'resource']}
|
||||||
@ -331,6 +335,18 @@ foo=bar&test=abc"""
|
|||||||
|
|
||||||
assert resp.headers['WebAgg-Source-Coll'] == 'example'
|
assert resp.headers['WebAgg-Source-Coll'] == 'example'
|
||||||
|
|
||||||
|
def test_url_agnost(self):
|
||||||
|
f = FakeStrictRedis.from_url('redis://localhost/2')
|
||||||
|
f.hset('test:foo:warc', 'example-url-agnostic-revisit.warc.gz', './testdata/example-url-agnostic-revisit.warc.gz')
|
||||||
|
f.hset('test:foo:warc', 'example-url-agnostic-orig.warc.gz', './testdata/example-url-agnostic-orig.warc.gz')
|
||||||
|
|
||||||
|
resp = self.testapp.get('/urlagnost/resource?url=http://example.com/&param.arg=foo')
|
||||||
|
|
||||||
|
assert resp.status_int == 200
|
||||||
|
assert resp.headers['Link'] == MementoUtils.make_link('http://test@example.com/', 'original')
|
||||||
|
assert resp.headers['WebAgg-Source-Coll'] == 'url-agnost'
|
||||||
|
assert resp.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT'
|
||||||
|
|
||||||
def test_live_video_loader(self):
|
def test_live_video_loader(self):
|
||||||
params = {'url': 'http://www.youtube.com/v/BfBgWtAIbRc',
|
params = {'url': 'http://www.youtube.com/v/BfBgWtAIbRc',
|
||||||
'content_type': 'application/vnd.youtube-dl_formats+json'
|
'content_type': 'application/vnd.youtube-dl_formats+json'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user