mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
webagg: Fix loading of url-lookup (url agnostic) revisits, ensure all params passed to cdx lookup, add tests for url-agnostic revisit lookup
This commit is contained in:
parent
20b161bf90
commit
c93d7ecafc
BIN
testdata/example-url-agnostic-orig.warc.gz
vendored
Normal file
BIN
testdata/example-url-agnostic-orig.warc.gz
vendored
Normal file
Binary file not shown.
BIN
testdata/example-url-agnostic-revisit.warc.gz
vendored
Normal file
BIN
testdata/example-url-agnostic-revisit.warc.gz
vendored
Normal file
Binary file not shown.
2
testdata/url-agnost-example.cdxj
vendored
Normal file
2
testdata/url-agnost-example.cdxj
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
com,example)/ 20130729195151 {"url": "http://test@example.com/", "mime": "warc/revisit", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "length": "591", "offset": "355", "filename": "example-url-agnostic-revisit.warc.gz"}
|
||||
org,iana,example)/ 20130702195402 {"url": "http://example.iana.org/", "mime": "text/html", "status": "200", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "length": "1001", "offset": "353", "filename": "example-url-agnostic-orig.warc.gz"}
|
@ -166,10 +166,6 @@ class WARCPathLoader(BaseLoader):
|
||||
|
||||
self.cdx_source = cdx_source
|
||||
|
||||
def cdx_index_source(self, *args, **kwargs):
|
||||
cdx_iter, errs = self.cdx_source(*args, **kwargs)
|
||||
return cdx_iter
|
||||
|
||||
def _make_resolver(self, path):
|
||||
if hasattr(path, '__call__'):
|
||||
return path
|
||||
@ -188,13 +184,26 @@ class WARCPathLoader(BaseLoader):
|
||||
return None
|
||||
|
||||
orig_source = cdx.get('source', '').split(':')[0]
|
||||
cdx._formatter = ParamFormatter(params, orig_source)
|
||||
formatter = ParamFormatter(params, orig_source)
|
||||
cdx._formatter = formatter
|
||||
|
||||
def local_index_query(local_params):
|
||||
for n, v in six.iteritems(params):
|
||||
if n.startswith('param.'):
|
||||
local_params[n] = v
|
||||
|
||||
cdx_iter, errs = self.cdx_source(local_params)
|
||||
for cdx in cdx_iter:
|
||||
cdx._formatter = formatter
|
||||
yield cdx
|
||||
|
||||
return cdx_iter
|
||||
|
||||
failed_files = []
|
||||
headers, payload = (self.resolve_loader.
|
||||
load_headers_and_payload(cdx,
|
||||
failed_files,
|
||||
self.cdx_index_source))
|
||||
local_index_query))
|
||||
|
||||
if cdx.get('status', '').startswith('3'):
|
||||
status_headers = self.headers_parser.parse(payload.stream)
|
||||
|
@ -63,6 +63,9 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
|
||||
app.add_route('/empty', HandlerSeq([]))
|
||||
app.add_route('/invalid', DefaultResourceHandler([SimpleAggregator({'invalid': 'should not be a callable'})]))
|
||||
|
||||
url_agnost = SimpleAggregator({'url-agnost': FileIndexSource(to_path('testdata/url-agnost-example.cdxj'))})
|
||||
app.add_route('/urlagnost', DefaultResourceHandler(url_agnost, 'redis://localhost/2/test:{arg}:warc'))
|
||||
|
||||
cls.testapp = webtest.TestApp(app)
|
||||
|
||||
def _check_uri_date(self, resp, uri, dt):
|
||||
@ -85,6 +88,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
|
||||
'/posttest', '/posttest/postreq',
|
||||
'/seq', '/seq/postreq',
|
||||
'/allredis', '/allredis/postreq',
|
||||
'/urlagnost', '/urlagnost/postreq',
|
||||
'/invalid', '/invalid/postreq'])
|
||||
|
||||
assert res['/fallback'] == {'modes': ['list_sources', 'index', 'resource']}
|
||||
@ -331,6 +335,18 @@ foo=bar&test=abc"""
|
||||
|
||||
assert resp.headers['WebAgg-Source-Coll'] == 'example'
|
||||
|
||||
def test_url_agnost(self):
|
||||
f = FakeStrictRedis.from_url('redis://localhost/2')
|
||||
f.hset('test:foo:warc', 'example-url-agnostic-revisit.warc.gz', './testdata/example-url-agnostic-revisit.warc.gz')
|
||||
f.hset('test:foo:warc', 'example-url-agnostic-orig.warc.gz', './testdata/example-url-agnostic-orig.warc.gz')
|
||||
|
||||
resp = self.testapp.get('/urlagnost/resource?url=http://example.com/&param.arg=foo')
|
||||
|
||||
assert resp.status_int == 200
|
||||
assert resp.headers['Link'] == MementoUtils.make_link('http://test@example.com/', 'original')
|
||||
assert resp.headers['WebAgg-Source-Coll'] == 'url-agnost'
|
||||
assert resp.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT'
|
||||
|
||||
def test_live_video_loader(self):
|
||||
params = {'url': 'http://www.youtube.com/v/BfBgWtAIbRc',
|
||||
'content_type': 'application/vnd.youtube-dl_formats+json'
|
||||
|
Loading…
x
Reference in New Issue
Block a user