diff --git a/testdata/example-url-agnostic-orig.warc.gz b/testdata/example-url-agnostic-orig.warc.gz
new file mode 100644
index 00000000..98700373
Binary files /dev/null and b/testdata/example-url-agnostic-orig.warc.gz differ
diff --git a/testdata/example-url-agnostic-revisit.warc.gz b/testdata/example-url-agnostic-revisit.warc.gz
new file mode 100644
index 00000000..3770ed0a
Binary files /dev/null and b/testdata/example-url-agnostic-revisit.warc.gz differ
diff --git a/testdata/url-agnost-example.cdxj b/testdata/url-agnost-example.cdxj
new file mode 100644
index 00000000..6eebd255
--- /dev/null
+++ b/testdata/url-agnost-example.cdxj
@@ -0,0 +1,2 @@
+com,example)/ 20130729195151 {"url": "http://test@example.com/", "mime": "warc/revisit", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "length": "591", "offset": "355", "filename": "example-url-agnostic-revisit.warc.gz"}
+org,iana,example)/ 20130702195402 {"url": "http://example.iana.org/", "mime": "text/html", "status": "200", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "length": "1001", "offset": "353", "filename": "example-url-agnostic-orig.warc.gz"}
diff --git a/webagg/responseloader.py b/webagg/responseloader.py
index 0afe6442..ed3c72ba 100644
--- a/webagg/responseloader.py
+++ b/webagg/responseloader.py
@@ -166,10 +166,6 @@ class WARCPathLoader(BaseLoader):
 
         self.cdx_source = cdx_source
 
-    def cdx_index_source(self, *args, **kwargs):
-        cdx_iter, errs = self.cdx_source(*args, **kwargs)
-        return cdx_iter
-
     def _make_resolver(self, path):
         if hasattr(path, '__call__'):
             return path
@@ -188,13 +184,30 @@ class WARCPathLoader(BaseLoader):
             return None
 
         orig_source = cdx.get('source', '').split(':')[0]
-        cdx._formatter = ParamFormatter(params, orig_source)
+        formatter = ParamFormatter(params, orig_source)
+        cdx._formatter = formatter
+
+        def local_index_query(local_params):
+            """Index query callback handed to load_headers_and_payload.
+
+            Copies each 'param.*' entry of the request params into
+            local_params, queries self.cdx_source with it, and yields every
+            returned cdx with the shared formatter attached.
+            """
+            for n, v in six.iteritems(params):
+                if n.startswith('param.'):
+                    local_params[n] = v
+
+            cdx_iter, errs = self.cdx_source(local_params)
+            for cdx in cdx_iter:
+                cdx._formatter = formatter
+                yield cdx
 
         failed_files = []
         headers, payload = (self.resolve_loader.
                              load_headers_and_payload(cdx,
                                                       failed_files,
-                                                      self.cdx_index_source))
+                                                      local_index_query))
 
         if cdx.get('status', '').startswith('3'):
             status_headers = self.headers_parser.parse(payload.stream)
diff --git a/webagg/test/test_handlers.py b/webagg/test/test_handlers.py
index 70b239e2..6fb5c8d8 100644
--- a/webagg/test/test_handlers.py
+++ b/webagg/test/test_handlers.py
@@ -63,6 +63,9 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
         app.add_route('/empty', HandlerSeq([]))
         app.add_route('/invalid', DefaultResourceHandler([SimpleAggregator({'invalid': 'should not be a callable'})]))
 
+        url_agnost = SimpleAggregator({'url-agnost': FileIndexSource(to_path('testdata/url-agnost-example.cdxj'))})
+        app.add_route('/urlagnost', DefaultResourceHandler(url_agnost, 'redis://localhost/2/test:{arg}:warc'))
+
         cls.testapp = webtest.TestApp(app)
 
     def _check_uri_date(self, resp, uri, dt):
@@ -85,6 +88,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
                                        '/posttest', '/posttest/postreq',
                                        '/seq', '/seq/postreq',
                                        '/allredis', '/allredis/postreq',
+                                       '/urlagnost', '/urlagnost/postreq',
                                        '/invalid', '/invalid/postreq'])
 
         assert res['/fallback'] == {'modes': ['list_sources', 'index', 'resource']}
@@ -331,6 +335,22 @@ foo=bar&test=abc"""
 
         assert resp.headers['WebAgg-Source-Coll'] == 'example'
 
+    def test_url_agnost(self):
+        """Fetch the example.com revisit record from url-agnost-example.cdxj.
+
+        Its digest matches the example.iana.org capture in the same index.
+        """
+        f = FakeStrictRedis.from_url('redis://localhost/2')
+        f.hset('test:foo:warc', 'example-url-agnostic-revisit.warc.gz', './testdata/example-url-agnostic-revisit.warc.gz')
+        f.hset('test:foo:warc', 'example-url-agnostic-orig.warc.gz', './testdata/example-url-agnostic-orig.warc.gz')
+
+        resp = self.testapp.get('/urlagnost/resource?url=http://example.com/&param.arg=foo')
+
+        assert resp.status_int == 200
+        assert resp.headers['Link'] == MementoUtils.make_link('http://test@example.com/', 'original')
+        assert resp.headers['WebAgg-Source-Coll'] == 'url-agnost'
+        assert resp.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT'
+
     def test_live_video_loader(self):
         params = {'url': 'http://www.youtube.com/v/BfBgWtAIbRc',
                   'content_type': 'application/vnd.youtube-dl_formats+json'