diff --git a/pywb/apps/frontendapp.py b/pywb/apps/frontendapp.py index 7fb02e99..0cf36269 100644 --- a/pywb/apps/frontendapp.py +++ b/pywb/apps/frontendapp.py @@ -248,6 +248,10 @@ class FrontEndApp(object): return self.serve_content(environ, coll, url, record=True) def serve_content(self, environ, coll='$root', url='', timemap_output='', record=False): + src_coll = '' + if ':' in coll: + coll, src_coll = coll.split(':', 1) + if not self.is_valid_coll(coll): self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll)) @@ -265,6 +269,9 @@ class FrontEndApp(object): if timemap_output: metadata['output'] = timemap_output + if src_coll: + metadata['src_coll'] = src_coll + try: response = self.rewriterapp.render_content(wb_url_str, metadata, environ) except UpstreamException as ue: @@ -300,9 +307,6 @@ class FrontEndApp(object): return WbResponse.json_response(result) def is_valid_coll(self, coll): - #if coll == self.all_coll: - # return True - return (coll in self.warcserver.list_fixed_routes() or coll in self.warcserver.list_dynamic_routes()) diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index 68690b1a..e18dcd38 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -484,6 +484,10 @@ class RewriterApp(object): params['closest'] = closest params['matchType'] = 'exact' + src_coll = kwargs.get('src_coll') + if src_coll: + params['src_coll'] = src_coll + if wb_url.mod == 'vi_': params['content_type'] = self.VIDEO_INFO_CONTENT_TYPE diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py index 66ca7216..811bf1df 100644 --- a/pywb/warcserver/index/indexsource.py +++ b/pywb/warcserver/index/indexsource.py @@ -143,9 +143,13 @@ class RemoteIndexSource(BaseIndexSource): if name: source_coll = params.get('param.' + name + '.src_coll', '') + if not source_coll: + source_coll = params.get('src_coll', '') + cdx[self.url_field] = self.replay_url.format(url=cdx['url'], timestamp=cdx['timestamp'], src_coll=source_coll) + def __repr__(self): return '{0}({1}, {2})'.format(self.__class__.__name__, self.api_url, diff --git a/tests/config_test.yaml b/tests/config_test.yaml index f83c67bf..235a5e28 100644 --- a/tests/config_test.yaml +++ b/tests/config_test.yaml @@ -31,6 +31,15 @@ collections: pywb-cdxj: index_paths: ./sample_archive/cdxj/ + + ait: + index: + type: cdx + api_url: https://wayback.archive-it.org/cdx?url={url}&closest={closest}&sort=closest&filter=filename:ARCHIVEIT-({src_coll})-.* + replay_url: http://wayback.archive-it.org/{src_coll}/{timestamp}id_/{url} + + + archive_paths: - ./invalid/path/to/ignore/ - ./sample_archive/warcs/ diff --git a/tests/test_integration.py b/tests/test_integration.py index a929a950..7b7c5156 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -300,6 +300,14 @@ class TestWbIntegration(BaseConfigTest): assert 'timestamp = "20140127171238"' in resp.text assert '/pywb/20140127171237{0}/http://www.iana.org/about/'.format(fmod) in resp.text + def test_replay_remote_ait(self, fmod): + resp = self.get('/ait:1068/2011{0}/http://www.iana.org/domains/example/', fmod) + self._assert_basic_html(resp) + + assert '"20120119230023"' in resp.text, resp.text + assert '

Example Domains

' in resp.text + assert 'new _WBWombat' in resp.text, resp.text + def test_latest_replay(self, fmod): fmod_slash = fmod + '/' if fmod else '' resp = self.get('/pywb/{0}http://example.com/', fmod_slash) @@ -483,7 +491,7 @@ class TestWbIntegration(BaseConfigTest): resp = self.testapp.get('/collinfo.json') assert resp.content_type == 'application/json' value = resp.json - assert len(value['fixed']) == 4 + assert len(value['fixed']) == 5 assert len(value['dynamic']) == 0 #def test_invalid_config(self):