mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Custom source collection support: add support for source collections, e.g. /coll:src_coll/, to allow access to a specific source collection, for use with remote index and memento sources
This commit is contained in:
parent
970d0199c7
commit
8071739876
@ -248,6 +248,10 @@ class FrontEndApp(object):
|
|||||||
return self.serve_content(environ, coll, url, record=True)
|
return self.serve_content(environ, coll, url, record=True)
|
||||||
|
|
||||||
def serve_content(self, environ, coll='$root', url='', timemap_output='', record=False):
|
def serve_content(self, environ, coll='$root', url='', timemap_output='', record=False):
|
||||||
|
src_coll = ''
|
||||||
|
if ':' in coll:
|
||||||
|
coll, src_coll = coll.split(':', 1)
|
||||||
|
|
||||||
if not self.is_valid_coll(coll):
|
if not self.is_valid_coll(coll):
|
||||||
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
|
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
|
||||||
|
|
||||||
@ -265,6 +269,9 @@ class FrontEndApp(object):
|
|||||||
if timemap_output:
|
if timemap_output:
|
||||||
metadata['output'] = timemap_output
|
metadata['output'] = timemap_output
|
||||||
|
|
||||||
|
if src_coll:
|
||||||
|
metadata['src_coll'] = src_coll
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = self.rewriterapp.render_content(wb_url_str, metadata, environ)
|
response = self.rewriterapp.render_content(wb_url_str, metadata, environ)
|
||||||
except UpstreamException as ue:
|
except UpstreamException as ue:
|
||||||
@ -300,9 +307,6 @@ class FrontEndApp(object):
|
|||||||
return WbResponse.json_response(result)
|
return WbResponse.json_response(result)
|
||||||
|
|
||||||
def is_valid_coll(self, coll):
|
def is_valid_coll(self, coll):
|
||||||
#if coll == self.all_coll:
|
|
||||||
# return True
|
|
||||||
|
|
||||||
return (coll in self.warcserver.list_fixed_routes() or
|
return (coll in self.warcserver.list_fixed_routes() or
|
||||||
coll in self.warcserver.list_dynamic_routes())
|
coll in self.warcserver.list_dynamic_routes())
|
||||||
|
|
||||||
|
@ -484,6 +484,10 @@ class RewriterApp(object):
|
|||||||
params['closest'] = closest
|
params['closest'] = closest
|
||||||
params['matchType'] = 'exact'
|
params['matchType'] = 'exact'
|
||||||
|
|
||||||
|
src_coll = kwargs.get('src_coll')
|
||||||
|
if src_coll:
|
||||||
|
params['src_coll'] = src_coll
|
||||||
|
|
||||||
if wb_url.mod == 'vi_':
|
if wb_url.mod == 'vi_':
|
||||||
params['content_type'] = self.VIDEO_INFO_CONTENT_TYPE
|
params['content_type'] = self.VIDEO_INFO_CONTENT_TYPE
|
||||||
|
|
||||||
|
@ -143,9 +143,13 @@ class RemoteIndexSource(BaseIndexSource):
|
|||||||
if name:
|
if name:
|
||||||
source_coll = params.get('param.' + name + '.src_coll', '')
|
source_coll = params.get('param.' + name + '.src_coll', '')
|
||||||
|
|
||||||
|
if not source_coll:
|
||||||
|
source_coll = params.get('src_coll', '')
|
||||||
|
|
||||||
cdx[self.url_field] = self.replay_url.format(url=cdx['url'],
|
cdx[self.url_field] = self.replay_url.format(url=cdx['url'],
|
||||||
timestamp=cdx['timestamp'],
|
timestamp=cdx['timestamp'],
|
||||||
src_coll=source_coll)
|
src_coll=source_coll)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '{0}({1}, {2})'.format(self.__class__.__name__,
|
return '{0}({1}, {2})'.format(self.__class__.__name__,
|
||||||
self.api_url,
|
self.api_url,
|
||||||
|
@ -31,6 +31,15 @@ collections:
|
|||||||
pywb-cdxj:
|
pywb-cdxj:
|
||||||
index_paths: ./sample_archive/cdxj/
|
index_paths: ./sample_archive/cdxj/
|
||||||
|
|
||||||
|
|
||||||
|
ait:
|
||||||
|
index:
|
||||||
|
type: cdx
|
||||||
|
api_url: https://wayback.archive-it.org/cdx?url={url}&closest={closest}&sort=closest&filter=filename:ARCHIVEIT-({src_coll})-.*
|
||||||
|
replay_url: http://wayback.archive-it.org/{src_coll}/{timestamp}id_/{url}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
archive_paths:
|
archive_paths:
|
||||||
- ./invalid/path/to/ignore/
|
- ./invalid/path/to/ignore/
|
||||||
- ./sample_archive/warcs/
|
- ./sample_archive/warcs/
|
||||||
|
@ -300,6 +300,14 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
assert 'timestamp = "20140127171238"' in resp.text
|
assert 'timestamp = "20140127171238"' in resp.text
|
||||||
assert '/pywb/20140127171237{0}/http://www.iana.org/about/'.format(fmod) in resp.text
|
assert '/pywb/20140127171237{0}/http://www.iana.org/about/'.format(fmod) in resp.text
|
||||||
|
|
||||||
|
def test_replay_remote_ait(self, fmod):
|
||||||
|
resp = self.get('/ait:1068/2011{0}/http://www.iana.org/domains/example/', fmod)
|
||||||
|
self._assert_basic_html(resp)
|
||||||
|
|
||||||
|
assert '"20120119230023"' in resp.text, resp.text
|
||||||
|
assert '<h1>Example Domains</h1>' in resp.text
|
||||||
|
assert 'new _WBWombat' in resp.text, resp.text
|
||||||
|
|
||||||
def test_latest_replay(self, fmod):
|
def test_latest_replay(self, fmod):
|
||||||
fmod_slash = fmod + '/' if fmod else ''
|
fmod_slash = fmod + '/' if fmod else ''
|
||||||
resp = self.get('/pywb/{0}http://example.com/', fmod_slash)
|
resp = self.get('/pywb/{0}http://example.com/', fmod_slash)
|
||||||
@ -483,7 +491,7 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
resp = self.testapp.get('/collinfo.json')
|
resp = self.testapp.get('/collinfo.json')
|
||||||
assert resp.content_type == 'application/json'
|
assert resp.content_type == 'application/json'
|
||||||
value = resp.json
|
value = resp.json
|
||||||
assert len(value['fixed']) == 4
|
assert len(value['fixed']) == 5
|
||||||
assert len(value['dynamic']) == 0
|
assert len(value['dynamic']) == 0
|
||||||
|
|
||||||
#def test_invalid_config(self):
|
#def test_invalid_config(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user