mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
timemap format fix: fixes ukwa-pywb/pywb#37
- ensure timemap returns full url-m
- warcserver supports 'memento_format' param which, if present, specifies full format to use for memento links in timemap
- memento tests: timemap tests include full url-m, test both framed and frameless timemap responses
This commit is contained in:
parent
3868f5b915
commit
0a9ad5c8dc
@ -635,6 +635,8 @@ class RewriterApp(object):
|
||||
params['output'] = kwargs.get('output', 'json')
|
||||
params['from'] = wb_url.timestamp
|
||||
params['to'] = wb_url.end_timestamp
|
||||
if 'memento_format' in kwargs:
|
||||
params['memento_format'] = kwargs['memento_format']
|
||||
|
||||
upstream_url = self.get_upstream_url(wb_url, kwargs, params)
|
||||
upstream_url = upstream_url.replace('/resource/postreq', '/index')
|
||||
@ -668,6 +670,7 @@ class RewriterApp(object):
|
||||
|
||||
def handle_timemap(self, wb_url, kwargs, full_prefix):
|
||||
output = kwargs.get('output')
|
||||
kwargs['memento_format'] = full_prefix + '{timestamp}' + self.replay_mod + '/{url}'
|
||||
res = self.do_query(wb_url, kwargs)
|
||||
return self.make_timemap(wb_url, res, full_prefix, output)
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
import re
|
||||
import six
|
||||
|
||||
from warcio.timeutils import timestamp_to_http_date
|
||||
from warcio.timeutils import timestamp_to_http_date, http_date_to_timestamp
|
||||
|
||||
from pywb.utils.wbexception import BadRequestException
|
||||
|
||||
@ -65,7 +65,7 @@ class MementoUtils(object):
|
||||
return results
|
||||
|
||||
@classmethod
|
||||
def make_timemap_memento_link(cls, cdx, datetime=None, rel='memento', end=',\n'):
|
||||
def make_timemap_memento_link(cls, cdx, datetime=None, rel='memento', end=',\n', memento_format=None):
|
||||
url = cdx.get('url')
|
||||
if not url:
|
||||
url = 'file://{0}:{1}:{2}'.format(cdx.get('filename'), cdx.get('offset'), cdx.get('length'))
|
||||
@ -73,21 +73,22 @@ class MementoUtils(object):
|
||||
if not datetime:
|
||||
datetime = timestamp_to_http_date(cdx['timestamp'])
|
||||
|
||||
return cls.make_memento_link(url, rel, datetime, cdx.get('source-coll')) + end
|
||||
return cls.make_memento_link(url, rel, datetime, cdx.get('source-coll'), memento_format) + end
|
||||
|
||||
@classmethod
|
||||
def make_timemap(cls, cdx_iter):
|
||||
def make_timemap(cls, cdx_iter, params):
|
||||
prev_cdx = None
|
||||
memento_format = params.get('memento_format')
|
||||
|
||||
for cdx in cdx_iter:
|
||||
if prev_cdx:
|
||||
yield cls.make_timemap_memento_link(prev_cdx)
|
||||
yield cls.make_timemap_memento_link(prev_cdx, memento_format=memento_format)
|
||||
|
||||
prev_cdx = cdx
|
||||
|
||||
# last memento link, if any
|
||||
if prev_cdx:
|
||||
yield cls.make_timemap_memento_link(prev_cdx, end='\n')
|
||||
yield cls.make_timemap_memento_link(prev_cdx, end='\n', memento_format=memento_format)
|
||||
|
||||
@classmethod
|
||||
def wrap_timemap_header(cls, url, timegate_url, timemap_url, timemap):
|
||||
@ -111,8 +112,14 @@ class MementoUtils(object):
|
||||
return '<{0}>; rel="{1}"'.format(url, type)
|
||||
|
||||
@classmethod
|
||||
def make_memento_link(cls, url, type, dt, coll=None):
|
||||
res = '<{0}>; rel="{1}"; datetime="{2}"'.format(url, type, dt)
|
||||
def make_memento_link(cls, url, type, dt, coll=None, memento_format=None):
|
||||
if memento_format:
|
||||
memento_format = memento_format.format(url=url,
|
||||
timestamp=http_date_to_timestamp(dt))
|
||||
else:
|
||||
memento_format = url
|
||||
|
||||
res = '<{0}>; rel="{1}"; datetime="{2}"'.format(memento_format, type, dt)
|
||||
if coll:
|
||||
res += '; collection="{0}"'.format(coll)
|
||||
|
||||
|
@ -16,21 +16,21 @@ logger = logging.getLogger('warcserver')
|
||||
|
||||
|
||||
#=============================================================================
|
||||
def to_cdxj(cdx_iter, fields):
|
||||
def to_cdxj(cdx_iter, fields, params):
|
||||
content_type = 'text/x-cdxj'
|
||||
return content_type, (cdx.to_cdxj(fields) for cdx in cdx_iter)
|
||||
|
||||
def to_json(cdx_iter, fields):
|
||||
def to_json(cdx_iter, fields, params):
|
||||
content_type = 'text/x-ndjson'
|
||||
return content_type, (cdx.to_json(fields) for cdx in cdx_iter)
|
||||
|
||||
def to_text(cdx_iter, fields):
|
||||
def to_text(cdx_iter, fields, params):
|
||||
content_type = 'text/plain'
|
||||
return content_type, (cdx.to_text(fields) for cdx in cdx_iter)
|
||||
|
||||
def to_link(cdx_iter, fields):
|
||||
def to_link(cdx_iter, fields, params):
|
||||
content_type = 'application/link-format'
|
||||
return content_type, MementoUtils.make_timemap(cdx_iter)
|
||||
return content_type, MementoUtils.make_timemap(cdx_iter, params)
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@ -93,7 +93,7 @@ class IndexHandler(object):
|
||||
if not cdx_iter:
|
||||
return None, None, errs
|
||||
|
||||
content_type, res = handler(cdx_iter, fields)
|
||||
content_type, res = handler(cdx_iter, fields, params)
|
||||
out_headers = {'Content-Type': content_type}
|
||||
|
||||
def check_str(lines):
|
||||
|
@ -7,6 +7,10 @@ VARY = 'Vary'
|
||||
LINK_FORMAT = 'application/link-format'
|
||||
|
||||
class MementoMixin(object):
|
||||
def _timemap_get(self, url, fmod=True, **kwargs):
|
||||
app = self.testapp if fmod else self.testapp_non_frame
|
||||
return app.get(url, extra_environ={'REQUEST_URI': url}, **kwargs)
|
||||
|
||||
def get_links(self, resp):
|
||||
return list(map(lambda x: x.strip(), re.split(', (?![0-9])', resp.headers[LINK])))
|
||||
|
||||
|
@ -11,9 +11,6 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
||||
def setup_class(cls):
|
||||
super(TestMemento, cls).setup_class('config_test.yaml')
|
||||
|
||||
def _timemap_get(self, url, **kwargs):
|
||||
return self.testapp.get(url, extra_environ={'REQUEST_URI': url}, **kwargs)
|
||||
|
||||
def _assert_memento(self, resp, url, ts, fmod, dt=''):
|
||||
dt = dt or timestamp_to_http_date(ts)
|
||||
|
||||
@ -119,12 +116,12 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
||||
|
||||
self._assert_memento(resp, 'http://www.iana.org/domains/example', '20140128051539', fmod)
|
||||
|
||||
def test_timemap(self):
|
||||
def test_timemap(self, fmod):
|
||||
"""
|
||||
Test application/link-format timemap
|
||||
"""
|
||||
|
||||
resp = self._timemap_get('/pywb/timemap/link/http://example.com?example=1')
|
||||
resp = self._timemap_get('/pywb/timemap/link/http://example.com?example=1', fmod)
|
||||
assert resp.status_int == 200
|
||||
assert resp.content_type == LINK_FORMAT
|
||||
|
||||
@ -134,17 +131,18 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
||||
<http://localhost:80/pywb/timemap/link/http://example.com?example=1>; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT",
|
||||
<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",
|
||||
<http://example.com?example=1>; rel="original",
|
||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
||||
"""
|
||||
<http://localhost:80/pywb/20140103030321{0}/http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
||||
<http://localhost:80/pywb/20140103030341{0}/http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
||||
""".format(fmod)
|
||||
|
||||
assert exp == resp.text
|
||||
|
||||
def test_timemap_cdxj(self):
|
||||
def test_timemap_cdxj(self, fmod):
|
||||
"""
|
||||
Test text/x-cdxj timemap
|
||||
"""
|
||||
|
||||
resp = self._timemap_get('/pywb/timemap/cdxj/http://example.com?example=1')
|
||||
resp = self._timemap_get('/pywb/timemap/cdxj/http://example.com?example=1', fmod)
|
||||
assert resp.status_int == 200
|
||||
assert resp.content_type == 'text/x-cdxj'
|
||||
|
||||
@ -156,12 +154,12 @@ com,example)/?example=1 20140103030341 {"url": "http://example.com?example=1", "
|
||||
"""
|
||||
assert exp == resp.text
|
||||
|
||||
def test_timemap_2(self):
|
||||
def test_timemap_2(self, fmod):
|
||||
"""
|
||||
Test application/link-format timemap total count
|
||||
"""
|
||||
|
||||
resp = self._timemap_get('/pywb/timemap/link/http://example.com')
|
||||
resp = self._timemap_get('/pywb/timemap/link/http://example.com', fmod)
|
||||
assert resp.status_int == 200
|
||||
assert resp.content_type == LINK_FORMAT
|
||||
|
||||
@ -192,9 +190,6 @@ class TestMementoRedirectClassic(MementoMixin, BaseConfigTest):
|
||||
def setup_class(cls):
|
||||
super(TestMementoRedirectClassic, cls).setup_class('config_test_redirect_classic.yaml')
|
||||
|
||||
def _timemap_get(self, url, **kwargs):
|
||||
return self.testapp.get(url, extra_environ={'REQUEST_URI': url}, **kwargs)
|
||||
|
||||
def test_memento_top_frame_timegate(self, fmod):
|
||||
resp = self.testapp.get('/pywb/http://www.iana.org/')
|
||||
assert resp.status_code == 307
|
||||
@ -252,12 +247,12 @@ class TestMementoRedirectClassic(MementoMixin, BaseConfigTest):
|
||||
assert '"20140126200624"' in resp.text
|
||||
assert '"http://www.iana.org/"' in resp.text, resp.text
|
||||
|
||||
def test_timemap(self):
|
||||
def test_timemap(self, fmod):
|
||||
"""
|
||||
Test application/link-format timemap
|
||||
"""
|
||||
|
||||
resp = self._timemap_get('/pywb/timemap/link/http://example.com?example=1')
|
||||
resp = self._timemap_get('/pywb/timemap/link/http://example.com?example=1', fmod)
|
||||
assert resp.status_int == 200
|
||||
assert resp.content_type == LINK_FORMAT
|
||||
|
||||
@ -267,9 +262,10 @@ class TestMementoRedirectClassic(MementoMixin, BaseConfigTest):
|
||||
<http://localhost:80/pywb/timemap/link/http://example.com?example=1>; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT",
|
||||
<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",
|
||||
<http://example.com?example=1>; rel="original",
|
||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
||||
"""
|
||||
<http://localhost:80/pywb/20140103030321{0}/http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
||||
<http://localhost:80/pywb/20140103030341{0}/http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
||||
""".format(fmod)
|
||||
|
||||
assert exp == resp.text
|
||||
|
||||
def test_memento_not_time_gate(self, fmod):
|
||||
|
Loading…
x
Reference in New Issue
Block a user