mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
timemap format fix: fixes ukwa-pywb/pywb#37
- ensure timemap returns full url-m
- warcserver supports 'memento_format' param which, if present, specifies the full format to use for memento links in the timemap
- memento tests: timemap tests include full url-m, and test both framed and frameless timemap responses
This commit is contained in:
parent
3868f5b915
commit
0a9ad5c8dc
@ -635,6 +635,8 @@ class RewriterApp(object):
|
|||||||
params['output'] = kwargs.get('output', 'json')
|
params['output'] = kwargs.get('output', 'json')
|
||||||
params['from'] = wb_url.timestamp
|
params['from'] = wb_url.timestamp
|
||||||
params['to'] = wb_url.end_timestamp
|
params['to'] = wb_url.end_timestamp
|
||||||
|
if 'memento_format' in kwargs:
|
||||||
|
params['memento_format'] = kwargs['memento_format']
|
||||||
|
|
||||||
upstream_url = self.get_upstream_url(wb_url, kwargs, params)
|
upstream_url = self.get_upstream_url(wb_url, kwargs, params)
|
||||||
upstream_url = upstream_url.replace('/resource/postreq', '/index')
|
upstream_url = upstream_url.replace('/resource/postreq', '/index')
|
||||||
@ -668,6 +670,7 @@ class RewriterApp(object):
|
|||||||
|
|
||||||
def handle_timemap(self, wb_url, kwargs, full_prefix):
|
def handle_timemap(self, wb_url, kwargs, full_prefix):
|
||||||
output = kwargs.get('output')
|
output = kwargs.get('output')
|
||||||
|
kwargs['memento_format'] = full_prefix + '{timestamp}' + self.replay_mod + '/{url}'
|
||||||
res = self.do_query(wb_url, kwargs)
|
res = self.do_query(wb_url, kwargs)
|
||||||
return self.make_timemap(wb_url, res, full_prefix, output)
|
return self.make_timemap(wb_url, res, full_prefix, output)
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
import six
|
import six
|
||||||
|
|
||||||
from warcio.timeutils import timestamp_to_http_date
|
from warcio.timeutils import timestamp_to_http_date, http_date_to_timestamp
|
||||||
|
|
||||||
from pywb.utils.wbexception import BadRequestException
|
from pywb.utils.wbexception import BadRequestException
|
||||||
|
|
||||||
@ -65,7 +65,7 @@ class MementoUtils(object):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def make_timemap_memento_link(cls, cdx, datetime=None, rel='memento', end=',\n'):
|
def make_timemap_memento_link(cls, cdx, datetime=None, rel='memento', end=',\n', memento_format=None):
|
||||||
url = cdx.get('url')
|
url = cdx.get('url')
|
||||||
if not url:
|
if not url:
|
||||||
url = 'file://{0}:{1}:{2}'.format(cdx.get('filename'), cdx.get('offset'), cdx.get('length'))
|
url = 'file://{0}:{1}:{2}'.format(cdx.get('filename'), cdx.get('offset'), cdx.get('length'))
|
||||||
@ -73,21 +73,22 @@ class MementoUtils(object):
|
|||||||
if not datetime:
|
if not datetime:
|
||||||
datetime = timestamp_to_http_date(cdx['timestamp'])
|
datetime = timestamp_to_http_date(cdx['timestamp'])
|
||||||
|
|
||||||
return cls.make_memento_link(url, rel, datetime, cdx.get('source-coll')) + end
|
return cls.make_memento_link(url, rel, datetime, cdx.get('source-coll'), memento_format) + end
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def make_timemap(cls, cdx_iter):
|
def make_timemap(cls, cdx_iter, params):
|
||||||
prev_cdx = None
|
prev_cdx = None
|
||||||
|
memento_format = params.get('memento_format')
|
||||||
|
|
||||||
for cdx in cdx_iter:
|
for cdx in cdx_iter:
|
||||||
if prev_cdx:
|
if prev_cdx:
|
||||||
yield cls.make_timemap_memento_link(prev_cdx)
|
yield cls.make_timemap_memento_link(prev_cdx, memento_format=memento_format)
|
||||||
|
|
||||||
prev_cdx = cdx
|
prev_cdx = cdx
|
||||||
|
|
||||||
# last memento link, if any
|
# last memento link, if any
|
||||||
if prev_cdx:
|
if prev_cdx:
|
||||||
yield cls.make_timemap_memento_link(prev_cdx, end='\n')
|
yield cls.make_timemap_memento_link(prev_cdx, end='\n', memento_format=memento_format)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def wrap_timemap_header(cls, url, timegate_url, timemap_url, timemap):
|
def wrap_timemap_header(cls, url, timegate_url, timemap_url, timemap):
|
||||||
@ -111,8 +112,14 @@ class MementoUtils(object):
|
|||||||
return '<{0}>; rel="{1}"'.format(url, type)
|
return '<{0}>; rel="{1}"'.format(url, type)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def make_memento_link(cls, url, type, dt, coll=None):
|
def make_memento_link(cls, url, type, dt, coll=None, memento_format=None):
|
||||||
res = '<{0}>; rel="{1}"; datetime="{2}"'.format(url, type, dt)
|
if memento_format:
|
||||||
|
memento_format = memento_format.format(url=url,
|
||||||
|
timestamp=http_date_to_timestamp(dt))
|
||||||
|
else:
|
||||||
|
memento_format = url
|
||||||
|
|
||||||
|
res = '<{0}>; rel="{1}"; datetime="{2}"'.format(memento_format, type, dt)
|
||||||
if coll:
|
if coll:
|
||||||
res += '; collection="{0}"'.format(coll)
|
res += '; collection="{0}"'.format(coll)
|
||||||
|
|
||||||
|
@ -16,21 +16,21 @@ logger = logging.getLogger('warcserver')
|
|||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
def to_cdxj(cdx_iter, fields):
|
def to_cdxj(cdx_iter, fields, params):
|
||||||
content_type = 'text/x-cdxj'
|
content_type = 'text/x-cdxj'
|
||||||
return content_type, (cdx.to_cdxj(fields) for cdx in cdx_iter)
|
return content_type, (cdx.to_cdxj(fields) for cdx in cdx_iter)
|
||||||
|
|
||||||
def to_json(cdx_iter, fields):
|
def to_json(cdx_iter, fields, params):
|
||||||
content_type = 'text/x-ndjson'
|
content_type = 'text/x-ndjson'
|
||||||
return content_type, (cdx.to_json(fields) for cdx in cdx_iter)
|
return content_type, (cdx.to_json(fields) for cdx in cdx_iter)
|
||||||
|
|
||||||
def to_text(cdx_iter, fields):
|
def to_text(cdx_iter, fields, params):
|
||||||
content_type = 'text/plain'
|
content_type = 'text/plain'
|
||||||
return content_type, (cdx.to_text(fields) for cdx in cdx_iter)
|
return content_type, (cdx.to_text(fields) for cdx in cdx_iter)
|
||||||
|
|
||||||
def to_link(cdx_iter, fields):
|
def to_link(cdx_iter, fields, params):
|
||||||
content_type = 'application/link-format'
|
content_type = 'application/link-format'
|
||||||
return content_type, MementoUtils.make_timemap(cdx_iter)
|
return content_type, MementoUtils.make_timemap(cdx_iter, params)
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
@ -93,7 +93,7 @@ class IndexHandler(object):
|
|||||||
if not cdx_iter:
|
if not cdx_iter:
|
||||||
return None, None, errs
|
return None, None, errs
|
||||||
|
|
||||||
content_type, res = handler(cdx_iter, fields)
|
content_type, res = handler(cdx_iter, fields, params)
|
||||||
out_headers = {'Content-Type': content_type}
|
out_headers = {'Content-Type': content_type}
|
||||||
|
|
||||||
def check_str(lines):
|
def check_str(lines):
|
||||||
|
@ -7,6 +7,10 @@ VARY = 'Vary'
|
|||||||
LINK_FORMAT = 'application/link-format'
|
LINK_FORMAT = 'application/link-format'
|
||||||
|
|
||||||
class MementoMixin(object):
|
class MementoMixin(object):
|
||||||
|
def _timemap_get(self, url, fmod=True, **kwargs):
|
||||||
|
app = self.testapp if fmod else self.testapp_non_frame
|
||||||
|
return app.get(url, extra_environ={'REQUEST_URI': url}, **kwargs)
|
||||||
|
|
||||||
def get_links(self, resp):
|
def get_links(self, resp):
|
||||||
return list(map(lambda x: x.strip(), re.split(', (?![0-9])', resp.headers[LINK])))
|
return list(map(lambda x: x.strip(), re.split(', (?![0-9])', resp.headers[LINK])))
|
||||||
|
|
||||||
|
@ -11,9 +11,6 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
|||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
super(TestMemento, cls).setup_class('config_test.yaml')
|
super(TestMemento, cls).setup_class('config_test.yaml')
|
||||||
|
|
||||||
def _timemap_get(self, url, **kwargs):
|
|
||||||
return self.testapp.get(url, extra_environ={'REQUEST_URI': url}, **kwargs)
|
|
||||||
|
|
||||||
def _assert_memento(self, resp, url, ts, fmod, dt=''):
|
def _assert_memento(self, resp, url, ts, fmod, dt=''):
|
||||||
dt = dt or timestamp_to_http_date(ts)
|
dt = dt or timestamp_to_http_date(ts)
|
||||||
|
|
||||||
@ -119,12 +116,12 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
|||||||
|
|
||||||
self._assert_memento(resp, 'http://www.iana.org/domains/example', '20140128051539', fmod)
|
self._assert_memento(resp, 'http://www.iana.org/domains/example', '20140128051539', fmod)
|
||||||
|
|
||||||
def test_timemap(self):
|
def test_timemap(self, fmod):
|
||||||
"""
|
"""
|
||||||
Test application/link-format timemap
|
Test application/link-format timemap
|
||||||
"""
|
"""
|
||||||
|
|
||||||
resp = self._timemap_get('/pywb/timemap/link/http://example.com?example=1')
|
resp = self._timemap_get('/pywb/timemap/link/http://example.com?example=1', fmod)
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.content_type == LINK_FORMAT
|
assert resp.content_type == LINK_FORMAT
|
||||||
|
|
||||||
@ -134,17 +131,18 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
|||||||
<http://localhost:80/pywb/timemap/link/http://example.com?example=1>; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT",
|
<http://localhost:80/pywb/timemap/link/http://example.com?example=1>; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT",
|
||||||
<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",
|
<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",
|
||||||
<http://example.com?example=1>; rel="original",
|
<http://example.com?example=1>; rel="original",
|
||||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
<http://localhost:80/pywb/20140103030321{0}/http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
||||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
<http://localhost:80/pywb/20140103030341{0}/http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
||||||
"""
|
""".format(fmod)
|
||||||
|
|
||||||
assert exp == resp.text
|
assert exp == resp.text
|
||||||
|
|
||||||
def test_timemap_cdxj(self):
|
def test_timemap_cdxj(self, fmod):
|
||||||
"""
|
"""
|
||||||
Test test/x-cdxj timemap
|
Test test/x-cdxj timemap
|
||||||
"""
|
"""
|
||||||
|
|
||||||
resp = self._timemap_get('/pywb/timemap/cdxj/http://example.com?example=1')
|
resp = self._timemap_get('/pywb/timemap/cdxj/http://example.com?example=1', fmod)
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.content_type == 'text/x-cdxj'
|
assert resp.content_type == 'text/x-cdxj'
|
||||||
|
|
||||||
@ -156,12 +154,12 @@ com,example)/?example=1 20140103030341 {"url": "http://example.com?example=1", "
|
|||||||
"""
|
"""
|
||||||
assert exp == resp.text
|
assert exp == resp.text
|
||||||
|
|
||||||
def test_timemap_2(self):
|
def test_timemap_2(self, fmod):
|
||||||
"""
|
"""
|
||||||
Test application/link-format timemap total count
|
Test application/link-format timemap total count
|
||||||
"""
|
"""
|
||||||
|
|
||||||
resp = self._timemap_get('/pywb/timemap/link/http://example.com')
|
resp = self._timemap_get('/pywb/timemap/link/http://example.com', fmod)
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.content_type == LINK_FORMAT
|
assert resp.content_type == LINK_FORMAT
|
||||||
|
|
||||||
@ -192,9 +190,6 @@ class TestMementoRedirectClassic(MementoMixin, BaseConfigTest):
|
|||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
super(TestMementoRedirectClassic, cls).setup_class('config_test_redirect_classic.yaml')
|
super(TestMementoRedirectClassic, cls).setup_class('config_test_redirect_classic.yaml')
|
||||||
|
|
||||||
def _timemap_get(self, url, **kwargs):
|
|
||||||
return self.testapp.get(url, extra_environ={'REQUEST_URI': url}, **kwargs)
|
|
||||||
|
|
||||||
def test_memento_top_frame_timegate(self, fmod):
|
def test_memento_top_frame_timegate(self, fmod):
|
||||||
resp = self.testapp.get('/pywb/http://www.iana.org/')
|
resp = self.testapp.get('/pywb/http://www.iana.org/')
|
||||||
assert resp.status_code == 307
|
assert resp.status_code == 307
|
||||||
@ -252,12 +247,12 @@ class TestMementoRedirectClassic(MementoMixin, BaseConfigTest):
|
|||||||
assert '"20140126200624"' in resp.text
|
assert '"20140126200624"' in resp.text
|
||||||
assert '"http://www.iana.org/"' in resp.text, resp.text
|
assert '"http://www.iana.org/"' in resp.text, resp.text
|
||||||
|
|
||||||
def test_timemap(self):
|
def test_timemap(self, fmod):
|
||||||
"""
|
"""
|
||||||
Test application/link-format timemap
|
Test application/link-format timemap
|
||||||
"""
|
"""
|
||||||
|
|
||||||
resp = self._timemap_get('/pywb/timemap/link/http://example.com?example=1')
|
resp = self._timemap_get('/pywb/timemap/link/http://example.com?example=1', fmod)
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.content_type == LINK_FORMAT
|
assert resp.content_type == LINK_FORMAT
|
||||||
|
|
||||||
@ -267,9 +262,10 @@ class TestMementoRedirectClassic(MementoMixin, BaseConfigTest):
|
|||||||
<http://localhost:80/pywb/timemap/link/http://example.com?example=1>; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT",
|
<http://localhost:80/pywb/timemap/link/http://example.com?example=1>; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT",
|
||||||
<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",
|
<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",
|
||||||
<http://example.com?example=1>; rel="original",
|
<http://example.com?example=1>; rel="original",
|
||||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
<http://localhost:80/pywb/20140103030321{0}/http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
||||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
<http://localhost:80/pywb/20140103030341{0}/http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
||||||
"""
|
""".format(fmod)
|
||||||
|
|
||||||
assert exp == resp.text
|
assert exp == resp.text
|
||||||
|
|
||||||
def test_memento_not_time_gate(self, fmod):
|
def test_memento_not_time_gate(self, fmod):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user