mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
memento timemap and test improvements:
- windows: fix paths for pathresolver test on windows
- timemap: add tests for all collection timemap, add cdxj timemap test
- timemap: only add original, timegate links for 'link' timemap
This commit is contained in:
parent
a32c6f089c
commit
a870f7e91a
@ -460,7 +460,7 @@ class RewriterApp(object):
|
|||||||
|
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def make_timemap(self, wb_url, res, full_prefix):
|
def make_timemap(self, wb_url, res, full_prefix, output):
|
||||||
wb_url.type = wb_url.QUERY
|
wb_url.type = wb_url.QUERY
|
||||||
|
|
||||||
content_type = res.headers.get('Content-Type')
|
content_type = res.headers.get('Content-Type')
|
||||||
@ -472,7 +472,7 @@ class RewriterApp(object):
|
|||||||
elif res.status_code:
|
elif res.status_code:
|
||||||
status = str(res.status_code) + ' ' + res.reason
|
status = str(res.status_code) + ' ' + res.reason
|
||||||
|
|
||||||
if res.status_code == 200:
|
if res.status_code == 200 and output == 'link':
|
||||||
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix)
|
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix)
|
||||||
|
|
||||||
text = MementoUtils.wrap_timemap_header(wb_url.url,
|
text = MementoUtils.wrap_timemap_header(wb_url.url,
|
||||||
@ -486,8 +486,9 @@ class RewriterApp(object):
|
|||||||
def handle_query(self, environ, wb_url, kwargs, full_prefix):
|
def handle_query(self, environ, wb_url, kwargs, full_prefix):
|
||||||
res = self.do_query(wb_url, kwargs)
|
res = self.do_query(wb_url, kwargs)
|
||||||
|
|
||||||
if kwargs.get('output'):
|
output = kwargs.get('output')
|
||||||
return self.make_timemap(wb_url, res, full_prefix)
|
if output:
|
||||||
|
return self.make_timemap(wb_url, res, full_prefix, output)
|
||||||
|
|
||||||
def format_cdx(text):
|
def format_cdx(text):
|
||||||
cdx_lines = text.rstrip().split('\n')
|
cdx_lines = text.rstrip().split('\n')
|
||||||
|
@ -31,8 +31,8 @@ class TestPathIndex(object):
|
|||||||
assert res[0] == os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
assert res[0] == os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||||
|
|
||||||
def test_resolver_dir_wildcard_as_file_url(self):
|
def test_resolver_dir_wildcard_as_file_url(self):
|
||||||
url = to_file_url(get_test_dir()) + '/*/'
|
url = to_file_url(get_test_dir()) + os.path.sep + '*' + os.path.sep
|
||||||
resolver = DefaultResolverMixin.make_best_resolver(url)
|
resolver = PrefixResolver(url)
|
||||||
|
|
||||||
cdx = CDXObject()
|
cdx = CDXObject()
|
||||||
res = resolver('example.warc.gz', cdx)
|
res = resolver('example.warc.gz', cdx)
|
||||||
|
@ -133,6 +133,23 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
|||||||
<http://example.com?example=1>; rel="original",
|
<http://example.com?example=1>; rel="original",
|
||||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; src="pywb:example.cdx",
|
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; src="pywb:example.cdx",
|
||||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; src="pywb:example.cdx"
|
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; src="pywb:example.cdx"
|
||||||
|
"""
|
||||||
|
assert exp == resp.text
|
||||||
|
|
||||||
|
def test_timemap_cdxj(self):
|
||||||
|
"""
|
||||||
|
Test text/x-cdxj timemap
|
||||||
|
"""
|
||||||
|
|
||||||
|
resp = self.testapp.get('/pywb/timemap/cdxj/http://example.com?example=1')
|
||||||
|
assert resp.status_int == 200
|
||||||
|
assert resp.content_type == 'text/x-cdxj'
|
||||||
|
|
||||||
|
resp.charset = 'utf-8'
|
||||||
|
|
||||||
|
exp = """\
|
||||||
|
com,example)/?example=1 20140103030321 {"url": "http://example.com?example=1", "mime": "text/html", "status": "200", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "redirect": "-", "robotflags": "-", "length": "1043", "offset": "333", "filename": "example.warc.gz", "source": "pywb:example.cdx"}
|
||||||
|
com,example)/?example=1 20140103030341 {"url": "http://example.com?example=1", "mime": "warc/revisit", "status": "-", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "redirect": "-", "robotflags": "-", "length": "553", "offset": "1864", "filename": "example.warc.gz", "source": "pywb:example.cdx"}
|
||||||
"""
|
"""
|
||||||
assert exp == resp.text
|
assert exp == resp.text
|
||||||
|
|
||||||
|
@ -99,3 +99,12 @@ class TestRecordReplay(CollsDirMixin, BaseConfigTest):
|
|||||||
|
|
||||||
assert cdxj_lines[0]['filename'] == cdxj_lines[2]['filename']
|
assert cdxj_lines[0]['filename'] == cdxj_lines[2]['filename']
|
||||||
|
|
||||||
|
def test_timemap_all_coll(self):
|
||||||
|
res = self.testapp.get('/all/timemap/link/http://httpbin.org/get?C=D')
|
||||||
|
link_lines = res.text.rstrip().split('\n')
|
||||||
|
assert len(link_lines) == 5
|
||||||
|
|
||||||
|
assert '_test_colls:test2/indexes/autoindex.cdxj' in link_lines[3]
|
||||||
|
assert '_test_colls:test/indexes/autoindex.cdxj' in link_lines[4]
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user