1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

memento timemap and test improvements:

- windows: fix paths for pathresolver test on windows
- timemap: add tests for all collection timemap, add cdxj timemap test
- timemap: only add original, timegate links for 'link' timemap
This commit is contained in:
Ilya Kreymer 2017-09-28 07:13:23 -07:00
parent a32c6f089c
commit a870f7e91a
4 changed files with 33 additions and 6 deletions

View File

@ -460,7 +460,7 @@ class RewriterApp(object):
return r
def make_timemap(self, wb_url, res, full_prefix):
def make_timemap(self, wb_url, res, full_prefix, output):
wb_url.type = wb_url.QUERY
content_type = res.headers.get('Content-Type')
@ -472,7 +472,7 @@ class RewriterApp(object):
elif res.status_code:
status = str(res.status_code) + ' ' + res.reason
if res.status_code == 200:
if res.status_code == 200 and output == 'link':
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix)
text = MementoUtils.wrap_timemap_header(wb_url.url,
@ -486,8 +486,9 @@ class RewriterApp(object):
def handle_query(self, environ, wb_url, kwargs, full_prefix):
res = self.do_query(wb_url, kwargs)
if kwargs.get('output'):
return self.make_timemap(wb_url, res, full_prefix)
output = kwargs.get('output')
if output:
return self.make_timemap(wb_url, res, full_prefix, output)
def format_cdx(text):
cdx_lines = text.rstrip().split('\n')

View File

@ -31,8 +31,8 @@ class TestPathIndex(object):
assert res[0] == os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
def test_resolver_dir_wildcard_as_file_url(self):
url = to_file_url(get_test_dir()) + '/*/'
resolver = DefaultResolverMixin.make_best_resolver(url)
url = to_file_url(get_test_dir()) + os.path.sep + '*' + os.path.sep
resolver = PrefixResolver(url)
cdx = CDXObject()
res = resolver('example.warc.gz', cdx)

View File

@ -133,6 +133,23 @@ class TestMemento(MementoMixin, BaseConfigTest):
<http://example.com?example=1>; rel="original",
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; src="pywb:example.cdx",
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; src="pywb:example.cdx"
"""
assert exp == resp.text
def test_timemap_cdxj(self):
    """
    Request the timemap in cdxj output form and verify that it is served
    as text/x-cdxj and that the body matches the two expected index lines.
    """
    expected = """\
com,example)/?example=1 20140103030321 {"url": "http://example.com?example=1", "mime": "text/html", "status": "200", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "redirect": "-", "robotflags": "-", "length": "1043", "offset": "333", "filename": "example.warc.gz", "source": "pywb:example.cdx"}
com,example)/?example=1 20140103030341 {"url": "http://example.com?example=1", "mime": "warc/revisit", "status": "-", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "redirect": "-", "robotflags": "-", "length": "553", "offset": "1864", "filename": "example.warc.gz", "source": "pywb:example.cdx"}
"""
    response = self.testapp.get('/pywb/timemap/cdxj/http://example.com?example=1')

    assert response.status_int == 200
    assert response.content_type == 'text/x-cdxj'

    # The charset is set explicitly before the body is read as text.
    response.charset = 'utf-8'
    assert expected == response.text

View File

@ -99,3 +99,12 @@ class TestRecordReplay(CollsDirMixin, BaseConfigTest):
assert cdxj_lines[0]['filename'] == cdxj_lines[2]['filename']
def test_timemap_all_coll(self):
    """
    Fetch the link-format timemap through the '/all' route and check that
    it has five lines, with the fourth and fifth referencing the autoindex
    of the 'test2' and 'test' collections respectively.
    """
    response = self.testapp.get('/all/timemap/link/http://httpbin.org/get?C=D')
    lines = response.text.rstrip().split('\n')

    assert len(lines) == 5

    test2_line, test_line = lines[3], lines[4]
    assert '_test_colls:test2/indexes/autoindex.cdxj' in test2_line
    assert '_test_colls:test/indexes/autoindex.cdxj' in test_line