1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

warcserver: self-redirect improvement: ignore a trailing slash in the self-redirect check, so that URLs differing only by a trailing slash are considered a self-redirect; update tests

This commit is contained in:
Ilya Kreymer 2017-11-09 21:22:11 -08:00
parent da2ae0f373
commit 0c74616070
2 changed files with 8 additions and 2 deletions

View File

@ -139,8 +139,8 @@ class BaseLoader(object):
host = urlsplit(cdx['url']).netloc
location_url = host + location_url
location_url = location_url.split('://', 1)[-1]
request_url = request_url.split('://', 1)[-1]
location_url = location_url.split('://', 1)[-1].rstrip('/')
request_url = request_url.split('://', 1)[-1].rstrip('/')
if request_url == location_url:
msg = 'Self Redirect {0} -> {1}'

View File

@ -350,6 +350,12 @@ class TestWbIntegration(BaseConfigTest):
assert resp.status_int == 200
assert resp.headers['Content-Location'].endswith('/pywb/20140126200928{0}/http://www.iana.org/domains/root/db'.format(fmod))
# Replay test: request the trailing-slash form of the URL at timestamp
# 20140126200927 and expect a 200 response whose Content-Location names the
# 20140126200928 capture of the same URL without the trailing slash.
# NOTE(review): judging by the test name, this presumably covers skipping a
# self-redirect whose target differs only by the trailing slash -- confirm.
def test_non_exact_replay_skip_self_redir_slash(self, fmod):
# Note the trailing '/' after 'db'; the expected Content-Location below ends without it.
uri = '/pywb/20140126200927{0}/http://www.iana.org/domains/root/db/'
resp = self.get(uri, fmod)
assert resp.status_int == 200
# fmod is substituted for {0} in the expected path suffix.
assert resp.headers['Content-Location'].endswith('/pywb/20140126200928{0}/http://www.iana.org/domains/root/db'.format(fmod))
def test_not_existant_warc_other_capture(self, fmod):
resp = self.get('/pywb/20140703030321{0}/http://example.com/?example=2', fmod)
assert resp.status_int == 200