From 0c74616070ec4e1b209f188d8e38d0087e14891f Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Thu, 9 Nov 2017 21:22:11 -0800
Subject: [PATCH] warcserver: self-redirect improvement: include trailing slash in self-redirect check, urls differing only by trailing slash should be considered self-redirect, update tests

---
 pywb/warcserver/resource/responseloader.py | 4 ++--
 tests/test_integration.py                  | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/pywb/warcserver/resource/responseloader.py b/pywb/warcserver/resource/responseloader.py
index 51fcae34..d45d291f 100644
--- a/pywb/warcserver/resource/responseloader.py
+++ b/pywb/warcserver/resource/responseloader.py
@@ -139,8 +139,8 @@ class BaseLoader(object):
             host = urlsplit(cdx['url']).netloc
             location_url = host + location_url
 
-        location_url = location_url.split('://', 1)[-1]
-        request_url = request_url.split('://', 1)[-1]
+        location_url = location_url.split('://', 1)[-1].rstrip('/')
+        request_url = request_url.split('://', 1)[-1].rstrip('/')
 
         if request_url == location_url:
             msg = 'Self Redirect {0} -> {1}'
diff --git a/tests/test_integration.py b/tests/test_integration.py
index ead35c43..a929a950 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -350,6 +350,12 @@ class TestWbIntegration(BaseConfigTest):
         assert resp.status_int == 200
         assert resp.headers['Content-Location'].endswith('/pywb/20140126200928{0}/http://www.iana.org/domains/root/db'.format(fmod))
 
+    def test_non_exact_replay_skip_self_redir_slash(self, fmod):
+        uri = '/pywb/20140126200927{0}/http://www.iana.org/domains/root/db/'
+        resp = self.get(uri, fmod)
+        assert resp.status_int == 200
+        assert resp.headers['Content-Location'].endswith('/pywb/20140126200928{0}/http://www.iana.org/domains/root/db'.format(fmod))
+
     def test_not_existant_warc_other_capture(self, fmod):
         resp = self.get('/pywb/20140703030321{0}/http://example.com/?example=2', fmod)
         assert resp.status_int == 200