From 2ddff987be6cc83eb4bc5a9309c69b9394bb3fb5 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 7 Dec 2017 17:46:50 -0800 Subject: [PATCH] range requests: rewriting disabled only if range response (206) is returned tests: add test to ensure range request redirect response is correctly rewritten, add 302 replay test --- pywb/apps/rewriterapp.py | 8 ++++---- tests/test_integration.py | 12 +++++------- tests/test_range.py | 7 ++++++- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index fb0a000e..ebb60555 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -156,9 +156,6 @@ class RewriterApp(object): range_start = start range_end = end - # disable rewriting - wb_url.mod = 'id_' - #if start with 0, load from upstream, but add range after if start == 0: del inputreq.env['HTTP_RANGE'] @@ -197,6 +194,8 @@ class RewriterApp(object): record.http_headers.replace_header('Content-Length', str(range_len)) record.raw_stream = OffsetLimitReader(record.raw_stream, range_start, range_len) + return True + except (ValueError, TypeError): pass @@ -326,7 +325,8 @@ class RewriterApp(object): self._add_custom_params(cdx, r.headers, kwargs) - self._add_range(record, wb_url, range_start, range_end) + if self._add_range(record, wb_url, range_start, range_end): + wb_url.mod = 'id_' is_ajax = self.is_ajax(environ) diff --git a/tests/test_integration.py b/tests/test_integration.py index 666ff98f..3191dec0 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -105,6 +105,11 @@ class TestWbIntegration(BaseConfigTest): resp = self.get('/pywb/20171122230223{0}/http://httpbin.org/anything/resource.json', fmod) assert resp.headers['Content-Type'] == 'application/json' + def test_replay_redirect(self, fmod): + resp = self.get('/pywb/2014{0}/http://www.iana.org/domains/example', fmod) + assert resp.headers['Location'].startswith('/pywb/2014{0}/'.format(fmod)) + assert resp.status_code == 302 
+ def test_replay_fuzzy_1(self, fmod): resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/?_=123', fmod) assert resp.status_int == 200 @@ -193,13 +198,6 @@ class TestWbIntegration(BaseConfigTest): # original unrewritten url present assert '"http://www.iana.org/domains/example"' in resp.text - def _test_replay_redir_no_cache(self): - headers = [('Range', 'bytes=10-10000')] - # Range ignored - resp = self.testapp.get('/pywb/20140126200927/http://www.iana.org/domains/root/db/', headers=headers) - assert resp.status_int == 302 - assert resp.content_length == 0 - def test_replay_identity_2_arcgz(self): resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com/') diff --git a/tests/test_range.py b/tests/test_range.py index dc81965b..a9d6c449 100644 --- a/tests/test_range.py +++ b/tests/test_range.py @@ -59,6 +59,12 @@ class TestReplayRange(BaseConfigTest): assert 'wombat.js' not in resp.text + def test_replay_range_on_redirect(self, fmod): + headers = [('Range', 'bytes=0-')] + resp = self.get('/pywb/2014{0}/http://www.iana.org/domains/example', fmod, headers=headers) + assert resp.headers['Location'].startswith('/pywb/2014{0}/'.format(fmod)) + assert resp.status_code == 302 + def test_error_range_out_of_bounds_1(self, fmod): headers = [('Range', 'bytes=10-2000')] resp = self.get('/pywb/20140127171251{0}/http://example.com/', fmod, headers=headers, status=416) @@ -67,7 +73,6 @@ class TestReplayRange(BaseConfigTest): assert self.recorder_skip == '1' - def test_error_range_out_of_bounds_2(self, fmod): headers = [('Range', 'bytes=2000-10')] resp = self.get('/pywb/20140127171251{0}/http://example.com/', fmod, headers=headers, status=416)