1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

range requests: rewriting disabled only if range response (206) is returned

tests: add test to ensure a range-request redirect response is correctly rewritten; add 302 replay test
This commit is contained in:
Ilya Kreymer 2017-12-07 17:46:50 -08:00
parent 9eba59d8b4
commit 2ddff987be
3 changed files with 15 additions and 12 deletions

@ -156,9 +156,6 @@ class RewriterApp(object):
range_start = start
range_end = end
# disable rewriting
wb_url.mod = 'id_'
#if start with 0, load from upstream, but add range after
if start == 0:
del inputreq.env['HTTP_RANGE']
@ -197,6 +194,8 @@ class RewriterApp(object):
record.http_headers.replace_header('Content-Length', str(range_len))
record.raw_stream = OffsetLimitReader(record.raw_stream, range_start, range_len)
return True
except (ValueError, TypeError):
pass
@ -326,7 +325,8 @@ class RewriterApp(object):
self._add_custom_params(cdx, r.headers, kwargs)
self._add_range(record, wb_url, range_start, range_end)
if self._add_range(record, wb_url, range_start, range_end):
wb_url.mod = 'id_'
is_ajax = self.is_ajax(environ)

@ -105,6 +105,11 @@ class TestWbIntegration(BaseConfigTest):
resp = self.get('/pywb/20171122230223{0}/http://httpbin.org/anything/resource.json', fmod)
assert resp.headers['Content-Type'] == 'application/json'
def test_replay_redirect(self, fmod):
    """Replay of the iana.org example URL must answer with a rewritten 302.

    Requests the capture through the ``/pywb/2014<fmod>/`` prefix and
    checks that the ``Location`` header stays under that same prefix and
    that the status code is 302.

    NOTE(review): ``fmod`` is presumably the URL modifier injected by the
    test fixture and substituted into the path by ``self.get`` -- confirm
    against the base test class.
    """
    response = self.get('/pywb/2014{0}/http://www.iana.org/domains/example', fmod)

    # The redirect target has to remain inside the same collection/modifier.
    expected_prefix = '/pywb/2014{0}/'.format(fmod)
    assert response.headers['Location'].startswith(expected_prefix)
    assert response.status_code == 302
def test_replay_fuzzy_1(self, fmod):
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/?_=123', fmod)
assert resp.status_int == 200
@ -193,13 +198,6 @@ class TestWbIntegration(BaseConfigTest):
# original unrewritten url present
assert '"http://www.iana.org/domains/example"' in resp.text
def _test_replay_redir_no_cache(self):
    """Request a redirecting capture with a Range header and expect a bare 302.

    Sends ``Range: bytes=10-10000`` for the iana.org root db URL and checks
    that the response is a 302 with a zero content length.

    NOTE(review): the leading underscore presumably keeps this method out
    of test collection (it does not match the ``test_`` prefix) -- confirm
    whether it is intentionally disabled.
    """
    range_headers = [('Range', 'bytes=10-10000')]

    # Range ignored
    response = self.testapp.get(
        '/pywb/20140126200927/http://www.iana.org/domains/root/db/',
        headers=range_headers,
    )

    assert response.status_int == 302
    assert response.content_length == 0
def test_replay_identity_2_arcgz(self):
resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com/')

@ -59,6 +59,12 @@ class TestReplayRange(BaseConfigTest):
assert 'wombat.js' not in resp.text
def test_replay_range_on_redirect(self, fmod):
    """A Range request against a redirecting capture must still be rewritten.

    Sends ``Range: bytes=0-`` for the iana.org example URL through the
    ``/pywb/2014<fmod>/`` prefix and checks that the ``Location`` header
    stays under that prefix and that the status code is 302.

    NOTE(review): ``fmod`` is presumably the URL modifier injected by the
    test fixture and substituted into the path by ``self.get`` -- confirm
    against the base test class.
    """
    range_headers = [('Range', 'bytes=0-')]
    response = self.get(
        '/pywb/2014{0}/http://www.iana.org/domains/example',
        fmod,
        headers=range_headers,
    )

    # The redirect target has to remain inside the same collection/modifier.
    expected_prefix = '/pywb/2014{0}/'.format(fmod)
    assert response.headers['Location'].startswith(expected_prefix)
    assert response.status_code == 302
def test_error_range_out_of_bounds_1(self, fmod):
headers = [('Range', 'bytes=10-2000')]
resp = self.get('/pywb/20140127171251{0}/http://example.com/', fmod, headers=headers, status=416)
@ -67,7 +73,6 @@ class TestReplayRange(BaseConfigTest):
assert self.recorder_skip == '1'
def test_error_range_out_of_bounds_2(self, fmod):
headers = [('Range', 'bytes=2000-10')]
resp = self.get('/pywb/20140127171251{0}/http://example.com/', fmod, headers=headers, status=416)