diff --git a/pywb/webapp/rangecache.py b/pywb/webapp/rangecache.py index dd6b8bdc..09da656b 100644 --- a/pywb/webapp/rangecache.py +++ b/pywb/webapp/rangecache.py @@ -18,19 +18,21 @@ class RangeCache(object): atexit.register(self.cleanup) def cleanup(self): - if self.temp_dir: + if self.temp_dir: # pragma: no cover import shutil print('Removing: ' + self.temp_dir) shutil.rmtree(self.temp_dir, True) + self.temp_dir = None def __call__(self, wbrequest, digest, wbresponse_func): result = wbrequest.extract_range() if not result: return None, None - if wbrequest.env.get('HTTP_X_IGNORE_RANGE_ARG'): - wbrequest.wb_url.url = result[0] - return None, None + # no longer needed -- handled at frontend rewrite + #if wbrequest.env.get('HTTP_X_IGNORE_RANGE_ARG'): + # wbrequest.wb_url.url = result[0] + # return None, None return self.handle_range(wbrequest, digest, @@ -47,7 +49,6 @@ class RangeCache(object): # only cache 200 responses if not response.status_headers.get_statuscode().startswith('200'): - print('NON 200 RESP') return response.status_headers, response.body if not self.temp_dir: @@ -89,10 +90,12 @@ class RangeCache(object): yield buf + status_headers = StatusAndHeaders('200 OK', spec['headers']) + if use_206: - WbResponse.add_range_status_h(status_headers) - else: - status_headers = StatusAndHeaders('200 OK', spec['headers']) + StatusAndHeaders.add_range(status_headers, start, + maxlen, + filelen) status_headers.replace_header('Content-Length', str(maxlen)) diff --git a/tests/test_config.yaml b/tests/test_config.yaml index ea0f21a9..79a62cfe 100644 --- a/tests/test_config.yaml +++ b/tests/test_config.yaml @@ -16,7 +16,7 @@ collections: pywb-filt: index_paths: './sample_archive/cdx/' filters: ['filename:dupe*'] - + pywb-filt-2: index_paths: './sample_archive/cdx/' filters: ['!filename:dupe*'] @@ -26,7 +26,7 @@ collections: framed_replay: false # collection of non-surt CDX - pywb-nosurt: + pywb-nosurt: index_paths: './sample_archive/non-surt-cdx/' surt_ordered: false @@ -38,6 +38,9 @@ collections: index_paths: ./sample_archive/cdx/ fallback: live + pywb-rangecache: + index_paths: ./sample_archive/cdx/ + enable_cache: true # indicate if cdx files are sorted by SURT keys -- eg: com,example)/ # SURT keys are recommended for future indices, but non-SURT cdxs diff --git a/tests/test_integration.py b/tests/test_integration.py index 208ceb9c..b49248b8 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -101,6 +101,15 @@ class TestWb: assert 'new _WBWombat' in resp.body, resp.body assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body + def test_replay_content_with_rangecache(self): + resp = self.testapp.get('/pywb-rangecache/20140127171238/http://www.iana.org/') + self._assert_basic_html(resp) + + assert '"20140127171238"' in resp.body + assert 'wb.js' in resp.body + assert 'new _WBWombat' in resp.body, resp.body + assert '/pywb-rangecache/20140127171238/http://www.iana.org/time-zones"' in resp.body + def test_replay_non_frame_content(self): resp = self.testapp.get('/pywb-nonframe/20140127171238/http://www.iana.org/') self._assert_basic_html(resp) @@ -161,9 +170,41 @@ class TestWb: # no wb header insertion assert 'wb.js' not in resp.body + assert resp.content_length == 1270, resp.content_length + # original unrewritten url present assert '"http://www.iana.org/domains/example"' in resp.body + def test_replay_range_cache_content(self): + headers = [('Range', 'bytes=0-200')] + resp = self.testapp.get('/pywb-rangecache/20140127171251id_/http://example.com', headers=headers) + + assert resp.status_int == 206 + assert resp.headers['Accept-Ranges'] == 'bytes' + assert resp.headers['Content-Range'] == 'bytes 0-200/1270', resp.headers['Content-Range'] + assert resp.content_length == 201, resp.content_length + + assert 'wb.js' not in resp.body + + def test_replay_range_cache_content_bound_end(self): + headers = [('Range', 'bytes=10-10000')] + resp = self.testapp.get('/pywb-rangecache/20140127171251id_/http://example.com', headers=headers) + + assert resp.status_int == 206 + assert resp.headers['Accept-Ranges'] == 'bytes' + assert resp.headers['Content-Range'] == 'bytes 10-1269/1270', resp.headers['Content-Range'] + assert resp.content_length == 1260, resp.content_length + assert len(resp.body) == resp.content_length + + assert 'wb.js' not in resp.body + + def test_replay_redir_no_cache(self): + headers = [('Range', 'bytes=10-10000')] + # Range ignored + resp = self.testapp.get('/pywb-rangecache/20140126200927/http://www.iana.org/domains/root/db/', headers=headers) + assert resp.content_length == 0 + assert resp.status_int == 302 + def test_replay_identity_2_arcgz(self): resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com')