diff --git a/pywb/warcserver/resource/resolvingloader.py b/pywb/warcserver/resource/resolvingloader.py index 72d8c5ad..cc9f23b9 100644 --- a/pywb/warcserver/resource/resolvingloader.py +++ b/pywb/warcserver/resource/resolvingloader.py @@ -75,6 +75,14 @@ class ResolvingLoader(object): # two index lookups # Case 1: if mimetype is still warc/revisit if cdx.get('mime') == 'warc/revisit' and headers_record: + if headers_record.http_headers: + status = headers_record.http_headers.get_statuscode() + # optimization: if redirect, don't load payload record, as it'll be ignored by browser + # always replay zero-length payload + if status and status.startswith('3'): + headers_record.http_headers.replace_header('Content-Length', '0') + return headers_record, headers_record + payload_record = self._load_different_url_payload(cdx, headers_record, failed_files, diff --git a/tests/test_integration.py b/tests/test_integration.py index 36d53139..53440109 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -104,13 +104,11 @@ class TestWbIntegration(BaseConfigTest): def test_replay_content_head_non_zero_content_length_match(self): resp = self.testapp.get('/pywb/20140126200625id_/http://www.iana.org/_js/2013.1/jquery.js', status=200) length = resp.content_length - print('length', length) # Content-Length included if non-zero resp = self.testapp.head('/pywb/20140126200625id_/http://www.iana.org/_js/2013.1/jquery.js', status=200) #assert resp.headers['Content-Length'] == length - print('length', resp.content_length) assert resp.content_length == length def test_replay_content(self, fmod): diff --git a/tests/test_redirect_revisits.py b/tests/test_redirect_revisits.py index 3188aa1d..68bb5f7b 100644 --- a/tests/test_redirect_revisits.py +++ b/tests/test_redirect_revisits.py @@ -1,3 +1,4 @@ +from .base_config_test import BaseConfigTest, CollsDirMixin, fmod from io import BytesIO import os @@ -5,8 +6,6 @@ import os from warcio import WARCWriter, StatusAndHeaders from pywb.manager.manager import main as wb_manager -from .base_config_test import BaseConfigTest, CollsDirMixin, fmod - # ============================================================================ class TestRevisits(CollsDirMixin, BaseConfigTest): @@ -125,18 +124,22 @@ class TestRevisits(CollsDirMixin, BaseConfigTest): res = self.get('/revisits/20220101{0}/http://example.com/', fmod, status=301) assert res.headers["Custom"] == "4" assert res.headers["Location"].endswith("/20220101{0}/https://example.com/redirect-4".format(fmod)) - assert res.text == 'some\ntext' + assert res.content_length == 0 + assert res.text == '' - def test_different_url_revisit_and_response(self, fmod): + def test_different_url_response_and_revisit(self, fmod): + # response res = self.get('/revisits/20200101{0}/http://example.com/orig-2', fmod, status=301) assert res.headers["Custom"] == "2" assert res.headers["Location"].endswith("/20200101{0}/https://example.com/redirect-2".format(fmod)) assert res.text == 'some\ntext' + # revisit res = self.get('/revisits/20220101{0}/http://example.com/orig-2', fmod, status=301) assert res.headers["Custom"] == "3" assert res.headers["Location"].endswith("/20220101{0}/https://example.com/redirect-3".format(fmod)) - assert res.text == 'some\ntext' + assert res.content_length == 0 + assert res.text == '' def test_orig(self, fmod): res = self.get('/revisits/20200101{0}/http://example.com/orig-1', fmod, status=301)