mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
revisit of redirect optimization: (#753)
- if a revisit is of a redirect (3xx response) and the revisit record has HTTP headers, return those HTTP headers with an empty payload -- don't bother loading the original record. - builds on changes in #751. - cleanup of redirect revisit tests from #751.
This commit is contained in:
parent
0cc912da95
commit
c121198183
@ -75,6 +75,14 @@ class ResolvingLoader(object):
|
||||
# two index lookups
|
||||
# Case 1: if mimetype is still warc/revisit
|
||||
if cdx.get('mime') == 'warc/revisit' and headers_record:
|
||||
if headers_record.http_headers:
|
||||
status = headers_record.http_headers.get_statuscode()
|
||||
# optimization: if redirect, don't load payload record, as it'll be ignored by browser
|
||||
# always replay zero-length payload
|
||||
if status and status.startswith('3'):
|
||||
headers_record.http_headers.replace_header('Content-Length', '0')
|
||||
return headers_record, headers_record
|
||||
|
||||
payload_record = self._load_different_url_payload(cdx,
|
||||
headers_record,
|
||||
failed_files,
|
||||
|
@ -104,13 +104,11 @@ class TestWbIntegration(BaseConfigTest):
|
||||
def test_replay_content_head_non_zero_content_length_match(self):
|
||||
resp = self.testapp.get('/pywb/20140126200625id_/http://www.iana.org/_js/2013.1/jquery.js', status=200)
|
||||
length = resp.content_length
|
||||
print('length', length)
|
||||
|
||||
# Content-Length included if non-zero
|
||||
resp = self.testapp.head('/pywb/20140126200625id_/http://www.iana.org/_js/2013.1/jquery.js', status=200)
|
||||
|
||||
#assert resp.headers['Content-Length'] == length
|
||||
print('length', resp.content_length)
|
||||
assert resp.content_length == length
|
||||
|
||||
def test_replay_content(self, fmod):
|
||||
|
@ -1,3 +1,4 @@
|
||||
from .base_config_test import BaseConfigTest, CollsDirMixin, fmod
|
||||
|
||||
from io import BytesIO
|
||||
import os
|
||||
@ -5,8 +6,6 @@ import os
|
||||
from warcio import WARCWriter, StatusAndHeaders
|
||||
from pywb.manager.manager import main as wb_manager
|
||||
|
||||
from .base_config_test import BaseConfigTest, CollsDirMixin, fmod
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestRevisits(CollsDirMixin, BaseConfigTest):
|
||||
@ -125,18 +124,22 @@ class TestRevisits(CollsDirMixin, BaseConfigTest):
|
||||
res = self.get('/revisits/20220101{0}/http://example.com/', fmod, status=301)
|
||||
assert res.headers["Custom"] == "4"
|
||||
assert res.headers["Location"].endswith("/20220101{0}/https://example.com/redirect-4".format(fmod))
|
||||
assert res.text == 'some\ntext'
|
||||
assert res.content_length == 0
|
||||
assert res.text == ''
|
||||
|
||||
def test_different_url_revisit_and_response(self, fmod):
|
||||
def test_different_url_response_and_revisit(self, fmod):
|
||||
# response
|
||||
res = self.get('/revisits/20200101{0}/http://example.com/orig-2', fmod, status=301)
|
||||
assert res.headers["Custom"] == "2"
|
||||
assert res.headers["Location"].endswith("/20200101{0}/https://example.com/redirect-2".format(fmod))
|
||||
assert res.text == 'some\ntext'
|
||||
|
||||
# revisit
|
||||
res = self.get('/revisits/20220101{0}/http://example.com/orig-2', fmod, status=301)
|
||||
assert res.headers["Custom"] == "3"
|
||||
assert res.headers["Location"].endswith("/20220101{0}/https://example.com/redirect-3".format(fmod))
|
||||
assert res.text == 'some\ntext'
|
||||
assert res.content_length == 0
|
||||
assert res.text == ''
|
||||
|
||||
def test_orig(self, fmod):
|
||||
res = self.get('/revisits/20200101{0}/http://example.com/orig-1', fmod, status=301)
|
||||
|
Loading…
x
Reference in New Issue
Block a user