mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
content rewriter: encoding check: if the response has a Content-Encoding with no match in the Accept-Encoding header, auto-decode the response (even if not otherwise rewriting) (#372)
rewriterapp: pass environ to content rewriter to allow access to request HTTP headers
tests: test brotli served with 'br' in Accept-Encoding (no change), and without (response auto-decoded)
This commit is contained in:
parent
dfc3033117
commit
9c44739bae
@ -374,7 +374,7 @@ class RewriterApp(object):
|
|||||||
|
|
||||||
urlrewriter.rewrite_opts['ua_string'] = environ.get('HTTP_USER_AGENT')
|
urlrewriter.rewrite_opts['ua_string'] = environ.get('HTTP_USER_AGENT')
|
||||||
|
|
||||||
result = content_rw(record, urlrewriter, cookie_rewriter, head_insert_func, cdx)
|
result = content_rw(record, urlrewriter, cookie_rewriter, head_insert_func, cdx, environ)
|
||||||
|
|
||||||
status_headers, gen, is_rw = result
|
status_headers, gen, is_rw = result
|
||||||
|
|
||||||
|
@ -175,8 +175,9 @@ class BaseContentRewriter(object):
|
|||||||
|
|
||||||
def __call__(self, record, url_rewriter, cookie_rewriter,
|
def __call__(self, record, url_rewriter, cookie_rewriter,
|
||||||
head_insert_func=None,
|
head_insert_func=None,
|
||||||
cdx=None):
|
cdx=None, environ=None):
|
||||||
|
|
||||||
|
environ = environ or {}
|
||||||
rwinfo = RewriteInfo(record, self, url_rewriter, cookie_rewriter)
|
rwinfo = RewriteInfo(record, self, url_rewriter, cookie_rewriter)
|
||||||
content_rewriter = None
|
content_rewriter = None
|
||||||
|
|
||||||
@ -192,6 +193,16 @@ class BaseContentRewriter(object):
|
|||||||
|
|
||||||
gen = None
|
gen = None
|
||||||
|
|
||||||
|
# check if decoding is needed
|
||||||
|
if not rwinfo.is_content_rw:
|
||||||
|
content_encoding = rwinfo.record.http_headers.get_header('Content-Encoding')
|
||||||
|
accept_encoding = environ.get('HTTP_ACCEPT_ENCODING', '')
|
||||||
|
|
||||||
|
# if content-encoding is set but encoding is not in accept encoding,
|
||||||
|
# enable content_rw force decompression
|
||||||
|
if content_encoding and content_encoding not in accept_encoding:
|
||||||
|
rwinfo.is_content_rw = True
|
||||||
|
|
||||||
if content_rewriter:
|
if content_rewriter:
|
||||||
gen = content_rewriter(rwinfo)
|
gen = content_rewriter(rwinfo)
|
||||||
elif rwinfo.is_content_rw:
|
elif rwinfo.is_content_rw:
|
||||||
|
@ -20,6 +20,7 @@ from pywb import get_test_dir
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import pytest
|
import pytest
|
||||||
|
import six
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@ -45,7 +46,8 @@ class TestContentRewriter(object):
|
|||||||
|
|
||||||
warc_headers = warc_headers or {}
|
warc_headers = warc_headers or {}
|
||||||
|
|
||||||
payload = payload.encode('utf-8')
|
if isinstance(payload, six.text_type):
|
||||||
|
payload = payload.encode('utf-8')
|
||||||
|
|
||||||
http_headers = StatusAndHeaders('200 OK', headers, protocol='HTTP/1.0')
|
http_headers = StatusAndHeaders('200 OK', headers, protocol='HTTP/1.0')
|
||||||
|
|
||||||
@ -57,7 +59,7 @@ class TestContentRewriter(object):
|
|||||||
|
|
||||||
def rewrite_record(self, headers, content, ts, url='http://example.com/',
|
def rewrite_record(self, headers, content, ts, url='http://example.com/',
|
||||||
prefix='http://localhost:8080/prefix/', warc_headers=None,
|
prefix='http://localhost:8080/prefix/', warc_headers=None,
|
||||||
request_url=None, is_live=None, use_js_proxy=True):
|
request_url=None, is_live=None, use_js_proxy=True, environ=None):
|
||||||
|
|
||||||
record = self._create_response_record(url, headers, content, warc_headers)
|
record = self._create_response_record(url, headers, content, warc_headers)
|
||||||
|
|
||||||
@ -73,9 +75,9 @@ class TestContentRewriter(object):
|
|||||||
cdx['is_live'] = is_live
|
cdx['is_live'] = is_live
|
||||||
|
|
||||||
if use_js_proxy:
|
if use_js_proxy:
|
||||||
return self.js_proxy_content_rewriter(record, url_rewriter, None, cdx=cdx)
|
return self.js_proxy_content_rewriter(record, url_rewriter, None, cdx=cdx, environ=environ)
|
||||||
else:
|
else:
|
||||||
return self.content_rewriter(record, url_rewriter, None, cdx=cdx)
|
return self.content_rewriter(record, url_rewriter, None, cdx=cdx, environ=environ)
|
||||||
|
|
||||||
def test_rewrite_html(self, headers):
|
def test_rewrite_html(self, headers):
|
||||||
content = '<html><body><a href="http://example.com/"></a></body></html>'
|
content = '<html><body><a href="http://example.com/"></a></body></html>'
|
||||||
@ -269,6 +271,42 @@ class TestContentRewriter(object):
|
|||||||
|
|
||||||
assert ('Transfer-Encoding', 'chunked') not in headers.headers
|
assert ('Transfer-Encoding', 'chunked') not in headers.headers
|
||||||
|
|
||||||
|
@pytest.mark.importorskip('brotli')
|
||||||
|
def test_brotli_accepted_no_change(self):
|
||||||
|
import brotli
|
||||||
|
content = brotli.compress('ABCDEFG'.encode('utf-8'))
|
||||||
|
|
||||||
|
headers = {'Content-Type': 'application/octet-stream',
|
||||||
|
'Content-Encoding': 'br',
|
||||||
|
'Content-Length': str(len(content))
|
||||||
|
}
|
||||||
|
|
||||||
|
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701mp_',
|
||||||
|
environ={'HTTP_ACCEPT_ENCODING': 'gzip, deflate, br'})
|
||||||
|
|
||||||
|
assert headers['Content-Encoding'] == 'br'
|
||||||
|
assert headers['Content-Length'] == str(len(content))
|
||||||
|
|
||||||
|
assert brotli.decompress(b''.join(gen)).decode('utf-8') == 'ABCDEFG'
|
||||||
|
|
||||||
|
@pytest.mark.importorskip('brotli')
|
||||||
|
def test_brotli_not_accepted_auto_decode(self):
|
||||||
|
import brotli
|
||||||
|
content = brotli.compress('ABCDEFG'.encode('utf-8'))
|
||||||
|
|
||||||
|
headers = {'Content-Type': 'application/octet-stream',
|
||||||
|
'Content-Encoding': 'br',
|
||||||
|
'Content-Length': str(len(content))
|
||||||
|
}
|
||||||
|
|
||||||
|
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701mp_')
|
||||||
|
|
||||||
|
assert 'Content-Encoding' not in headers
|
||||||
|
assert 'Content-Length' not in headers
|
||||||
|
assert headers['X-Archive-Orig-Content-Encoding'] == 'br'
|
||||||
|
|
||||||
|
assert b''.join(gen).decode('utf-8') == 'ABCDEFG'
|
||||||
|
|
||||||
def test_rewrite_json(self):
|
def test_rewrite_json(self):
|
||||||
headers = {'Content-Type': 'application/json'}
|
headers = {'Content-Type': 'application/json'}
|
||||||
content = '/**/ jQuery_ABC({"foo": "bar"});'
|
content = '/**/ jQuery_ABC({"foo": "bar"});'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user