mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite improvements: if content-type is text/plain but mod is js_ or cs_, treat as js or css (#31)
header rewriter: ensure removed content-length and content-encoding are added back if no rewriting performed on response body
This commit is contained in:
parent
cd15dbfe48
commit
c8c0cecda3
@ -17,6 +17,12 @@ class RewrittenStatusAndHeaders(object):
|
||||
def contains_removed_header(self, name, value):
|
||||
return self.removed_header_dict.get(name) == value
|
||||
|
||||
def readd_rewrite_removed(self):
|
||||
for name in HeaderRewriter.PROXY_NO_REWRITE_HEADERS:
|
||||
value = self.removed_header_dict.get(name)
|
||||
if value is not None:
|
||||
self.status_headers.headers.append((name, value))
|
||||
|
||||
|
||||
#=================================================================
|
||||
class HeaderRewriter(object):
|
||||
@ -34,6 +40,8 @@ class HeaderRewriter(object):
|
||||
'json': ['application/json'],
|
||||
|
||||
'xml': ['/xml', '+xml', '.xml', '.rss'],
|
||||
|
||||
'plain': ['text/plain'],
|
||||
}
|
||||
|
||||
PROXY_HEADERS = ['content-type', 'content-disposition', 'content-range',
|
||||
@ -41,12 +49,12 @@ class HeaderRewriter(object):
|
||||
|
||||
URL_REWRITE_HEADERS = ['location', 'content-location', 'content-base']
|
||||
|
||||
ENCODING_HEADERS = ['content-encoding']
|
||||
#ENCODING_HEADERS = ['content-encoding']
|
||||
|
||||
REMOVE_HEADERS = ['transfer-encoding', 'content-security-policy',
|
||||
'strict-transport-security']
|
||||
|
||||
PROXY_NO_REWRITE_HEADERS = ['content-length']
|
||||
PROXY_NO_REWRITE_HEADERS = ['content-length', 'content-encoding']
|
||||
|
||||
COOKIE_HEADERS = ['set-cookie', 'cookie']
|
||||
|
||||
@ -141,9 +149,10 @@ class HeaderRewriter(object):
|
||||
elif urlrewriter and lowername in self.URL_REWRITE_HEADERS:
|
||||
new_headers.append((name, urlrewriter.rewrite(value)))
|
||||
|
||||
elif lowername in self.ENCODING_HEADERS:
|
||||
elif lowername in self.PROXY_NO_REWRITE_HEADERS:
|
||||
if content_rewritten:
|
||||
removed_header_dict[lowername] = value
|
||||
add_prefixed_header(name, value)
|
||||
else:
|
||||
add_header(name, value)
|
||||
|
||||
@ -151,10 +160,6 @@ class HeaderRewriter(object):
|
||||
removed_header_dict[lowername] = value
|
||||
add_prefixed_header(name, value)
|
||||
|
||||
elif (lowername in self.PROXY_NO_REWRITE_HEADERS and
|
||||
not content_rewritten):
|
||||
add_header(name, value)
|
||||
|
||||
elif (lowername in self.COOKIE_HEADERS and
|
||||
cookie_rewriter):
|
||||
cookie_list = cookie_rewriter.rewrite(value)
|
||||
|
@ -6,13 +6,12 @@ from pywb.rewrite.rewrite_content import RewriteContent
|
||||
# ============================================================================
|
||||
# Experimental: not fully tested
|
||||
class RewriteContentAMF(RewriteContent): #pragma: no cover
|
||||
def handle_custom_rewrite(self, text_type, status_headers, stream, env):
|
||||
|
||||
if status_headers.get_header('Content-Type') == 'application/x-amf':
|
||||
def handle_custom_rewrite(self, rewritten_headers, stream, mod, env):
|
||||
if rewritten_headers.status_headers.get_header('Content-Type') == 'application/x-amf':
|
||||
stream = self.rewrite_amf(stream, env)
|
||||
|
||||
return (super(RewriteContentAMF, self).
|
||||
handle_custom_rewrite(text_type, status_headers, stream, env))
|
||||
handle_custom_rewrite(rewritten_headers, stream, mod, env))
|
||||
|
||||
def rewrite_amf(self, stream, env):
|
||||
try:
|
||||
|
@ -118,11 +118,9 @@ class RewriteContent(object):
|
||||
urlkey,
|
||||
cookie_rewriter)
|
||||
|
||||
status_headers = rewritten_headers.status_headers
|
||||
|
||||
res = self.handle_custom_rewrite(rewritten_headers.text_type,
|
||||
status_headers,
|
||||
res = self.handle_custom_rewrite(rewritten_headers,
|
||||
stream,
|
||||
wb_url.mod,
|
||||
env)
|
||||
if res:
|
||||
return res
|
||||
@ -131,6 +129,7 @@ class RewriteContent(object):
|
||||
# ====================================================================
|
||||
# special case -- need to ungzip the body
|
||||
|
||||
status_headers = rewritten_headers.status_headers
|
||||
text_type = rewritten_headers.text_type
|
||||
|
||||
# see known js/css modifier specified, the context should run
|
||||
@ -246,11 +245,18 @@ class RewriteContent(object):
|
||||
|
||||
return (status_headers, gen, True)
|
||||
|
||||
def handle_custom_rewrite(self, text_type, status_headers, stream, env):
|
||||
def handle_custom_rewrite(self, rewritten_headers, stream, mod, env):
|
||||
text_type = rewritten_headers.text_type
|
||||
status_headers = rewritten_headers.status_headers
|
||||
|
||||
# use rewritten headers, but no further rewriting needed
|
||||
if text_type is None:
|
||||
return (status_headers, self.stream_to_gen(stream), False)
|
||||
|
||||
if text_type == 'plain' and not mod in ('js_', 'cs_'):
|
||||
rewritten_headers.readd_rewrite_removed()
|
||||
return (status_headers, self.stream_to_gen(stream), False)
|
||||
|
||||
@staticmethod
|
||||
def _extract_html_charset(buff, status_headers):
|
||||
charset = None
|
||||
|
@ -6,7 +6,7 @@ HTTP Headers Rewriting
|
||||
# Text with charset
|
||||
>>> _test_headers([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=UTF-8')])
|
||||
{'charset': 'utf-8',
|
||||
'removed_header_dict': {},
|
||||
'removed_header_dict': {'content-length': '5'},
|
||||
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'),
|
||||
('X-Archive-Orig-Content-Length', '5'),
|
||||
('Content-Type', 'text/html;charset=UTF-8')]),
|
||||
@ -24,9 +24,11 @@ HTTP Headers Rewriting
|
||||
>>> _test_headers([('Content-Length', '199999'), ('Content-Type', 'text/javascript'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')])
|
||||
{'charset': None,
|
||||
'removed_header_dict': {'content-encoding': 'gzip',
|
||||
'content-length': '199999',
|
||||
'transfer-encoding': 'chunked'},
|
||||
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Content-Length', '199999'),
|
||||
('Content-Type', 'text/javascript'),
|
||||
('X-Archive-Orig-Content-Encoding', 'gzip'),
|
||||
('X-Archive-Orig-Transfer-Encoding', 'chunked')]),
|
||||
'text_type': 'js'}
|
||||
|
||||
|
@ -125,7 +125,7 @@ class TestProxyLiveRewriter:
|
||||
|
||||
def test_echo_proxy_start_unbounded_remove_range(self):
|
||||
headers = [('Range', 'bytes=0-')]
|
||||
resp = self.testapp.get('/rewrite/http://example.com/', headers=headers)
|
||||
resp = self.testapp.get('/rewrite/http://httpbin.org/range/100', headers=headers)
|
||||
|
||||
# actual response is with range
|
||||
assert resp.status_int == 206
|
||||
@ -138,7 +138,7 @@ class TestProxyLiveRewriter:
|
||||
assert self.requestlog[0] == resp.text
|
||||
assert resp.headers['x-archive-orig-x-proxy'] == 'test'
|
||||
|
||||
assert self.requestlog[0].startswith('GET http://example.com/ HTTP/1.1')
|
||||
assert self.requestlog[0].startswith('GET http://httpbin.org/range/100 HTTP/1.1')
|
||||
assert 'range: ' not in self.requestlog[0]
|
||||
|
||||
assert len(self.cache) == 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user