mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite improvements: if content-type is text/plain but mod is js_ or cs_, treat as js or css (#31)
header rewriter: ensure removed content-length and content-encoding headers are added back if no rewriting is performed on the response body
This commit is contained in:
parent
cd15dbfe48
commit
c8c0cecda3
@ -17,6 +17,12 @@ class RewrittenStatusAndHeaders(object):
|
|||||||
def contains_removed_header(self, name, value):
|
def contains_removed_header(self, name, value):
|
||||||
return self.removed_header_dict.get(name) == value
|
return self.removed_header_dict.get(name) == value
|
||||||
|
|
||||||
|
def readd_rewrite_removed(self):
|
||||||
|
for name in HeaderRewriter.PROXY_NO_REWRITE_HEADERS:
|
||||||
|
value = self.removed_header_dict.get(name)
|
||||||
|
if value is not None:
|
||||||
|
self.status_headers.headers.append((name, value))
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class HeaderRewriter(object):
|
class HeaderRewriter(object):
|
||||||
@ -34,6 +40,8 @@ class HeaderRewriter(object):
|
|||||||
'json': ['application/json'],
|
'json': ['application/json'],
|
||||||
|
|
||||||
'xml': ['/xml', '+xml', '.xml', '.rss'],
|
'xml': ['/xml', '+xml', '.xml', '.rss'],
|
||||||
|
|
||||||
|
'plain': ['text/plain'],
|
||||||
}
|
}
|
||||||
|
|
||||||
PROXY_HEADERS = ['content-type', 'content-disposition', 'content-range',
|
PROXY_HEADERS = ['content-type', 'content-disposition', 'content-range',
|
||||||
@ -41,12 +49,12 @@ class HeaderRewriter(object):
|
|||||||
|
|
||||||
URL_REWRITE_HEADERS = ['location', 'content-location', 'content-base']
|
URL_REWRITE_HEADERS = ['location', 'content-location', 'content-base']
|
||||||
|
|
||||||
ENCODING_HEADERS = ['content-encoding']
|
#ENCODING_HEADERS = ['content-encoding']
|
||||||
|
|
||||||
REMOVE_HEADERS = ['transfer-encoding', 'content-security-policy',
|
REMOVE_HEADERS = ['transfer-encoding', 'content-security-policy',
|
||||||
'strict-transport-security']
|
'strict-transport-security']
|
||||||
|
|
||||||
PROXY_NO_REWRITE_HEADERS = ['content-length']
|
PROXY_NO_REWRITE_HEADERS = ['content-length', 'content-encoding']
|
||||||
|
|
||||||
COOKIE_HEADERS = ['set-cookie', 'cookie']
|
COOKIE_HEADERS = ['set-cookie', 'cookie']
|
||||||
|
|
||||||
@ -141,9 +149,10 @@ class HeaderRewriter(object):
|
|||||||
elif urlrewriter and lowername in self.URL_REWRITE_HEADERS:
|
elif urlrewriter and lowername in self.URL_REWRITE_HEADERS:
|
||||||
new_headers.append((name, urlrewriter.rewrite(value)))
|
new_headers.append((name, urlrewriter.rewrite(value)))
|
||||||
|
|
||||||
elif lowername in self.ENCODING_HEADERS:
|
elif lowername in self.PROXY_NO_REWRITE_HEADERS:
|
||||||
if content_rewritten:
|
if content_rewritten:
|
||||||
removed_header_dict[lowername] = value
|
removed_header_dict[lowername] = value
|
||||||
|
add_prefixed_header(name, value)
|
||||||
else:
|
else:
|
||||||
add_header(name, value)
|
add_header(name, value)
|
||||||
|
|
||||||
@ -151,10 +160,6 @@ class HeaderRewriter(object):
|
|||||||
removed_header_dict[lowername] = value
|
removed_header_dict[lowername] = value
|
||||||
add_prefixed_header(name, value)
|
add_prefixed_header(name, value)
|
||||||
|
|
||||||
elif (lowername in self.PROXY_NO_REWRITE_HEADERS and
|
|
||||||
not content_rewritten):
|
|
||||||
add_header(name, value)
|
|
||||||
|
|
||||||
elif (lowername in self.COOKIE_HEADERS and
|
elif (lowername in self.COOKIE_HEADERS and
|
||||||
cookie_rewriter):
|
cookie_rewriter):
|
||||||
cookie_list = cookie_rewriter.rewrite(value)
|
cookie_list = cookie_rewriter.rewrite(value)
|
||||||
|
@ -6,13 +6,12 @@ from pywb.rewrite.rewrite_content import RewriteContent
|
|||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Experimental: not fully tested
|
# Experimental: not fully tested
|
||||||
class RewriteContentAMF(RewriteContent): #pragma: no cover
|
class RewriteContentAMF(RewriteContent): #pragma: no cover
|
||||||
def handle_custom_rewrite(self, text_type, status_headers, stream, env):
|
def handle_custom_rewrite(self, rewritten_headers, stream, mod, env):
|
||||||
|
if rewritten_headers.status_headers.get_header('Content-Type') == 'application/x-amf':
|
||||||
if status_headers.get_header('Content-Type') == 'application/x-amf':
|
|
||||||
stream = self.rewrite_amf(stream, env)
|
stream = self.rewrite_amf(stream, env)
|
||||||
|
|
||||||
return (super(RewriteContentAMF, self).
|
return (super(RewriteContentAMF, self).
|
||||||
handle_custom_rewrite(text_type, status_headers, stream, env))
|
handle_custom_rewrite(rewritten_headers, stream, mod, env))
|
||||||
|
|
||||||
def rewrite_amf(self, stream, env):
|
def rewrite_amf(self, stream, env):
|
||||||
try:
|
try:
|
||||||
|
@ -118,11 +118,9 @@ class RewriteContent(object):
|
|||||||
urlkey,
|
urlkey,
|
||||||
cookie_rewriter)
|
cookie_rewriter)
|
||||||
|
|
||||||
status_headers = rewritten_headers.status_headers
|
res = self.handle_custom_rewrite(rewritten_headers,
|
||||||
|
|
||||||
res = self.handle_custom_rewrite(rewritten_headers.text_type,
|
|
||||||
status_headers,
|
|
||||||
stream,
|
stream,
|
||||||
|
wb_url.mod,
|
||||||
env)
|
env)
|
||||||
if res:
|
if res:
|
||||||
return res
|
return res
|
||||||
@ -131,6 +129,7 @@ class RewriteContent(object):
|
|||||||
# ====================================================================
|
# ====================================================================
|
||||||
# special case -- need to ungzip the body
|
# special case -- need to ungzip the body
|
||||||
|
|
||||||
|
status_headers = rewritten_headers.status_headers
|
||||||
text_type = rewritten_headers.text_type
|
text_type = rewritten_headers.text_type
|
||||||
|
|
||||||
# see known js/css modifier specified, the context should run
|
# see known js/css modifier specified, the context should run
|
||||||
@ -246,11 +245,18 @@ class RewriteContent(object):
|
|||||||
|
|
||||||
return (status_headers, gen, True)
|
return (status_headers, gen, True)
|
||||||
|
|
||||||
def handle_custom_rewrite(self, text_type, status_headers, stream, env):
|
def handle_custom_rewrite(self, rewritten_headers, stream, mod, env):
|
||||||
|
text_type = rewritten_headers.text_type
|
||||||
|
status_headers = rewritten_headers.status_headers
|
||||||
|
|
||||||
# use rewritten headers, but no further rewriting needed
|
# use rewritten headers, but no further rewriting needed
|
||||||
if text_type is None:
|
if text_type is None:
|
||||||
return (status_headers, self.stream_to_gen(stream), False)
|
return (status_headers, self.stream_to_gen(stream), False)
|
||||||
|
|
||||||
|
if text_type == 'plain' and not mod in ('js_', 'cs_'):
|
||||||
|
rewritten_headers.readd_rewrite_removed()
|
||||||
|
return (status_headers, self.stream_to_gen(stream), False)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_html_charset(buff, status_headers):
|
def _extract_html_charset(buff, status_headers):
|
||||||
charset = None
|
charset = None
|
||||||
|
@ -6,7 +6,7 @@ HTTP Headers Rewriting
|
|||||||
# Text with charset
|
# Text with charset
|
||||||
>>> _test_headers([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=UTF-8')])
|
>>> _test_headers([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=UTF-8')])
|
||||||
{'charset': 'utf-8',
|
{'charset': 'utf-8',
|
||||||
'removed_header_dict': {},
|
'removed_header_dict': {'content-length': '5'},
|
||||||
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'),
|
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'),
|
||||||
('X-Archive-Orig-Content-Length', '5'),
|
('X-Archive-Orig-Content-Length', '5'),
|
||||||
('Content-Type', 'text/html;charset=UTF-8')]),
|
('Content-Type', 'text/html;charset=UTF-8')]),
|
||||||
@ -24,9 +24,11 @@ HTTP Headers Rewriting
|
|||||||
>>> _test_headers([('Content-Length', '199999'), ('Content-Type', 'text/javascript'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')])
|
>>> _test_headers([('Content-Length', '199999'), ('Content-Type', 'text/javascript'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')])
|
||||||
{'charset': None,
|
{'charset': None,
|
||||||
'removed_header_dict': {'content-encoding': 'gzip',
|
'removed_header_dict': {'content-encoding': 'gzip',
|
||||||
|
'content-length': '199999',
|
||||||
'transfer-encoding': 'chunked'},
|
'transfer-encoding': 'chunked'},
|
||||||
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Content-Length', '199999'),
|
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Content-Length', '199999'),
|
||||||
('Content-Type', 'text/javascript'),
|
('Content-Type', 'text/javascript'),
|
||||||
|
('X-Archive-Orig-Content-Encoding', 'gzip'),
|
||||||
('X-Archive-Orig-Transfer-Encoding', 'chunked')]),
|
('X-Archive-Orig-Transfer-Encoding', 'chunked')]),
|
||||||
'text_type': 'js'}
|
'text_type': 'js'}
|
||||||
|
|
||||||
|
@ -125,7 +125,7 @@ class TestProxyLiveRewriter:
|
|||||||
|
|
||||||
def test_echo_proxy_start_unbounded_remove_range(self):
|
def test_echo_proxy_start_unbounded_remove_range(self):
|
||||||
headers = [('Range', 'bytes=0-')]
|
headers = [('Range', 'bytes=0-')]
|
||||||
resp = self.testapp.get('/rewrite/http://example.com/', headers=headers)
|
resp = self.testapp.get('/rewrite/http://httpbin.org/range/100', headers=headers)
|
||||||
|
|
||||||
# actual response is with range
|
# actual response is with range
|
||||||
assert resp.status_int == 206
|
assert resp.status_int == 206
|
||||||
@ -138,7 +138,7 @@ class TestProxyLiveRewriter:
|
|||||||
assert self.requestlog[0] == resp.text
|
assert self.requestlog[0] == resp.text
|
||||||
assert resp.headers['x-archive-orig-x-proxy'] == 'test'
|
assert resp.headers['x-archive-orig-x-proxy'] == 'test'
|
||||||
|
|
||||||
assert self.requestlog[0].startswith('GET http://example.com/ HTTP/1.1')
|
assert self.requestlog[0].startswith('GET http://httpbin.org/range/100 HTTP/1.1')
|
||||||
assert 'range: ' not in self.requestlog[0]
|
assert 'range: ' not in self.requestlog[0]
|
||||||
|
|
||||||
assert len(self.cache) == 0
|
assert len(self.cache) == 0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user