mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
- merge pywb.urlrewrite -> pywb.rewrite, remove obsolete stuff (rewrite_content.py, rewrite_live.py, dsrules.py) - move wbrequestresponse -> pywb.apps - move pywb.webapp.handlers -> pywb.apps.static_handler - remove pywb.webapp, pywb.framework packages - disable old header_rewriter, content_rewriter tests - finish renaming from previous warcserver refactor - all other tests passing!
167 lines
6.0 KiB
Python
167 lines
6.0 KiB
Python
"""
|
|
#=================================================================
|
|
HTTP Headers Rewriting
|
|
#=================================================================
|
|
|
|
# Text with charset
|
|
>>> _test_headers([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=UTF-8')])
|
|
{'charset': 'utf-8',
|
|
'removed_header_dict': {'content-length': '5'},
|
|
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'),
|
|
('X-Archive-Orig-Content-Length', '5'),
|
|
('Content-Type', 'text/html;charset=UTF-8')]),
|
|
'text_type': 'html'}
|
|
|
|
# Redirect
|
|
>>> _test_headers([('Connection', 'close'), ('Location', '/other.html')], '302 Redirect')
|
|
{'charset': None,
|
|
'removed_header_dict': {},
|
|
'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [ ('X-Archive-Orig-Connection', 'close'),
|
|
('Location', '/web/20131010/http://example.com/other.html')]),
|
|
'text_type': None}
|
|
|
|
# gzip
|
|
>>> _test_headers([('Content-Length', '199999'), ('Content-Type', 'text/javascript'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')])
|
|
{'charset': None,
|
|
'removed_header_dict': {'content-encoding': 'gzip',
|
|
'content-length': '199999',
|
|
'transfer-encoding': 'chunked'},
|
|
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Content-Length', '199999'),
|
|
('Content-Type', 'text/javascript'),
|
|
('X-Archive-Orig-Content-Encoding', 'gzip'),
|
|
('X-Archive-Orig-Transfer-Encoding', 'chunked')]),
|
|
'text_type': 'js'}
|
|
|
|
# Binary -- transfer-encoding rewritten
|
|
>>> _test_headers([('Content-Length', '200000'), ('Content-Type', 'image/png'), ('Set-Cookie', 'foo=bar; Path=/;'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked'), ('X-Proxy', 'test')])
|
|
{'charset': None,
|
|
'removed_header_dict': {'transfer-encoding': 'chunked'},
|
|
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('Content-Length', '200000'),
|
|
('Content-Type', 'image/png'),
|
|
('Set-Cookie', 'foo=bar; Path=/web/20131010/http://example.com/'),
|
|
('Content-Encoding', 'gzip'),
|
|
('X-Archive-Orig-Transfer-Encoding', 'chunked'),
|
|
('X-Archive-Orig-X-Proxy', 'test')]),
|
|
'text_type': None}
|
|
|
|
"""
|
|
|
|
|
|
|
|
from pywb.rewrite.header_rewriter import HeaderRewriter
|
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
|
from warcio.statusandheaders import StatusAndHeaders
|
|
|
|
from warcio.timeutils import datetime_to_http_date
|
|
from datetime import datetime
|
|
|
|
import pprint
|
|
from mock import patch
|
|
|
|
# Module-level fixtures shared by the helpers and tests in this file.
# `urlrewriter` is the default `rewriter` argument of the helper functions.
urlrewriter = UrlRewriter(
    '20131010/http://example.com/',
    '/web/',
)

# Single HeaderRewriter instance reused for every rewrite call in this module.
headerrewriter = HeaderRewriter()
|
|
|
|
def _repr_format(sh):
    """Format a status-and-headers object as a readable one-record string.

    Patched in as ``StatusAndHeaders.__repr__`` by ``_test_headers`` so the
    doctest output lists the headers pretty-printed.

    :param sh: object exposing ``protocol``, ``statusline`` and ``headers``
    :return: ``"StatusAndHeaders(protocol = '..', statusline = '..', headers = ..)"``
    """
    headers_str = pprint.pformat(sh.headers, indent=2, width=80)
    # Adjacent string literals instead of a backslash continuation *inside*
    # the literal: with the backslash form, any indentation on the following
    # source line silently becomes part of the output.
    template = ("StatusAndHeaders(protocol = '{0}', statusline = '{1}', "
                "headers = {2})")
    return template.format(sh.protocol, sh.statusline, headers_str)
|
|
|
|
|
|
@patch('warcio.statusandheaders.StatusAndHeaders.__repr__', _repr_format)
def _test_headers(headers, status='200 OK', rewriter=urlrewriter):
    """Doctest helper: rewrite ``headers`` and pretty-print the result.

    While this function runs, ``StatusAndHeaders.__repr__`` is replaced by
    ``_repr_format``.

    :param headers: list of ``(name, value)`` header tuples to rewrite
    :param status: status line passed to ``StatusAndHeaders``
    :param rewriter: url rewriter; also supplies the cookie rewriter
    :return: the return value of ``pprint.pprint`` (``None``); the attributes
             of the rewrite result are printed to stdout
    """
    original = StatusAndHeaders(status, headers)
    cookie_rewriter = rewriter.get_cookie_rewriter()
    result = headerrewriter.rewrite(original, rewriter, cookie_rewriter)
    return pprint.pprint(vars(result))
|
|
|
|
|
|
def _test_head_data(headers, status='200 OK', rewriter=urlrewriter):
    """Rewrite ``headers`` and return the rewritten status-and-headers object.

    :param headers: list of ``(name, value)`` header tuples to rewrite
    :param status: status line passed to ``StatusAndHeaders``
    :param rewriter: url rewriter; also supplies the cookie rewriter
    :return: the ``status_headers`` attribute of the rewrite result
    """
    original = StatusAndHeaders(status, headers)
    cookie_rewriter = rewriter.get_cookie_rewriter()
    result = headerrewriter.rewrite(original, rewriter, cookie_rewriter)
    return result.status_headers
|
|
|
|
|
|
|
|
def test_cookie_headers():
    """Check Set-Cookie path rewriting and prefixing of other headers."""
    # cookie, host/origin rewriting
    input_headers = [
        ('Connection', 'close'),
        ('Set-Cookie', 'foo=bar; Path=/; abc=def; Path=/somefile.html'),
        ('Host', 'example.com'),
        ('Origin', 'https://example.com'),
    ]
    res = _test_head_data(input_headers)

    # Each of these (name, value) pairs must appear in the rewritten headers:
    # the combined Set-Cookie is expected as two entries with rewritten paths,
    # the remaining headers under the X-Archive-Orig- prefix.
    expected_headers = [
        ('Set-Cookie', 'foo=bar; Path=/web/20131010/http://example.com/'),
        ('Set-Cookie', 'abc=def; Path=/web/20131010/http://example.com/somefile.html'),
        ('X-Archive-Orig-Connection', 'close'),
        ('X-Archive-Orig-Host', 'example.com'),
        ('X-Archive-Orig-Origin', 'https://example.com'),
    ]
    for expected in expected_headers:
        assert expected in res.headers
|
|
|
|
|
|
|
|
def _make_cache_headers():
    """Build a fresh list of cache-related response headers for the proxy tests.

    :return: list of ``(name, value)`` tuples with Content-Length,
             Cache-Control, Expires (formatted from ``datetime.now()``) and ETag
    """
    expires_value = datetime_to_http_date(datetime.now())
    return [
        ('Content-Length', '123'),
        ('Cache-Control', 'max-age=10'),
        ('Expires', expires_value),
        ('ETag', '123456'),
    ]
|
|
|
|
|
|
def _test_proxy_headers(http_cache=None):
    """Rewrite the sample cache headers under a given ``http_cache`` option.

    :param http_cache: value stored under ``'http_cache'`` in the rewriter's
                       ``rewrite_opts``
    :return: the ``status_headers`` attribute of the rewrite result
    """
    cache_headers = _make_cache_headers()
    opts = {'http_cache': http_cache}
    proxy_rewriter = UrlRewriter('20131010/http://example.com/', '/pywb/',
                                 rewrite_opts=opts)

    original = StatusAndHeaders('200 OK', cache_headers)
    cookie_rewriter = proxy_rewriter.get_cookie_rewriter()
    result = headerrewriter.rewrite(original, proxy_rewriter, cookie_rewriter)
    return result.status_headers
|
|
|
|
|
|
def test_proxy_default():
    """With no ``http_cache`` option, the cache headers are expected prefixed."""
    res = _test_proxy_headers()

    # Identity comparison is the correct check for None (PEP 8).
    assert res.get_header('X-Archive-Orig-Cache-Control') is not None
    assert res.get_header('X-Archive-Orig-Expires') is not None
    assert res.get_header('X-Archive-Orig-ETag') is not None
|
|
|
|
|
|
def test_proxy_pass():
    """With ``http_cache='pass'``, the cache headers are expected unprefixed."""
    res = _test_proxy_headers('pass')

    assert res.get_header('Cache-Control') == 'max-age=10'
    # Identity comparison is the correct check for None (PEP 8).
    assert res.get_header('Expires') is not None
    assert res.get_header('ETag') is not None
|
|
|
|
|
|
def test_proxy_set_age():
    """With ``http_cache='600'``, expect ``max-age=600``, Expires and no ETag."""
    res = _test_proxy_headers('600')

    assert res.get_header('Cache-Control') == 'max-age=600'
    # Identity comparison is the correct check for None (PEP 8).
    assert res.get_header('Expires') is not None
    assert res.get_header('ETag') is None
|
|
|
|
|
|
def test_proxy_zero():
    """With ``http_cache='0'``, expect caching disabled and no Expires/ETag."""
    res = _test_proxy_headers('0')

    assert res.get_header('Cache-Control') == 'no-cache; no-store'
    # Identity comparison is the correct check for None (PEP 8).
    assert res.get_header('Expires') is None
    assert res.get_header('ETag') is None
|
|
|
|
|
|
def test_proxy_not_num():
    """With a non-numeric ``http_cache``, expect the same result as for ``'0'``."""
    res = _test_proxy_headers('blah')

    assert res.get_header('Cache-Control') == 'no-cache; no-store'
    # Identity comparison is the correct check for None (PEP 8).
    assert res.get_header('Expires') is None
    assert res.get_header('ETag') is None
|
|
|
|
|
|
if __name__ == '__main__':
    # When executed as a script, run the doctests found in this module.
    from doctest import testmod

    testmod()
|
|
|
|
|