mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
proxy: add ProxyRouter wrapper to check for content-length and, if missing, perform full buffering (http1.0) or chunked encoding (http1.1) (separate from replay view buffering)
Add tests for buffering and chunked encoding (fixes #143); also test the no-banner, URL-rewrite-only proxy mode (related to #142).
This commit is contained in:
parent
0c96591c49
commit
eeb35ea3b4
@ -10,12 +10,15 @@ import socket
|
||||
import ssl
|
||||
|
||||
from pywb.rewrite.url_rewriter import SchemeOnlyUrlRewriter
|
||||
from pywb.rewrite.rewrite_content import RewriteContent
|
||||
from pywb.utils.wbexception import BadRequestException
|
||||
|
||||
from pywb.utils.bufferedreaders import BufferedReader
|
||||
|
||||
from pywb.framework.proxy_resolvers import ProxyAuthResolver, CookieResolver, IPCacheResolver
|
||||
|
||||
from tempfile import SpooledTemporaryFile
|
||||
|
||||
|
||||
#=================================================================
|
||||
class ProxyArchivalRouter(ArchivalRouter):
|
||||
@ -55,6 +58,7 @@ class ProxyRouter(object):
|
||||
|
||||
BLOCK_SIZE = 4096
|
||||
DEF_MAGIC_NAME = 'pywb.proxy'
|
||||
BUFF_RESPONSE_MEM_SIZE = 1024*1024
|
||||
|
||||
CERT_DL_PEM = '/pywb-ca.pem'
|
||||
CERT_DL_P12 = '/pywb-ca.p12'
|
||||
@ -222,12 +226,63 @@ class ProxyRouter(object):
|
||||
wbrequest.wb_url.mod = 'uo_'
|
||||
|
||||
response = route.handler(wbrequest)
|
||||
if not response:
|
||||
return None
|
||||
|
||||
# add extra headers for replay responses
|
||||
if wbrequest.wb_url and wbrequest.wb_url.is_replay():
|
||||
response.status_headers.replace_headers(self.extra_headers)
|
||||
|
||||
# check for content-length
|
||||
res = response.status_headers.get_header('content-length')
|
||||
try:
|
||||
if int(res) > 0:
|
||||
return response
|
||||
except:
|
||||
pass
|
||||
|
||||
# need to either chunk or buffer to get content-length
|
||||
if env.get('SERVER_PROTOCOL') == 'HTTP/1.1':
|
||||
response.status_headers.remove_header('content-length')
|
||||
response.status_headers.headers.append(('Transfer-Encoding', 'chunked'))
|
||||
response.body = self._chunk_encode(response.body)
|
||||
else:
|
||||
response.body = self._buffer_response(response.status_headers,
|
||||
response.body)
|
||||
|
||||
return response
|
||||
|
||||
@staticmethod
def _chunk_encode(orig_iter):
    """Wrap an iterable of body buffers in HTTP/1.1 chunked framing.

    Each non-empty buffer is emitted as three consecutive items: the
    chunk size in uppercase hex followed by CRLF, the chunk data itself,
    and a closing CRLF. After the source is exhausted, the zero-length
    terminating chunk (``0\\r\\n\\r\\n``) is emitted.

    All framing is produced as byte strings so the output never mixes
    text and bytes (on Python 2 ``bytes`` is ``str``, so the result is
    unchanged there).

    :param orig_iter: iterable of buffers, each convertible via ``bytes()``
    :return: generator yielding the chunk-encoded byte strings
    """
    for buff in orig_iter:
        chunk = bytes(buff)

        # An empty chunk must not be emitted mid-stream: a zero size
        # line is what marks the end of a chunked body.
        if not chunk:
            continue

        yield ('%X\r\n' % len(chunk)).encode('ascii')
        yield chunk
        yield b'\r\n'

    # terminating zero-length chunk, with no trailers
    yield b'0\r\n\r\n'
|
||||
|
||||
@staticmethod
def _buffer_response(status_headers, iterator):
    """Fully buffer a response body so its exact length can be declared.

    The whole of ``iterator`` is drained into a ``SpooledTemporaryFile``
    created with ``ProxyRouter.BUFF_RESPONSE_MEM_SIZE`` as its
    ``max_size`` (the spool stays in memory up to that size and rolls
    over to disk beyond it). The total byte count is then written into
    the ``Content-Length`` header and the buffered data is handed back
    as a new body iterator.

    :param status_headers: response headers object; its
        ``Content-Length`` header is set via ``replace_header``
    :param iterator: iterable of body buffers, each convertible via
        ``bytes()``
    :return: the result of ``RewriteContent.stream_to_gen`` over the
        rewound spool file
    :raises: whatever ``iterator`` raises; the spool file is closed
        first so it is not left open
    """
    out = SpooledTemporaryFile(ProxyRouter.BUFF_RESPONSE_MEM_SIZE)
    size = 0

    try:
        for buff in iterator:
            buff = bytes(buff)
            size += len(buff)
            out.write(buff)
    except BaseException:
        # Nothing will ever read the spool file now, so release it
        # here instead of leaving that to garbage collection.
        out.close()
        raise

    # set (or overwrite) Content-Length with the measured size
    status_headers.replace_header('Content-Length', str(size))

    # rewind so the returned generator replays the body from the start
    # NOTE(review): stream_to_gen presumably closes the stream once it
    # has been consumed -- confirm against RewriteContent
    out.seek(0)
    return RewriteContent.stream_to_gen(out)
|
||||
|
||||
def get_request_socket(self, env):
|
||||
if not self.ca:
|
||||
return None
|
||||
@ -259,7 +314,8 @@ class ProxyRouter(object):
|
||||
return WbResponse.text_response('HTTPS Proxy Not Supported',
|
||||
'405 HTTPS Proxy Not Supported')
|
||||
|
||||
sock.send('HTTP/1.0 200 Connection Established\r\n')
|
||||
sock.send('HTTP/1.1 200 Connection Established\r\n')
|
||||
sock.send('Proxy-Connection: close\r\n')
|
||||
sock.send('Server: pywb proxy\r\n')
|
||||
sock.send('\r\n')
|
||||
|
||||
|
@ -203,7 +203,7 @@ def test_example_1():
|
||||
status_headers, buff = get_rewritten('http://example.com/', urlrewriter, req_headers={'Connection': 'close'})
|
||||
|
||||
# verify header rewriting
|
||||
assert (('X-Archive-Orig-Content-Length', '1270') in status_headers.headers), status_headers
|
||||
assert status_headers.get_header('x-archive-orig-content-length') == '1270', status_headers
|
||||
|
||||
|
||||
# verify utf-8 charset detection
|
||||
|
@ -187,12 +187,7 @@ class ReplayView(object):
|
||||
content_len = 0
|
||||
|
||||
if content_len <= 0:
|
||||
# if proxy mode, must set content-length (or use chunked)
|
||||
if wbrequest.options.get('is_proxy'):
|
||||
max_size = 0
|
||||
else:
|
||||
max_size = self.buffer_max_size
|
||||
|
||||
max_size = self.buffer_max_size
|
||||
response_iter = self.buffered_response(status_headers,
|
||||
response_iter,
|
||||
max_size)
|
||||
|
18
tests/test_config_proxy_no_banner.yaml
Normal file
18
tests/test_config_proxy_no_banner.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
collections:
|
||||
all:
|
||||
- ./sample_archive/cdx/iana.cdx
|
||||
|
||||
archive_paths: ./sample_archive/warcs/
|
||||
|
||||
enable_http_proxy: true
|
||||
|
||||
buffer_response: false
|
||||
|
||||
proxy_options:
|
||||
enable_https_proxy: false
|
||||
|
||||
cookie_resolver: ip
|
||||
use_default_coll: all
|
||||
|
||||
use_banner: false
|
||||
use_client_rewrite: false
|
48
tests/test_proxy_http_no_banner.py
Normal file
48
tests/test_proxy_http_no_banner.py
Normal file
@ -0,0 +1,48 @@
|
||||
from pytest import raises
|
||||
import webtest
|
||||
import base64
|
||||
|
||||
from pywb.webapp.pywb_init import create_wb_router
|
||||
from pywb.framework.wsgi_wrappers import init_app
|
||||
from pywb.cdx.cdxobject import CDXObject
|
||||
|
||||
from urlparse import urlsplit
|
||||
|
||||
from server_mock import make_setup_module, BaseIntegration
|
||||
|
||||
setup_module = make_setup_module('tests/test_config_proxy_no_banner.yaml')
|
||||
|
||||
class TestProxyNoBanner(BaseIntegration):
    """Proxy-mode tests run against the no-banner test configuration.

    Each test fetches a url through a simulated proxy request using
    either HTTP/1.0 or HTTP/1.1 as SERVER_PROTOCOL and checks the
    Transfer-Encoding and Content-Length headers of the response.
    """

    def get_url(self, uri, addr='127.0.0.1', server_protocol='HTTP/1.0'):
        """Issue a GET for ``uri`` as if it had arrived as a proxy request.

        :param uri: full url to request
        :param addr: value passed as REMOTE_ADDR
        :param server_protocol: value passed as SERVER_PROTOCOL
        :return: the response returned by ``self.testapp.get``
        """
        # 'Simulating' proxy by setting REQUEST_URI explicitly to the
        # full url with an empty SCRIPT_NAME; the path actually passed
        # to the test app is a placeholder.
        environ = {
            'REQUEST_URI': uri,
            'QUERY_STRING': urlsplit(uri).query,
            'SCRIPT_NAME': '',
            'SERVER_PROTOCOL': server_protocol,
            'REMOTE_ADDR': addr,
        }
        return self.testapp.get('/x-ignore-this-x', extra_environ=environ)

    @staticmethod
    def _expect_chunked(response):
        """Assert a chunked response whose Content-Length matches the body."""
        assert response.headers['Transfer-Encoding'] == 'chunked'
        # NOTE(review): Content-Length is still expected here, presumably
        # filled in by the test client when it reads the body -- confirm
        assert int(response.headers['Content-Length']) == len(response.body)

    @staticmethod
    def _expect_buffered(response):
        """Assert a non-chunked response whose Content-Length matches the body."""
        assert 'Transfer-Encoding' not in response.headers
        assert int(response.headers['Content-Length']) == len(response.body)

    def test_proxy_chunked(self):
        """HTTP/1.1 request for an svg image gets a chunked response."""
        response = self.get_url(
            'http://www.iana.org/_img/2013.1/icann-logo.svg',
            server_protocol='HTTP/1.1')
        assert response.content_type == 'image/svg+xml'
        self._expect_chunked(response)

    def test_proxy_buffered(self):
        """HTTP/1.0 request for an svg image gets a buffered response."""
        response = self.get_url(
            'http://www.iana.org/_img/2013.1/icann-logo.svg',
            server_protocol='HTTP/1.0')
        assert response.content_type == 'image/svg+xml'
        self._expect_buffered(response)

    def test_proxy_html_url_only_rewrite_buffered(self):
        """HTTP/1.0 request for an html page gets a buffered response."""
        response = self.get_url('http://www.iana.org/',
                                server_protocol='HTTP/1.0')
        self._expect_buffered(response)

    def test_proxy_js_url_only_rewrite_buffered(self):
        """HTTP/1.0 request for a js file gets a buffered response."""
        response = self.get_url('http://www.iana.org/_js/2013.1/iana.js',
                                server_protocol='HTTP/1.0')
        self._expect_buffered(response)

    def test_proxy_js_url_only_rewrite_chunked(self):
        """HTTP/1.1 request for a js file gets a chunked response."""
        response = self.get_url('http://www.iana.org/_js/2013.1/iana.js',
                                server_protocol='HTTP/1.1')
        self._expect_chunked(response)
|
Loading…
x
Reference in New Issue
Block a user