diff --git a/pywb/framework/proxy.py b/pywb/framework/proxy.py index cdf49efc..471794fe 100644 --- a/pywb/framework/proxy.py +++ b/pywb/framework/proxy.py @@ -86,9 +86,10 @@ class ProxyRouter(object): self.unaltered = proxy_options.get('unaltered_replay', False) + self.proxy_cert_dl_view = proxy_options.get('proxy_cert_download_view') + if not proxy_options.get('enable_https_proxy'): self.ca = None - self.proxy_cert_dl_view = None return if not openssl_avail: # pragma: no cover @@ -96,7 +97,6 @@ class ProxyRouter(object): print('Please install via "pip install pyopenssl" ' + 'to enable HTTPS support') self.ca = None - self.proxy_cert_dl_view = None return # HTTPS Only Options @@ -113,8 +113,6 @@ class ProxyRouter(object): self.use_wildcard = proxy_options.get('use_wildcard_certs', True) - self.proxy_cert_dl_view = proxy_options.get('proxy_cert_download_view') - def __call__(self, env): is_https = (env['REQUEST_METHOD'] == 'CONNECT') @@ -161,7 +159,7 @@ class ProxyRouter(object): if env['pywb.proxy_host'] == self.magic_name: env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri'] - # special case for proxy install + # special case for proxy install response = self.handle_cert_install(env) if response: return response @@ -332,6 +330,9 @@ class ProxyRouter(object): p12_path=self.CERT_DL_P12)) elif env['pywb.proxy_req_uri'] == self.CERT_DL_PEM: + if not self.ca: + return None + buff = '' with open(self.ca.ca_file) as fh: buff = fh.read() @@ -342,6 +343,9 @@ class ProxyRouter(object): content_type=content_type) elif env['pywb.proxy_req_uri'] == self.CERT_DL_P12: + if not self.ca: + return None + buff = self.ca.get_root_PKCS12() content_type = 'application/x-pkcs12' diff --git a/tests/test_config_proxy_http.yaml b/tests/test_config_proxy_http.yaml new file mode 100644 index 00000000..c99c49bd --- /dev/null +++ b/tests/test_config_proxy_http.yaml @@ -0,0 +1,21 @@ +collections: + all: + - ./sample_archive/cdx/iana.cdx + - ./sample_archive/cdx/dupes.cdx + - ./sample_archive/cdx/post-test.cdx + + older: + - ./sample_archive/cdx/iana.cdx + + newer: + - ./sample_archive/cdx/dupes.cdx + +archive_paths: ./sample_archive/warcs/ + +enable_http_proxy: true + +proxy_options: + enable_https_proxy: false + + cookie_resolver: true + use_default_coll: false diff --git a/tests/test_config_proxy.yaml b/tests/test_config_proxy_https.yaml similarity index 100% rename from tests/test_config_proxy.yaml rename to tests/test_config_proxy_https.yaml diff --git a/tests/test_proxy.py b/tests/test_proxy_http_auth.py similarity index 99% rename from tests/test_proxy.py rename to tests/test_proxy_http_auth.py index cee011af..b0229948 100644 --- a/tests/test_proxy.py +++ b/tests/test_proxy_http_auth.py @@ -7,7 +7,7 @@ from pywb.framework.wsgi_wrappers import init_app from pywb.cdx.cdxobject import CDXObject -class TestProxyWb: +class TestProxyHttpAuth: TEST_CONFIG = 'tests/test_config.yaml' def setup(self): diff --git a/tests/test_proxy_http_cookie.py b/tests/test_proxy_http_cookie.py new file mode 100644 index 00000000..af1bbd25 --- /dev/null +++ b/tests/test_proxy_http_cookie.py @@ -0,0 +1,200 @@ +from pywb.webapp.pywb_init import create_wb_router +from pywb.framework.wsgi_wrappers import init_app + +from wsgiref.simple_server import make_server + +from pywb.framework.proxy_resolvers import CookieResolver + +import threading +import requests +import shutil +import sys +import os + + +TEST_CONFIG = 'tests/test_config_proxy_http.yaml' + +server = None +sesh_key = None + +def setup_module(): + global server + server = ServeThread() + server.daemon = True + server.start() + + global session + session = requests.Session() + + +def teardown_module(): + try: + server.httpd.shutdown() + threading.current_thread().join(server) + except Exception: + pass + +class ServeThread(threading.Thread): + def __init__(self, *args, **kwargs): + super(ServeThread, self).__init__(*args, **kwargs) + self.app = init_app(create_wb_router, + load_yaml=True, + config_file=TEST_CONFIG) + + # init with port 0 to allow os to pick a port + self.httpd = make_server('', 0, self.app) + port = self.httpd.socket.getsockname()[1] + + proxy_str = 'http://localhost:' + str(port) + self.proxy_dict = {'http': proxy_str} + + def run(self, *args, **kwargs): + self.httpd.serve_forever() + + +class TestProxyHttpCookie: + def setup(self): + self.session = requests.Session() + + def get_url(self, url): + global sesh_key + if sesh_key: + self.session.headers.update({'Cookie': '__pywb_proxy_sesh=' + sesh_key}) + self.session.cookies.set('__pywb_proxy_sesh', sesh_key, domain='.pywb.proxy') + + return self.session.get(url, + proxies=server.proxy_dict) + + def post_url(self, url, data): + global sesh_key + if sesh_key: + self.session.headers.update({'Cookie': '__pywb_proxy_sesh=' + sesh_key}) + self.session.cookies.set('__pywb_proxy_sesh', sesh_key, domain='.pywb.proxy') + + return self.session.post(url, + data=data, + proxies=server.proxy_dict) + + def test_replay_no_coll(self): + resp = self.get_url('http://iana.org/') + assert resp.url == 'http://select.pywb.proxy/http://iana.org/' + assert resp.status_code == 200 + + def test_replay_set_older_coll(self): + resp = self.get_url('http://older-set.pywb.proxy/http://iana.org/') + assert resp.url == 'http://iana.org/' + assert resp.status_code == 200 + assert '20140126200624' in resp.text + + sesh1 = self.session.cookies.get('__pywb_proxy_sesh', domain='.pywb.proxy') + sesh2 = self.session.cookies.get('__pywb_proxy_sesh', domain='.iana.org') + assert sesh1 and sesh1 == sesh2, self.session.cookies + + # store session cookie + global sesh_key + sesh_key = sesh1 + + global sesh_key + sesh2 = self.session.cookies.get('__pywb_proxy_sesh', domain='.iana.org') + assert sesh_key == sesh2 + + def test_replay_same_coll(self): + resp = self.get_url('http://iana.org/') + assert resp.url == 'http://iana.org/' + assert resp.status_code == 200 + assert 'wbinfo.proxy_magic = "pywb.proxy";' in resp.text + assert '20140126200624' in resp.text + + def test_replay_set_change_coll(self): + resp = self.get_url('http://all-set.pywb.proxy/http://iana.org/') + assert resp.url == 'http://iana.org/' + assert resp.status_code == 200 + assert '20140127171238' in resp.text + + # verify still same session cookie + sesh2 = self.session.cookies.get('__pywb_proxy_sesh', domain='.iana.org') + global sesh_key + assert sesh_key == sesh2 + + def test_query(self): + resp = self.get_url('http://query.pywb.proxy/*/http://iana.org/') + assert resp.url == 'http://query.pywb.proxy/*/http://iana.org/' + assert resp.status_code == 200 + assert 'text/html' in resp.headers['content-type'] + assert '20140126200624' in resp.text + assert '20140127171238' in resp.text + assert '3 captures' in resp.text + + # testing via http here + def test_change_timestamp(self): + resp = self.get_url('http://query.pywb.proxy/20140126200624/http://iana.org/') + assert resp.url == 'http://iana.org/' + assert resp.status_code == 200 + assert '20140126200624' in resp.text + + def test_change_coll_same_ts(self): + resp = self.get_url('http://all-set.pywb.proxy/iana.org/') + assert resp.url == 'http://iana.org/' + assert resp.status_code == 200 + assert '20140126200624' in resp.text + + # testing via http here + def test_change_latest_ts(self): + resp = self.get_url('http://query.pywb.proxy/http://iana.org/?_=1234') + assert resp.url == 'http://iana.org/?_=1234' + assert resp.status_code == 200 + assert '20140127171238' in resp.text + + def test_diff_url(self): + resp = self.get_url('http://example.com/') + assert resp.url == 'http://example.com/' + assert '20140127171251' in resp.text + + def test_post_replay_all_coll(self): + resp = self.post_url('http://httpbin.org/post', data={'foo': 'bar', 'test': 'abc'}) + assert resp.url == 'http://httpbin.org/post' + assert 'application/json' in resp.headers['content-type'] + assert resp.status_code == 200 + + # Bounce back to select.pywb.proxy due to missing session + def test_clear_key(self): + # clear session key + global sesh_key + sesh_key = None + + def test_no_sesh_latest_bounce(self): + resp = self.get_url('http://query.pywb.proxy/http://iana.org/') + assert resp.url == 'http://select.pywb.proxy/http://iana.org/' + + def test_no_sesh_coll_change_bounce(self): + resp = self.get_url('http://auto.pywb.proxy/http://iana.org/') + assert resp.url == 'http://select.pywb.proxy/http://iana.org/' + + def test_no_sesh_ts_bounce(self): + resp = self.get_url('http://query.pywb.proxy/20140126200624/http://iana.org/') + assert resp.url == 'http://select.pywb.proxy/20140126200624/http://iana.org/' + + def test_no_sesh_query_bounce(self): + resp = self.get_url('http://query.pywb.proxy/*/http://iana.org/') + assert resp.url == 'http://select.pywb.proxy/http://query.pywb.proxy/*/http://iana.org/' + + # static replay + def test_replay_static(self): + resp = self.get_url('http://pywb.proxy/static/default/wb.js') + assert resp.status_code == 200 + assert 'function init_banner' in resp.text + + # download index page and cert downloads + def test_replay_dl_page(self): + resp = self.get_url('http://pywb.proxy/') + assert resp.status_code == 200 + assert 'text/html' in resp.headers['content-type'] + assert 'Sorry' in resp.text + + def test_dl_pem(self): + resp = self.get_url('http://pywb.proxy/pywb-ca.pem') + assert resp.status_code == 404 + + def test_dl_p12(self): + resp = self.get_url('http://pywb.proxy/pywb-ca.p12') + assert resp.status_code == 404 diff --git a/tests/test_proxy_https.py b/tests/test_proxy_https_cookie.py similarity index 98% rename from tests/test_proxy_https.py rename to tests/test_proxy_https_cookie.py index e13284a2..7ebf480a 100644 --- a/tests/test_proxy_https.py +++ b/tests/test_proxy_https_cookie.py @@ -14,7 +14,7 @@ import sys import os -TEST_CONFIG = 'tests/test_config_proxy.yaml' +TEST_CONFIG = 'tests/test_config_proxy_https.yaml' TEST_CA_DIR = './tests/pywb_test_certs' TEST_CA_ROOT = './tests/pywb_test_ca.pem' @@ -64,7 +64,7 @@ class ServeThread(threading.Thread): self.httpd.serve_forever() -class TestHttpsProxy: +class TestProxyHttpsCookie: def setup(self): self.session = requests.Session()