1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00
pywb/tests/test_proxy_https_cookie.py
Ilya Kreymer 7feb0893eb rewrite: add 'application/json' to a separate 'json' regex rewriter type (rewrite links only, no
http), can be customized via rules
wombat: add rewrite_style for rewriting style attrs
query: don't include any filter in latest, custom filter can be used
without any other filters
tests: fix typos in tests
2014-09-30 10:57:25 -07:00

218 lines
7.5 KiB
Python

import pytest
from pywb.webapp.pywb_init import create_wb_router
from pywb.framework.wsgi_wrappers import init_app
from wsgiref.simple_server import make_server
from pywb.framework.proxy_resolvers import CookieResolver
import threading
import requests
import shutil
import sys
import os
# YAML config file handed to init_app() when the test application is built
TEST_CONFIG = 'tests/test_config_proxy_https_cookie.yaml'
# Certificate directory and root CA file; both are deleted again in
# teardown_module(). The root CA file is also passed as `verify` to requests.
TEST_CA_DIR = './tests/pywb_test_certs'
TEST_CA_ROOT = './tests/pywb_test_ca.pem'
# ServeThread instance, assigned in setup_module()
server = None
# Proxy session cookie value shared between tests: stored by
# test_replay_set_older_coll and reset to None by test_clear_key
sesh_key = None
def setup_module():
    """Start the proxy server thread used by every test in this module.

    The whole module is skipped when the ``OpenSSL`` package cannot be
    imported. On success the module globals ``server`` (a started, daemonized
    ``ServeThread``) and ``session`` (a ``requests.Session``) are set.
    """
    global server, session

    # Only the skip-if-missing side effect is wanted; the module object
    # itself is not used here.
    pytest.importorskip("OpenSSL")

    server = ServeThread()
    # Daemon thread so a hung server can never keep the test run alive.
    server.daemon = True
    server.start()

    session = requests.Session()
def teardown_module():
    """Stop the proxy server thread and delete the generated test CA files.

    Server shutdown is best-effort: any error raised while stopping the
    server is ignored so that the certificate cleanup below still runs.

    Raises:
        OSError: if ``TEST_CA_DIR`` or ``TEST_CA_ROOT`` cannot be removed.
    """
    if server is not None:
        try:
            # shutdown() makes serve_forever() return in the server thread.
            server.httpd.shutdown()
            # Join the *server* thread. The previous code called join() on
            # the current thread, which always raised and was silently
            # swallowed, so the server thread was never actually joined.
            server.join()
            # Release the listening socket; shutdown() alone leaves it open.
            server.httpd.server_close()
        except Exception:
            # Deliberately best-effort: a failed shutdown must not prevent
            # the file cleanup below.
            pass

    # delete test root and certs
    shutil.rmtree(TEST_CA_DIR)
    os.remove(TEST_CA_ROOT)
class ServeThread(threading.Thread):
    """Thread that serves the test application from a local WSGI server.

    Attributes set by the constructor:
        app: application returned by ``init_app`` for ``TEST_CONFIG``.
        httpd: server created by ``make_server`` on an OS-assigned port.
        proxy_dict: ``{'http': ..., 'https': ...}`` mapping, both entries
            pointing at ``http://localhost:<port>``; the tests pass it to
            requests as ``proxies``.
    """

    def __init__(self, *args, **kwargs):
        super(ServeThread, self).__init__(*args, **kwargs)

        self.app = init_app(
            create_wb_router,
            load_yaml=True,
            config_file=TEST_CONFIG,
        )

        # Binding to port 0 lets the OS pick a free port; the port actually
        # chosen is read back from the bound socket.
        self.httpd = make_server('', 0, self.app)
        bound_port = self.httpd.socket.getsockname()[1]

        proxy_url = 'http://localhost:{0}'.format(bound_port)
        self.proxy_dict = dict(http=proxy_url, https=proxy_url)

    def run(self, *args, **kwargs):
        """Serve requests until ``httpd.shutdown()`` is called."""
        self.httpd.serve_forever()
class TestProxyHttpsCookie:
    """Replay tests issued through the local proxy started in setup_module().

    Every request goes through ``server.proxy_dict`` and is verified against
    ``TEST_CA_ROOT``. The tests share state through the module global
    ``sesh_key`` (stored by ``test_replay_set_older_coll``, cleared by
    ``test_clear_key``), so they rely on running in definition order.
    """

    # NOTE(review): `setup` is the nose-style per-test hook. pytest 8 no
    # longer calls it -- consider renaming to `setup_method` if this suite
    # is run on a current pytest.
    def setup(self):
        """Give each test a fresh ``requests.Session``."""
        self.session = requests.Session()

    def _apply_sesh_cookie(self):
        """Attach the stored proxy session key, if any, to ``self.session``.

        Shared by ``get_url`` and ``post_url``. The key is sent both as an
        explicit ``Cookie`` header and as a cookie-jar entry for the
        ``.pywb.proxy`` domain.
        """
        if not sesh_key:
            return

        self.session.headers.update({'Cookie': '__pywb_proxy_sesh=' + sesh_key})
        self.session.cookies.set('__pywb_proxy_sesh', sesh_key, domain='.pywb.proxy')

    def get_url(self, url):
        """GET ``url`` through the test proxy and return the response."""
        self._apply_sesh_cookie()
        return self.session.get(url,
                                proxies=server.proxy_dict,
                                verify=TEST_CA_ROOT)

    def post_url(self, url, data):
        """POST ``data`` to ``url`` through the test proxy; return the response."""
        self._apply_sesh_cookie()
        return self.session.post(url,
                                 data=data,
                                 proxies=server.proxy_dict,
                                 verify=TEST_CA_ROOT)

    def test_replay_no_coll(self):
        resp = self.get_url('https://iana.org/')
        assert resp.url == 'https://select.pywb.proxy/https://iana.org/'
        assert resp.status_code == 200

    def test_replay_set_older_coll(self):
        resp = self.get_url('https://older-set.pywb.proxy/https://iana.org/')
        assert resp.url == 'https://iana.org/'
        assert resp.status_code == 200
        assert '20140126200624' in resp.text

        sesh1 = self.session.cookies.get('__pywb_proxy_sesh', domain='.pywb.proxy')
        sesh2 = self.session.cookies.get('__pywb_proxy_sesh', domain='.iana.org')
        assert sesh1 and sesh1 == sesh2, self.session.cookies

        # store session cookie for the tests that follow
        global sesh_key
        sesh_key = sesh1

        sesh2 = self.session.cookies.get('__pywb_proxy_sesh', domain='.iana.org')
        assert sesh_key == sesh2

    def test_replay_same_coll(self):
        resp = self.get_url('https://iana.org/')
        assert resp.url == 'https://iana.org/'
        assert resp.status_code == 200
        assert 'wbinfo.proxy_magic = "pywb.proxy";' in resp.text
        assert '20140126200624' in resp.text

    def test_replay_set_change_coll(self):
        resp = self.get_url('https://all-set.pywb.proxy/https://iana.org/')
        assert resp.url == 'https://iana.org/'
        assert resp.status_code == 200
        assert '20140127171238' in resp.text

        # verify still same session cookie
        sesh2 = self.session.cookies.get('__pywb_proxy_sesh', domain='.iana.org')
        assert sesh_key == sesh2

    def test_query(self):
        resp = self.get_url('https://query.pywb.proxy/*/https://iana.org/')
        assert resp.url == 'https://query.pywb.proxy/*/https://iana.org/'
        assert resp.status_code == 200
        assert 'text/html' in resp.headers['content-type']
        assert '20140126200624' in resp.text
        assert '20140127171238' in resp.text
        assert '<b>3</b> captures' in resp.text

    # testing via http here
    def test_change_timestamp(self):
        resp = self.get_url('http://query.pywb.proxy/20140126200624/http://iana.org/')
        assert resp.url == 'http://iana.org/'
        assert resp.status_code == 200
        assert '20140126200624' in resp.text

    def test_change_coll_same_ts(self):
        resp = self.get_url('https://all-set.pywb.proxy/iana.org/')
        assert resp.url == 'https://iana.org/'
        assert resp.status_code == 200
        assert '20140126200624' in resp.text

    # testing via http here
    def test_change_latest_ts(self):
        resp = self.get_url('http://query.pywb.proxy/http://iana.org/?_=1234')
        assert resp.url == 'http://iana.org/?_=1234'
        assert resp.status_code == 200
        assert '20140127171238' in resp.text

    def test_diff_url(self):
        resp = self.get_url('https://example.com/')
        assert resp.url == 'https://example.com/'
        assert '20140127171251' in resp.text

    @pytest.mark.skipif(sys.version_info < (2,7),
                        reason="doesn't work in 2.6")
    def test_post_replay_all_coll(self):
        resp = self.post_url('https://httpbin.org/post', data={'foo': 'bar', 'test': 'abc'})
        assert resp.url == 'https://httpbin.org/post'
        assert 'application/json' in resp.headers['content-type']
        assert resp.status_code == 200

    # Bounce back to select.pywb.proxy due to missing session:
    # the key is cleared first, then the tests below expect the bounce.
    def test_clear_key(self):
        # clear session key
        global sesh_key
        sesh_key = None

    def test_no_sesh_latest_bounce(self):
        resp = self.get_url('https://query.pywb.proxy/https://iana.org/')
        assert resp.url == 'https://select.pywb.proxy/https://iana.org/'

    def test_no_sesh_coll_change_bounce(self):
        resp = self.get_url('https://auto.pywb.proxy/https://iana.org/')
        assert resp.url == 'https://select.pywb.proxy/https://iana.org/'

    def test_no_sesh_ts_bounce(self):
        resp = self.get_url('https://query.pywb.proxy/20140126200624/https://iana.org/')
        assert resp.url == 'https://select.pywb.proxy/20140126200624/https://iana.org/'

    def test_no_sesh_query_bounce(self):
        resp = self.get_url('https://query.pywb.proxy/*/https://iana.org/')
        assert resp.url == 'https://select.pywb.proxy/https://query.pywb.proxy/*/https://iana.org/'

    # static replay
    def test_replay_static(self):
        resp = self.get_url('https://pywb.proxy/static/default/wb.js')
        assert resp.status_code == 200
        assert 'function init_banner' in resp.text

    # download index page and cert downloads
    def test_replay_dl_page(self):
        resp = self.get_url('https://pywb.proxy/')
        assert resp.status_code == 200
        assert 'text/html' in resp.headers['content-type']
        assert 'Download' in resp.text

    def test_dl_pem(self):
        resp = self.get_url('https://pywb.proxy/pywb-ca.pem')
        assert resp.headers['content-type'] == 'application/x-x509-ca-cert'

    def test_dl_p12(self):
        resp = self.get_url('https://pywb.proxy/pywb-ca.p12')
        assert resp.headers['content-type'] == 'application/x-pkcs12'