mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Proxy Mode Support (#244)
Proxy mode support re-added! - use the wsgiprox wrapper in FrontEndApp.init_proxy() with a fixed collection prefix and CA options - cli: --proxy <coll> flag added to specify the proxy collection - cleanup: remove cookie rewriting (already disabled), fix POST handling paths - headers: ensure request headers are not rewritten when in proxy mode; response headers marked with 'url-rewrite' are also not rewritten when there is no url rewriting (proxy mode) - urlrewriter: add IdentityUrlRewriter, which performs no rewriting, as the default instead of SchemeOnlyUrlRewriter - memento support: for now, only include rel="original" and Memento-Datetime in the proxy replay response - responseloader: disable urllib3 insecure-request warnings - tests: add tests for proxy replay and for proxy record/replay of a new collection
This commit is contained in:
parent
bbbb62ad52
commit
925f8337a5
@ -34,6 +34,8 @@ class BaseCli(object):
|
|||||||
parser.add_argument('--debug', action='store_true')
|
parser.add_argument('--debug', action='store_true')
|
||||||
parser.add_argument('--profile', action='store_true')
|
parser.add_argument('--profile', action='store_true')
|
||||||
|
|
||||||
|
parser.add_argument('--proxy', help='Enable HTTP/S Proxy on specified collection')
|
||||||
|
|
||||||
parser.add_argument('--live', action='store_true', help='Add /live handler')
|
parser.add_argument('--live', action='store_true', help='Add /live handler')
|
||||||
|
|
||||||
self.desc = desc
|
self.desc = desc
|
||||||
@ -48,6 +50,9 @@ class BaseCli(object):
|
|||||||
|
|
||||||
self.application = self.load()
|
self.application = self.load()
|
||||||
|
|
||||||
|
if self.r.proxy:
|
||||||
|
self.application = self.application.init_proxy(self.r.proxy)
|
||||||
|
|
||||||
if self.r.profile:
|
if self.r.profile:
|
||||||
from werkzeug.contrib.profiler import ProfilerMiddleware
|
from werkzeug.contrib.profiler import ProfilerMiddleware
|
||||||
self.application = ProfilerMiddleware(self.application)
|
self.application = ProfilerMiddleware(self.application)
|
||||||
|
@ -8,6 +8,7 @@ from six.moves.urllib.parse import urljoin
|
|||||||
from six import iteritems
|
from six import iteritems
|
||||||
|
|
||||||
from warcio.utils import to_native_str
|
from warcio.utils import to_native_str
|
||||||
|
from wsgiprox.wsgiprox import WSGIProxMiddleware
|
||||||
|
|
||||||
from pywb.recorder.multifilewarcwriter import MultiFileWARCWriter
|
from pywb.recorder.multifilewarcwriter import MultiFileWARCWriter
|
||||||
from pywb.recorder.recorderapp import RecorderApp
|
from pywb.recorder.recorderapp import RecorderApp
|
||||||
@ -202,7 +203,6 @@ class FrontEndApp(object):
|
|||||||
metadata = self.get_metadata(coll)
|
metadata = self.get_metadata(coll)
|
||||||
if record:
|
if record:
|
||||||
metadata['type'] = 'record'
|
metadata['type'] = 'record'
|
||||||
print('RECORD')
|
|
||||||
|
|
||||||
if timemap_output:
|
if timemap_output:
|
||||||
metadata['output'] = timemap_output
|
metadata['output'] = timemap_output
|
||||||
@ -304,6 +304,17 @@ class FrontEndApp(object):
|
|||||||
app_server = GeventServer(app, port=port, hostname='0.0.0.0')
|
app_server = GeventServer(app, port=port, hostname='0.0.0.0')
|
||||||
return app_server
|
return app_server
|
||||||
|
|
||||||
|
def init_proxy(self, proxy_coll, opts=None):
|
||||||
|
if not opts:
|
||||||
|
opts = {'ca_name': 'pywb HTTPS Proxy CA',
|
||||||
|
'ca_file_cache': os.path.join('proxy-certs', 'pywb-ca.pem')}
|
||||||
|
|
||||||
|
prefix = '/{0}/bn_/'.format(proxy_coll)
|
||||||
|
|
||||||
|
return WSGIProxMiddleware(self, prefix,
|
||||||
|
proxy_host='pywb.proxy',
|
||||||
|
proxy_options=opts)
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class MetadataCache(object):
|
class MetadataCache(object):
|
||||||
|
@ -6,7 +6,7 @@ from six.moves.urllib.parse import urlencode, urlsplit, urlunsplit
|
|||||||
from pywb.rewrite.default_rewriter import DefaultRewriter, RewriterWithJSProxy
|
from pywb.rewrite.default_rewriter import DefaultRewriter, RewriterWithJSProxy
|
||||||
|
|
||||||
from pywb.rewrite.wburl import WbUrl
|
from pywb.rewrite.wburl import WbUrl
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter, SchemeOnlyUrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter, IdentityUrlRewriter
|
||||||
|
|
||||||
from pywb.utils.wbexception import WbException
|
from pywb.utils.wbexception import WbException
|
||||||
from pywb.utils.canonicalize import canonicalize
|
from pywb.utils.canonicalize import canonicalize
|
||||||
@ -122,18 +122,18 @@ class RewriterApp(object):
|
|||||||
rel_prefix = self.get_rel_prefix(environ)
|
rel_prefix = self.get_rel_prefix(environ)
|
||||||
full_prefix = host_prefix + rel_prefix
|
full_prefix = host_prefix + rel_prefix
|
||||||
|
|
||||||
|
is_proxy = ('wsgiprox.proxy_host' in environ)
|
||||||
|
|
||||||
response = self.handle_custom_response(environ, wb_url,
|
response = self.handle_custom_response(environ, wb_url,
|
||||||
full_prefix, host_prefix,
|
full_prefix, host_prefix,
|
||||||
kwargs)
|
kwargs)
|
||||||
|
|
||||||
if response:
|
if response:
|
||||||
return self.format_response(response, wb_url, full_prefix, is_timegate)
|
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy)
|
||||||
|
|
||||||
is_proxy = ('wsgiprox.proxy_host' in environ)
|
|
||||||
|
|
||||||
if is_proxy:
|
if is_proxy:
|
||||||
environ['pywb_proxy_magic'] = environ['wsgiprox.proxy_host']
|
environ['pywb_proxy_magic'] = environ['wsgiprox.proxy_host']
|
||||||
urlrewriter = SchemeOnlyUrlRewriter(wb_url, '')
|
urlrewriter = IdentityUrlRewriter(wb_url, '')
|
||||||
framed_replay = False
|
framed_replay = False
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@ -293,24 +293,18 @@ class RewriterApp(object):
|
|||||||
if not is_ajax and self.enable_memento:
|
if not is_ajax and self.enable_memento:
|
||||||
self._add_memento_links(cdx['url'], full_prefix,
|
self._add_memento_links(cdx['url'], full_prefix,
|
||||||
memento_dt, cdx['timestamp'], status_headers,
|
memento_dt, cdx['timestamp'], status_headers,
|
||||||
is_timegate)
|
is_timegate, is_proxy)
|
||||||
|
|
||||||
set_content_loc = True
|
set_content_loc = True
|
||||||
|
|
||||||
if set_content_loc:
|
if set_content_loc:
|
||||||
status_headers.headers.append(('Content-Location', urlrewriter.get_new_url(timestamp=cdx['timestamp'],
|
status_headers.headers.append(('Content-Location', urlrewriter.get_new_url(timestamp=cdx['timestamp'],
|
||||||
url=cdx['url'])))
|
url=cdx['url'])))
|
||||||
#gen = buffer_iter(status_headers, gen)
|
|
||||||
response = WbResponse(status_headers, gen)
|
response = WbResponse(status_headers, gen)
|
||||||
|
|
||||||
if is_proxy:
|
|
||||||
response.status_headers.remove_header('Content-Security-Policy-Report-Only')
|
|
||||||
response.status_headers.remove_header('Content-Security-Policy')
|
|
||||||
response.status_headers.remove_header('X-Frame-Options')
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def format_response(self, response, wb_url, full_prefix, is_timegate):
|
def format_response(self, response, wb_url, full_prefix, is_timegate, is_proxy):
|
||||||
memento_ts = None
|
memento_ts = None
|
||||||
if not isinstance(response, WbResponse):
|
if not isinstance(response, WbResponse):
|
||||||
content_type = 'text/html'
|
content_type = 'text/html'
|
||||||
@ -324,11 +318,11 @@ class RewriterApp(object):
|
|||||||
response = WbResponse.text_response(response, content_type=content_type)
|
response = WbResponse.text_response(response, content_type=content_type)
|
||||||
|
|
||||||
self._add_memento_links(wb_url.url, full_prefix, None, memento_ts,
|
self._add_memento_links(wb_url.url, full_prefix, None, memento_ts,
|
||||||
response.status_headers, is_timegate)
|
response.status_headers, is_timegate, is_proxy)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
||||||
status_headers, is_timegate):
|
status_headers, is_timegate, is_proxy):
|
||||||
|
|
||||||
# memento url + header
|
# memento url + header
|
||||||
if not memento_dt and memento_ts:
|
if not memento_dt and memento_ts:
|
||||||
@ -337,17 +331,21 @@ class RewriterApp(object):
|
|||||||
if memento_dt:
|
if memento_dt:
|
||||||
status_headers.headers.append(('Memento-Datetime', memento_dt))
|
status_headers.headers.append(('Memento-Datetime', memento_dt))
|
||||||
|
|
||||||
memento_url = full_prefix + memento_ts + self.replay_mod
|
if is_proxy:
|
||||||
memento_url += '/' + url
|
memento_url = url
|
||||||
|
else:
|
||||||
|
memento_url = full_prefix + memento_ts + self.replay_mod
|
||||||
|
memento_url += '/' + url
|
||||||
else:
|
else:
|
||||||
memento_url = None
|
memento_url = None
|
||||||
|
|
||||||
timegate_url, timemap_url = self._get_timegate_timemap(url, full_prefix)
|
timegate_url, timemap_url = self._get_timegate_timemap(url, full_prefix)
|
||||||
|
|
||||||
link = []
|
link = []
|
||||||
link.append(MementoUtils.make_link(url, 'original'))
|
if not is_proxy:
|
||||||
link.append(MementoUtils.make_link(timegate_url, 'timegate'))
|
link.append(MementoUtils.make_link(url, 'original'))
|
||||||
link.append(MementoUtils.make_link(timemap_url, 'timemap'))
|
link.append(MementoUtils.make_link(timegate_url, 'timegate'))
|
||||||
|
link.append(MementoUtils.make_link(timemap_url, 'timemap'))
|
||||||
|
|
||||||
if memento_dt:
|
if memento_dt:
|
||||||
link.append(MementoUtils.make_memento_link(memento_url, 'memento', memento_dt))
|
link.append(MementoUtils.make_memento_link(memento_url, 'memento', memento_dt))
|
||||||
|
@ -34,6 +34,7 @@ class DefaultHeaderRewriter(object):
|
|||||||
'content-md5': 'prefix',
|
'content-md5': 'prefix',
|
||||||
'content-range': 'keep',
|
'content-range': 'keep',
|
||||||
'content-security-policy': 'prefix',
|
'content-security-policy': 'prefix',
|
||||||
|
'content-security-policy-report-only': 'prefix',
|
||||||
'content-type': 'keep',
|
'content-type': 'keep',
|
||||||
|
|
||||||
'date': 'keep',
|
'date': 'keep',
|
||||||
@ -102,7 +103,10 @@ class DefaultHeaderRewriter(object):
|
|||||||
return (name, value)
|
return (name, value)
|
||||||
|
|
||||||
elif rule == 'url-rewrite':
|
elif rule == 'url-rewrite':
|
||||||
return (name, self.rwinfo.url_rewriter.rewrite(value))
|
if self.rwinfo.is_url_rw():
|
||||||
|
return (name, self.rwinfo.url_rewriter.rewrite(value))
|
||||||
|
else:
|
||||||
|
return (name, value)
|
||||||
|
|
||||||
elif rule == 'prefix-if-content-rewrite':
|
elif rule == 'prefix-if-content-rewrite':
|
||||||
if self.rwinfo.is_content_rw:
|
if self.rwinfo.is_content_rw:
|
||||||
|
@ -19,9 +19,14 @@ class RewriteInputRequest(DirectWSGIInputRequest):
|
|||||||
self.rewriter = rewriter
|
self.rewriter = rewriter
|
||||||
self.extra_cookie = None
|
self.extra_cookie = None
|
||||||
|
|
||||||
self.splits = urlsplit(self.url)
|
is_proxy = ('wsgiprox.proxy_host' in env)
|
||||||
|
|
||||||
|
self.splits = urlsplit(self.url) if not is_proxy else None
|
||||||
|
|
||||||
def get_full_request_uri(self):
|
def get_full_request_uri(self):
|
||||||
|
if not self.splits:
|
||||||
|
return self.url
|
||||||
|
|
||||||
uri = self.splits.path
|
uri = self.splits.path
|
||||||
if not uri:
|
if not uri:
|
||||||
uri = '/'
|
uri = '/'
|
||||||
@ -39,17 +44,20 @@ class RewriteInputRequest(DirectWSGIInputRequest):
|
|||||||
for name, value in iteritems(self.env):
|
for name, value in iteritems(self.env):
|
||||||
if name == 'HTTP_HOST':
|
if name == 'HTTP_HOST':
|
||||||
name = 'Host'
|
name = 'Host'
|
||||||
value = self.splits.netloc
|
if self.splits:
|
||||||
|
value = self.splits.netloc
|
||||||
|
|
||||||
elif name == 'HTTP_ORIGIN':
|
elif name == 'HTTP_ORIGIN':
|
||||||
name = 'Origin'
|
name = 'Origin'
|
||||||
value = (self.splits.scheme + '://' + self.splits.netloc)
|
if self.splits:
|
||||||
|
value = (self.splits.scheme + '://' + self.splits.netloc)
|
||||||
|
|
||||||
elif name == 'HTTP_X_CSRFTOKEN':
|
elif name == 'HTTP_X_CSRFTOKEN':
|
||||||
name = 'X-CSRFToken'
|
name = 'X-CSRFToken'
|
||||||
cookie_val = extract_client_cookie(self.env, 'csrftoken')
|
if self.splits:
|
||||||
if cookie_val:
|
cookie_val = extract_client_cookie(self.env, 'csrftoken')
|
||||||
value = cookie_val
|
if cookie_val:
|
||||||
|
value = cookie_val
|
||||||
|
|
||||||
elif name == 'HTTP_X_PYWB_REQUESTED_WITH':
|
elif name == 'HTTP_X_PYWB_REQUESTED_WITH':
|
||||||
continue
|
continue
|
||||||
@ -62,12 +70,8 @@ class RewriteInputRequest(DirectWSGIInputRequest):
|
|||||||
|
|
||||||
elif name == 'HTTP_X_FORWARDED_PROTO':
|
elif name == 'HTTP_X_FORWARDED_PROTO':
|
||||||
name = 'X-Forwarded-Proto'
|
name = 'X-Forwarded-Proto'
|
||||||
value = self.splits.scheme
|
if self.splits:
|
||||||
|
value = self.splits.scheme
|
||||||
elif name == 'HTTP_COOKIE':
|
|
||||||
name = 'Cookie'
|
|
||||||
value = self._req_cookie_rewrite(value)
|
|
||||||
has_cookies = True
|
|
||||||
|
|
||||||
elif name.startswith('HTTP_'):
|
elif name.startswith('HTTP_'):
|
||||||
name = name[5:].title().replace('_', '-')
|
name = name[5:].title().replace('_', '-')
|
||||||
@ -81,31 +85,11 @@ class RewriteInputRequest(DirectWSGIInputRequest):
|
|||||||
if value:
|
if value:
|
||||||
headers[name] = value
|
headers[name] = value
|
||||||
|
|
||||||
if not has_cookies:
|
|
||||||
value = self._req_cookie_rewrite('')
|
|
||||||
if value:
|
|
||||||
headers['Cookie'] = value
|
|
||||||
|
|
||||||
if self.extra_cookie:
|
if self.extra_cookie:
|
||||||
headers['Cookie'] = self.extra_cookie + ';' + headers.get('Cookie', '')
|
headers['Cookie'] = self.extra_cookie + ';' + headers.get('Cookie', '')
|
||||||
|
|
||||||
return headers
|
return headers
|
||||||
|
|
||||||
def _req_cookie_rewrite(self, value):
|
|
||||||
return value
|
|
||||||
|
|
||||||
rule = self.rewriter.ruleset.get_first_match(self.urlkey)
|
|
||||||
if not rule or not rule.req_cookie_rewrite:
|
|
||||||
return value
|
|
||||||
|
|
||||||
for cr in rule.req_cookie_rewrite:
|
|
||||||
try:
|
|
||||||
value = cr['rx'].sub(cr['replace'], value)
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return value
|
|
||||||
|
|
||||||
def extract_range(self):
|
def extract_range(self):
|
||||||
use_206 = False
|
use_206 = False
|
||||||
start = None
|
start = None
|
||||||
|
@ -161,7 +161,28 @@ class UrlRewriter(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class SchemeOnlyUrlRewriter(UrlRewriter):
|
class IdentityUrlRewriter(UrlRewriter):
|
||||||
|
"""
|
||||||
|
No rewriting performed, return original url
|
||||||
|
"""
|
||||||
|
def rewrite(self, url, mod=None):
|
||||||
|
return url
|
||||||
|
|
||||||
|
def get_new_url(self, **kwargs):
|
||||||
|
return kwargs.get('url', self.wburl.url)
|
||||||
|
|
||||||
|
def rebase_rewriter(self, new_url):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def get_cookie_rewriter(self, scope=None):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def deprefix_url(self):
|
||||||
|
return self.wburl.url
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class SchemeOnlyUrlRewriter(IdentityUrlRewriter):
|
||||||
"""
|
"""
|
||||||
A url rewriter which ensures that any urls have the same
|
A url rewriter which ensures that any urls have the same
|
||||||
scheme (http or https) as the base url.
|
scheme (http or https) as the base url.
|
||||||
@ -182,14 +203,3 @@ class SchemeOnlyUrlRewriter(UrlRewriter):
|
|||||||
|
|
||||||
return url
|
return url
|
||||||
|
|
||||||
def get_new_url(self, **kwargs):
|
|
||||||
return kwargs.get('url', self.wburl.url)
|
|
||||||
|
|
||||||
def rebase_rewriter(self, new_url):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def get_cookie_rewriter(self, scope=None):
|
|
||||||
return None
|
|
||||||
|
|
||||||
def deprefix_url(self):
|
|
||||||
return self.wburl.url
|
|
||||||
|
@ -337,13 +337,6 @@ rules:
|
|||||||
- match: 'yt\.setConfig.*PLAYER_CONFIG.*args":\s*{'
|
- match: 'yt\.setConfig.*PLAYER_CONFIG.*args":\s*{'
|
||||||
replace: '{0} "dash": "0", dashmpd: "", '
|
replace: '{0} "dash": "0", dashmpd: "", '
|
||||||
|
|
||||||
req_cookie_rewrite:
|
|
||||||
- match: '^(((?!PREF).)*)$'
|
|
||||||
replace: '\1; PREF=f2=40000000'
|
|
||||||
|
|
||||||
- match: '(.*PREF=)([^ ;]*)(.*)'
|
|
||||||
replace: '\1&f2=40000000\3'
|
|
||||||
|
|
||||||
# testing rules -- not for valid domain
|
# testing rules -- not for valid domain
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# this rule block is a non-existent prefix merely for testing
|
# this rule block is a non-existent prefix merely for testing
|
||||||
@ -376,17 +369,6 @@ rules:
|
|||||||
rewrite:
|
rewrite:
|
||||||
js_rewrite_location: urls
|
js_rewrite_location: urls
|
||||||
|
|
||||||
req_cookie_rewrite:
|
|
||||||
- match: '^(((?!FOO).)*)$'
|
|
||||||
replace: '\1; FOO=bar=1'
|
|
||||||
|
|
||||||
- match: '(.*FOO=)([^ ;]*)(.*)'
|
|
||||||
replace: '\1&bar=1\3'
|
|
||||||
|
|
||||||
- match: ''
|
|
||||||
invalid_: ''
|
|
||||||
|
|
||||||
|
|
||||||
# all domain rules -- fallback to this dataset
|
# all domain rules -- fallback to this dataset
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Applies to all urls -- should be last
|
# Applies to all urls -- should be last
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
|
import requests
|
||||||
|
|
||||||
class DefaultAdapters(object):
|
class DefaultAdapters(object):
|
||||||
live_adapter = HTTPAdapter(max_retries=3)
|
live_adapter = HTTPAdapter(max_retries=3)
|
||||||
remote_adapter = HTTPAdapter(max_retries=3)
|
remote_adapter = HTTPAdapter(max_retries=3)
|
||||||
|
|
||||||
|
requests.packages.urllib3.disable_warnings()
|
||||||
|
|
||||||
|
@ -3,8 +3,8 @@ from warcio.statusandheaders import StatusAndHeadersParser
|
|||||||
|
|
||||||
from warcio.utils import to_native_str
|
from warcio.utils import to_native_str
|
||||||
|
|
||||||
from six.moves.urllib.parse import urlsplit, quote, unquote_plus
|
from six.moves.urllib.parse import urlsplit, quote, unquote_plus, urlencode
|
||||||
from six import iteritems, StringIO
|
from six import iteritems, StringIO, PY3
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
@ -230,7 +230,7 @@ class PostQueryExtractor(object):
|
|||||||
environ=env,
|
environ=env,
|
||||||
keep_blank_values=True)
|
keep_blank_values=True)
|
||||||
|
|
||||||
if six.PY3:
|
if PY3:
|
||||||
args['encoding'] = 'utf-8'
|
args['encoding'] = 'utf-8'
|
||||||
|
|
||||||
data = cgi.FieldStorage(**args)
|
data = cgi.FieldStorage(**args)
|
||||||
|
@ -12,3 +12,4 @@ webencodings
|
|||||||
gevent==1.2.2
|
gevent==1.2.2
|
||||||
webassets==0.12.1
|
webassets==0.12.1
|
||||||
portalocker
|
portalocker
|
||||||
|
wsgiprox>=1.4.1
|
||||||
|
101
tests/test_proxy.py
Normal file
101
tests/test_proxy.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
from pywb.warcserver.test.testutils import BaseTestClass, TempDirTests
|
||||||
|
|
||||||
|
from .base_config_test import CollsDirMixin
|
||||||
|
from pywb.utils.geventserver import GeventServer
|
||||||
|
from pywb.apps.frontendapp import FrontEndApp
|
||||||
|
from pywb.manager.manager import main as manager
|
||||||
|
|
||||||
|
import os
|
||||||
|
import requests
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
@pytest.fixture(params=['http', 'https'])
|
||||||
|
def scheme(request):
|
||||||
|
return request.param
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class BaseTestProxy(TempDirTests, BaseTestClass):
|
||||||
|
@classmethod
|
||||||
|
def setup_class(cls, coll='pywb', config_file='config_test.yaml'):
|
||||||
|
super(BaseTestProxy, cls).setup_class()
|
||||||
|
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
|
||||||
|
|
||||||
|
cls.root_ca_file = os.path.join(cls.root_dir, 'pywb-ca-test.pem')
|
||||||
|
|
||||||
|
cls.app = FrontEndApp(config_file=config_file)
|
||||||
|
opts = {'ca_name': 'pywb HTTPS Proxy CA',
|
||||||
|
'ca_file_cache': cls.root_ca_file}
|
||||||
|
|
||||||
|
cls.proxy_app = cls.app.init_proxy(coll, opts)
|
||||||
|
|
||||||
|
cls.server = GeventServer(cls.proxy_app)
|
||||||
|
cls.proxies = cls.proxy_dict(cls.server.port)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def teardown_class(cls):
|
||||||
|
cls.server.stop()
|
||||||
|
|
||||||
|
super(BaseTestProxy, cls).teardown_class()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def proxy_dict(cls, port, host='localhost'):
|
||||||
|
return {'http': 'http://{0}:{1}'.format(host, port),
|
||||||
|
'https': 'https://{0}:{1}'.format(host, port)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class TestProxy(BaseTestProxy):
|
||||||
|
def test_proxy_replay(self, scheme):
|
||||||
|
res = requests.get('{0}://example.com/'.format(scheme),
|
||||||
|
proxies=self.proxies,
|
||||||
|
verify=self.root_ca_file)
|
||||||
|
|
||||||
|
assert 'WB Insert' in res.text
|
||||||
|
assert 'Example Domain' in res.text
|
||||||
|
|
||||||
|
assert res.headers['Link'] == '<http://example.com>; rel="memento"; datetime="Mon, 27 Jan 2014 17:12:51 GMT"'
|
||||||
|
assert res.headers['Memento-Datetime'] == 'Mon, 27 Jan 2014 17:12:51 GMT'
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class TestRecordingProxy(CollsDirMixin, BaseTestProxy):
|
||||||
|
@classmethod
|
||||||
|
def setup_class(cls, coll='pywb', config_file='config_test.yaml'):
|
||||||
|
super(TestRecordingProxy, cls).setup_class('test/record', 'config_test_record.yaml')
|
||||||
|
manager(['init', 'test'])
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def teardown_class(cls):
|
||||||
|
if cls.app.recorder:
|
||||||
|
cls.app.recorder.writer.close()
|
||||||
|
super(TestRecordingProxy, cls).teardown_class()
|
||||||
|
|
||||||
|
def test_proxy_record(self, scheme):
|
||||||
|
archive_dir = os.path.join(self.root_dir, '_test_colls', 'test', 'archive')
|
||||||
|
assert os.path.isdir(archive_dir)
|
||||||
|
|
||||||
|
res = requests.get('{0}://httpbin.org/'.format(scheme),
|
||||||
|
proxies=self.proxies,
|
||||||
|
verify=self.root_ca_file)
|
||||||
|
|
||||||
|
assert 'is_live = true' in res.text
|
||||||
|
assert 'httpbin(1)' in res.text
|
||||||
|
|
||||||
|
assert len(os.listdir(archive_dir)) == 1
|
||||||
|
|
||||||
|
def test_proxy_replay_recorded(self, scheme):
|
||||||
|
manager(['reindex', 'test'])
|
||||||
|
|
||||||
|
self.proxy_app.prefix_resolver.fixed_prefix = '/test/bn_/'
|
||||||
|
|
||||||
|
res = requests.get('{0}://httpbin.org/'.format(scheme),
|
||||||
|
proxies=self.proxies,
|
||||||
|
verify=self.root_ca_file)
|
||||||
|
|
||||||
|
assert 'is_live = false' in res.text
|
||||||
|
assert 'httpbin(1)' in res.text
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user