mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
* config cleanup: remove the 'hostpaths' setting entirely, avoiding the need to specify the host on which pywb will run (this was cumbersome to maintain and not really useful). ReferRedirect now just checks that the current request's Host header, if present, matches the host of the referrer, and that the coll and script name match.
* removed proxy_pac, as it was also unneeded/unused and required use of hostpaths
* added test for invalid CONNECT usage (405 response)
This commit is contained in:
parent
6b476d83de
commit
eaaefbfd24
10
config.yaml
10
config.yaml
@ -66,16 +66,6 @@ archive_paths: ./sample_archive/warcs/
|
|||||||
|
|
||||||
# ==== Other Paths ====
|
# ==== Other Paths ====
|
||||||
|
|
||||||
# list of host names that pywb will be running from to detect
|
|
||||||
# 'fallthrough' requests based on referrer
|
|
||||||
#
|
|
||||||
# eg: an incorrect request for http://localhost:8080/image.gif with a referrer
|
|
||||||
# of http://localhost:8080/pywb/index.html, pywb can correctly redirect
|
|
||||||
# to http://localhost:8080/pywb/image.gif
|
|
||||||
#
|
|
||||||
|
|
||||||
#hostpaths: ['http://localhost:8080']
|
|
||||||
|
|
||||||
# Rewrite urls with absolute paths instead of relative
|
# Rewrite urls with absolute paths instead of relative
|
||||||
#absoulte_paths: true
|
#absoulte_paths: true
|
||||||
|
|
||||||
|
@ -16,12 +16,7 @@ class ArchivalRouter(object):
|
|||||||
# optional port setting may be ignored by wsgi container
|
# optional port setting may be ignored by wsgi container
|
||||||
self.port = kwargs.get('port')
|
self.port = kwargs.get('port')
|
||||||
|
|
||||||
hostpaths = kwargs.get('hostpaths')
|
self.fallback = ReferRedirect()
|
||||||
|
|
||||||
if hostpaths:
|
|
||||||
self.fallback = ReferRedirect(hostpaths)
|
|
||||||
else:
|
|
||||||
self.fallback = None
|
|
||||||
|
|
||||||
self.abs_path = kwargs.get('abs_path')
|
self.abs_path = kwargs.get('abs_path')
|
||||||
|
|
||||||
@ -133,12 +128,6 @@ class Route(object):
|
|||||||
# based on the referrer settings
|
# based on the referrer settings
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ReferRedirect:
|
class ReferRedirect:
|
||||||
def __init__(self, match_prefixs):
|
|
||||||
if isinstance(match_prefixs, list):
|
|
||||||
self.match_prefixs = match_prefixs
|
|
||||||
else:
|
|
||||||
self.match_prefixs = [match_prefixs]
|
|
||||||
|
|
||||||
def __call__(self, env, the_router):
|
def __call__(self, env, the_router):
|
||||||
referrer = env.get('HTTP_REFERER')
|
referrer = env.get('HTTP_REFERER')
|
||||||
|
|
||||||
@ -151,10 +140,10 @@ class ReferRedirect:
|
|||||||
# get referrer path name
|
# get referrer path name
|
||||||
ref_split = urlparse.urlsplit(referrer)
|
ref_split = urlparse.urlsplit(referrer)
|
||||||
|
|
||||||
# ensure referrer starts with one of allowed hosts
|
# require that the referrer's host matches the current Host header, if any
|
||||||
if not any(referrer.startswith(i) for i in self.match_prefixs):
|
curr_host = env.get('HTTP_HOST')
|
||||||
if ref_split.netloc != env.get('HTTP_HOST'):
|
if curr_host and curr_host != ref_split.netloc:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
path = ref_split.path
|
path = ref_split.path
|
||||||
|
|
||||||
|
@ -53,7 +53,6 @@ class ProxyRouter(object):
|
|||||||
for more details.
|
for more details.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
PAC_PATH = '/proxy.pac'
|
|
||||||
BLOCK_SIZE = 4096
|
BLOCK_SIZE = 4096
|
||||||
DEF_MAGIC_NAME = 'pywb.proxy'
|
DEF_MAGIC_NAME = 'pywb.proxy'
|
||||||
|
|
||||||
@ -64,8 +63,6 @@ class ProxyRouter(object):
|
|||||||
'p3p': 'CP="NOI ADM DEV COM NAV OUR STP"'}
|
'p3p': 'CP="NOI ADM DEV COM NAV OUR STP"'}
|
||||||
|
|
||||||
def __init__(self, routes, **kwargs):
|
def __init__(self, routes, **kwargs):
|
||||||
self.hostpaths = kwargs.get('hostpaths')
|
|
||||||
|
|
||||||
self.error_view = kwargs.get('error_view')
|
self.error_view = kwargs.get('error_view')
|
||||||
|
|
||||||
proxy_options = kwargs.get('config', {})
|
proxy_options = kwargs.get('config', {})
|
||||||
@ -89,9 +86,6 @@ class ProxyRouter(object):
|
|||||||
|
|
||||||
self.unaltered = proxy_options.get('unaltered_replay', False)
|
self.unaltered = proxy_options.get('unaltered_replay', False)
|
||||||
|
|
||||||
self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH)
|
|
||||||
|
|
||||||
|
|
||||||
if not proxy_options.get('enable_https_proxy'):
|
if not proxy_options.get('enable_https_proxy'):
|
||||||
self.ca = None
|
self.ca = None
|
||||||
self.proxy_cert_dl_view = None
|
self.proxy_cert_dl_view = None
|
||||||
@ -116,13 +110,10 @@ class ProxyRouter(object):
|
|||||||
def __call__(self, env):
|
def __call__(self, env):
|
||||||
is_https = (env['REQUEST_METHOD'] == 'CONNECT')
|
is_https = (env['REQUEST_METHOD'] == 'CONNECT')
|
||||||
|
|
||||||
# for non-https requests, check pac path and non-proxy urls
|
# for non-https requests, check non-proxy urls
|
||||||
if not is_https:
|
if not is_https:
|
||||||
url = env['REL_REQUEST_URI']
|
url = env['REL_REQUEST_URI']
|
||||||
|
|
||||||
if url == self.proxy_pac_path:
|
|
||||||
return self.make_pac_response(env)
|
|
||||||
|
|
||||||
if not url.startswith(('http://', 'https://')):
|
if not url.startswith(('http://', 'https://')):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -359,29 +350,3 @@ class ProxyRouter(object):
|
|||||||
content_type=content_type)
|
content_type=content_type)
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Proxy Auto-Config (PAC) script for the proxy
|
|
||||||
def make_pac_response(self, env):
|
|
||||||
hostname = env.get('HTTP_HOST')
|
|
||||||
if not hostname:
|
|
||||||
server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT']
|
|
||||||
hostonly = env['SERVER_NAME']
|
|
||||||
else:
|
|
||||||
server_hostport = hostname
|
|
||||||
hostonly = hostname.split(':')[0]
|
|
||||||
|
|
||||||
buff = 'function FindProxyForURL (url, host) {\n'
|
|
||||||
|
|
||||||
direct = ' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n'
|
|
||||||
|
|
||||||
for hostpath in self.hostpaths:
|
|
||||||
parts = urlparse.urlsplit(hostpath).netloc.split(':')
|
|
||||||
buff += direct.format(parts[0])
|
|
||||||
|
|
||||||
buff += direct.format(hostonly)
|
|
||||||
|
|
||||||
buff += '\n return "PROXY {0}";\n}}\n'.format(server_hostport)
|
|
||||||
|
|
||||||
content_type = 'application/x-ns-proxy-autoconfig'
|
|
||||||
|
|
||||||
return WbResponse.text_response(buff, content_type=content_type)
|
|
||||||
|
@ -25,14 +25,7 @@
|
|||||||
# not matching route -- skipped
|
# not matching route -- skipped
|
||||||
>>> _test_route_req(Route('web', BaseHandler()), {'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''})
|
>>> _test_route_req(Route('web', BaseHandler()), {'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''})
|
||||||
|
|
||||||
|
# Test Refer Redirects
|
||||||
# Referer Redirect Test
|
|
||||||
>>> ReferRedirect('http://localhost:8080/').match_prefixs
|
|
||||||
['http://localhost:8080/']
|
|
||||||
|
|
||||||
>>> ReferRedirect(['http://example:9090/']).match_prefixs
|
|
||||||
['http://example:9090/']
|
|
||||||
|
|
||||||
>>> _test_redir('http://localhost:8080/', '/diff_path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
|
>>> _test_redir('http://localhost:8080/', '/diff_path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
|
||||||
'http://localhost:8080/coll/20131010/http://example.com/diff_path/other.html'
|
'http://localhost:8080/coll/20131010/http://example.com/diff_path/other.html'
|
||||||
|
|
||||||
@ -55,11 +48,11 @@
|
|||||||
'http://localhost:8080/coll/20131010/http://example.com/path/other.html'
|
'http://localhost:8080/coll/20131010/http://example.com/path/other.html'
|
||||||
|
|
||||||
# Wrong Host
|
# Wrong Host
|
||||||
>>> _test_redir('http://example:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
|
>>> _test_redir('http://example.com:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
|
||||||
False
|
False
|
||||||
|
|
||||||
# Right Host
|
# Right Host
|
||||||
>>> _test_redir('http://localhost:8080/', '/other.html', 'http://example.com:8080/coll/20131010/http://example.com/path/page.html', http_host = 'example.com:8080')
|
>>> _test_redir('http://example.com:8080/', '/other.html', 'http://example.com:8080/coll/20131010/http://example.com/path/page.html')
|
||||||
'http://example.com:8080/coll/20131010/http://example.com/other.html'
|
'http://example.com:8080/coll/20131010/http://example.com/other.html'
|
||||||
|
|
||||||
# With custom SCRIPT_NAME
|
# With custom SCRIPT_NAME
|
||||||
@ -87,6 +80,7 @@ False
|
|||||||
from pywb.framework.archivalrouter import Route, ReferRedirect, ArchivalRouter
|
from pywb.framework.archivalrouter import Route, ReferRedirect, ArchivalRouter
|
||||||
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
||||||
import pprint
|
import pprint
|
||||||
|
import urlparse
|
||||||
|
|
||||||
def _test_route_req(route, env, abs_path=False):
|
def _test_route_req(route, env, abs_path=False):
|
||||||
matcher, coll = route.is_handling(env['REL_REQUEST_URI'])
|
matcher, coll = route.is_handling(env['REL_REQUEST_URI'])
|
||||||
@ -101,17 +95,16 @@ def _test_route_req(route, env, abs_path=False):
|
|||||||
pprint.pprint(the_dict)
|
pprint.pprint(the_dict)
|
||||||
|
|
||||||
|
|
||||||
def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'coll', http_host = None):
|
def _test_redir(match_host, request_uri, referrer, script_name='', coll='coll'):
|
||||||
env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer, 'SCRIPT_NAME': script_name}
|
env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer, 'SCRIPT_NAME': script_name}
|
||||||
|
|
||||||
if http_host:
|
env['HTTP_HOST'] = urlparse.urlsplit(match_host).netloc
|
||||||
env['HTTP_HOST'] = http_host
|
|
||||||
|
|
||||||
routes = [Route(coll, WbUrlHandler())]
|
routes = [Route(coll, WbUrlHandler())]
|
||||||
|
|
||||||
the_router = ArchivalRouter(routes)
|
the_router = ArchivalRouter(routes)
|
||||||
|
|
||||||
redir = ReferRedirect(match_host)
|
redir = ReferRedirect()
|
||||||
#req = WbRequest.from_uri(request_uri, env)
|
#req = WbRequest.from_uri(request_uri, env)
|
||||||
rep = redir(env, the_router)
|
rep = redir(env, the_router)
|
||||||
if not rep:
|
if not rep:
|
||||||
|
@ -85,16 +85,6 @@ error_html: ui/error.html
|
|||||||
|
|
||||||
# ==== Other Paths ====
|
# ==== Other Paths ====
|
||||||
|
|
||||||
# list of host names that pywb will be running from to detect
|
|
||||||
# 'fallthrough' requests based on referrer
|
|
||||||
#
|
|
||||||
# eg: an incorrect request for http://localhost:8080/image.gif with a referrer
|
|
||||||
# of http://localhost:8080/pywb/index.html, pywb can correctly redirect
|
|
||||||
# to http://localhost:8080/pywb/image.gif
|
|
||||||
#
|
|
||||||
|
|
||||||
#hostpaths: ['http://localhost:8080']
|
|
||||||
|
|
||||||
# Rewrite urls with absolute paths instead of relative
|
# Rewrite urls with absolute paths instead of relative
|
||||||
absoulte_paths: true
|
absoulte_paths: true
|
||||||
|
|
||||||
|
@ -219,16 +219,17 @@ class TestWb:
|
|||||||
assert '/pywb/20140127171251mp_/http://www.iana.org/domains/example' in resp.body
|
assert '/pywb/20140127171251mp_/http://www.iana.org/domains/example' in resp.body
|
||||||
|
|
||||||
def test_redirect_relative_3(self):
|
def test_redirect_relative_3(self):
|
||||||
|
# webtest uses Host: localhost:80 by default
|
||||||
# first two requests should result in same redirect
|
# first two requests should result in same redirect
|
||||||
target = 'http://localhost:8080/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css'
|
target = 'http://localhost:80/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css'
|
||||||
|
|
||||||
# without timestamp
|
# without timestamp
|
||||||
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:8080/pywb/2014mp_/http://iana.org/')])
|
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')])
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'] == target, resp.headers['Location']
|
assert resp.headers['Location'] == target, resp.headers['Location']
|
||||||
|
|
||||||
# with timestamp
|
# with timestamp
|
||||||
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:8080/pywb/2014mp_/http://iana.org/')])
|
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')])
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'] == target, resp.headers['Location']
|
assert resp.headers['Location'] == target, resp.headers['Location']
|
||||||
|
|
||||||
@ -313,7 +314,7 @@ class TestWb:
|
|||||||
|
|
||||||
def test_post_redirect(self):
|
def test_post_redirect(self):
|
||||||
# post handled without redirect (since 307 not allowed)
|
# post handled without redirect (since 307 not allowed)
|
||||||
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:8080/pywb/2014mp_/http://httpbin.org/post')])
|
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:80/pywb/2014mp_/http://httpbin.org/post')])
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert '"foo": "bar"' in resp.body
|
assert '"foo": "bar"' in resp.body
|
||||||
assert '"test": "abc"' in resp.body
|
assert '"test": "abc"' in resp.body
|
||||||
|
@ -77,3 +77,11 @@ class TestProxyWb:
|
|||||||
resp = self.testapp.get('/x-ignore-this-x', headers = headers,
|
resp = self.testapp.get('/x-ignore-this-x', headers = headers,
|
||||||
extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''),
|
extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''),
|
||||||
status=407)
|
status=407)
|
||||||
|
|
||||||
|
|
||||||
|
def test_proxy_connect_unsupported(self):
|
||||||
|
resp = self.testapp.request('/x-ignore-this-x', method='CONNECT',
|
||||||
|
environ=dict(REQUEST_URI='example:443', SCRIPT_NAME=''),
|
||||||
|
status=405)
|
||||||
|
|
||||||
|
assert resp.status_int == 405
|
||||||
|
@ -62,7 +62,7 @@ class TestHttpsProxy:
|
|||||||
def setup(self):
|
def setup(self):
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
|
|
||||||
def get_url(self, url, headers=None):
|
def get_url(self, url):
|
||||||
global sesh_key
|
global sesh_key
|
||||||
if sesh_key:
|
if sesh_key:
|
||||||
self.session.headers.update({'Cookie': '__pywb_proxy_sesh=' + sesh_key})
|
self.session.headers.update({'Cookie': '__pywb_proxy_sesh=' + sesh_key})
|
||||||
@ -174,16 +174,14 @@ class TestHttpsProxy:
|
|||||||
def test_replay_static(self):
|
def test_replay_static(self):
|
||||||
resp = self.get_url('https://pywb.proxy/static/default/wb.js')
|
resp = self.get_url('https://pywb.proxy/static/default/wb.js')
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
found = u'function init_banner' in resp.text
|
assert 'function init_banner' in resp.text
|
||||||
assert found, resp.text
|
|
||||||
|
|
||||||
# download index page and cert downloads
|
# download index page and cert downloads
|
||||||
def test_replay_dl_page(self):
|
def test_replay_dl_page(self):
|
||||||
resp = self.get_url('https://pywb.proxy/')
|
resp = self.get_url('https://pywb.proxy/')
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
assert 'text/html' in resp.headers['content-type']
|
assert 'text/html' in resp.headers['content-type']
|
||||||
found = u'Download' in resp.text
|
assert 'Download' in resp.text
|
||||||
assert found, resp.text
|
|
||||||
|
|
||||||
def test_dl_pem(self):
|
def test_dl_pem(self):
|
||||||
resp = self.get_url('https://pywb.proxy/pywb-ca.pem')
|
resp = self.get_url('https://pywb.proxy/pywb-ca.pem')
|
||||||
@ -194,4 +192,3 @@ class TestHttpsProxy:
|
|||||||
resp = self.get_url('https://pywb.proxy/pywb-ca.p12')
|
resp = self.get_url('https://pywb.proxy/pywb-ca.p12')
|
||||||
|
|
||||||
assert resp.headers['content-type'] == 'application/x-pkcs12'
|
assert resp.headers['content-type'] == 'application/x-pkcs12'
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user