mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
* config cleanup: remove the 'hostpaths' setting entirely, avoiding the need to specify the host on which pywb
will run (this was cumbersome to maintain and not really useful). ReferRedirect now just checks that the current request's Host header, if present, matches the host of the referrer, and that the coll and script name match. * removed proxy_pac, as it was also unneeded/unused and required use of hostpaths * added test for invalid CONNECT usage (405 response)
This commit is contained in:
parent
6b476d83de
commit
eaaefbfd24
10
config.yaml
10
config.yaml
@ -66,16 +66,6 @@ archive_paths: ./sample_archive/warcs/
|
||||
|
||||
# ==== Other Paths ====
|
||||
|
||||
# list of host names that pywb will be running from to detect
|
||||
# 'fallthrough' requests based on referrer
|
||||
#
|
||||
# eg: an incorrect request for http://localhost:8080/image.gif with a referrer
|
||||
# of http://localhost:8080/pywb/index.html, pywb can correctly redirect
|
||||
# to http://localhost:8080/pywb/image.gif
|
||||
#
|
||||
|
||||
#hostpaths: ['http://localhost:8080']
|
||||
|
||||
# Rewrite urls with absolute paths instead of relative
|
||||
#absoulte_paths: true
|
||||
|
||||
|
@ -16,12 +16,7 @@ class ArchivalRouter(object):
|
||||
# optional port setting may be ignored by wsgi container
|
||||
self.port = kwargs.get('port')
|
||||
|
||||
hostpaths = kwargs.get('hostpaths')
|
||||
|
||||
if hostpaths:
|
||||
self.fallback = ReferRedirect(hostpaths)
|
||||
else:
|
||||
self.fallback = None
|
||||
self.fallback = ReferRedirect()
|
||||
|
||||
self.abs_path = kwargs.get('abs_path')
|
||||
|
||||
@ -133,12 +128,6 @@ class Route(object):
|
||||
# based on the referrer settings
|
||||
#=================================================================
|
||||
class ReferRedirect:
|
||||
def __init__(self, match_prefixs):
|
||||
if isinstance(match_prefixs, list):
|
||||
self.match_prefixs = match_prefixs
|
||||
else:
|
||||
self.match_prefixs = [match_prefixs]
|
||||
|
||||
def __call__(self, env, the_router):
|
||||
referrer = env.get('HTTP_REFERER')
|
||||
|
||||
@ -151,10 +140,10 @@ class ReferRedirect:
|
||||
# get referrer path name
|
||||
ref_split = urlparse.urlsplit(referrer)
|
||||
|
||||
# ensure referrer starts with one of allowed hosts
|
||||
if not any(referrer.startswith(i) for i in self.match_prefixs):
|
||||
if ref_split.netloc != env.get('HTTP_HOST'):
|
||||
return None
|
||||
# require that referrer starts with current Host, if any
|
||||
curr_host = env.get('HTTP_HOST')
|
||||
if curr_host and curr_host != ref_split.netloc:
|
||||
return None
|
||||
|
||||
path = ref_split.path
|
||||
|
||||
|
@ -53,7 +53,6 @@ class ProxyRouter(object):
|
||||
for more details.
|
||||
"""
|
||||
|
||||
PAC_PATH = '/proxy.pac'
|
||||
BLOCK_SIZE = 4096
|
||||
DEF_MAGIC_NAME = 'pywb.proxy'
|
||||
|
||||
@ -64,8 +63,6 @@ class ProxyRouter(object):
|
||||
'p3p': 'CP="NOI ADM DEV COM NAV OUR STP"'}
|
||||
|
||||
def __init__(self, routes, **kwargs):
|
||||
self.hostpaths = kwargs.get('hostpaths')
|
||||
|
||||
self.error_view = kwargs.get('error_view')
|
||||
|
||||
proxy_options = kwargs.get('config', {})
|
||||
@ -89,9 +86,6 @@ class ProxyRouter(object):
|
||||
|
||||
self.unaltered = proxy_options.get('unaltered_replay', False)
|
||||
|
||||
self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH)
|
||||
|
||||
|
||||
if not proxy_options.get('enable_https_proxy'):
|
||||
self.ca = None
|
||||
self.proxy_cert_dl_view = None
|
||||
@ -116,13 +110,10 @@ class ProxyRouter(object):
|
||||
def __call__(self, env):
|
||||
is_https = (env['REQUEST_METHOD'] == 'CONNECT')
|
||||
|
||||
# for non-https requests, check pac path and non-proxy urls
|
||||
# for non-https requests, check non-proxy urls
|
||||
if not is_https:
|
||||
url = env['REL_REQUEST_URI']
|
||||
|
||||
if url == self.proxy_pac_path:
|
||||
return self.make_pac_response(env)
|
||||
|
||||
if not url.startswith(('http://', 'https://')):
|
||||
return None
|
||||
|
||||
@ -359,29 +350,3 @@ class ProxyRouter(object):
|
||||
content_type=content_type)
|
||||
else:
|
||||
return None
|
||||
|
||||
# Proxy Auto-Config (PAC) script for the proxy
|
||||
def make_pac_response(self, env):
|
||||
hostname = env.get('HTTP_HOST')
|
||||
if not hostname:
|
||||
server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT']
|
||||
hostonly = env['SERVER_NAME']
|
||||
else:
|
||||
server_hostport = hostname
|
||||
hostonly = hostname.split(':')[0]
|
||||
|
||||
buff = 'function FindProxyForURL (url, host) {\n'
|
||||
|
||||
direct = ' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n'
|
||||
|
||||
for hostpath in self.hostpaths:
|
||||
parts = urlparse.urlsplit(hostpath).netloc.split(':')
|
||||
buff += direct.format(parts[0])
|
||||
|
||||
buff += direct.format(hostonly)
|
||||
|
||||
buff += '\n return "PROXY {0}";\n}}\n'.format(server_hostport)
|
||||
|
||||
content_type = 'application/x-ns-proxy-autoconfig'
|
||||
|
||||
return WbResponse.text_response(buff, content_type=content_type)
|
||||
|
@ -25,14 +25,7 @@
|
||||
# not matching route -- skipped
|
||||
>>> _test_route_req(Route('web', BaseHandler()), {'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''})
|
||||
|
||||
|
||||
# Referer Redirect Test
|
||||
>>> ReferRedirect('http://localhost:8080/').match_prefixs
|
||||
['http://localhost:8080/']
|
||||
|
||||
>>> ReferRedirect(['http://example:9090/']).match_prefixs
|
||||
['http://example:9090/']
|
||||
|
||||
# Test Refer Redirects
|
||||
>>> _test_redir('http://localhost:8080/', '/diff_path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
|
||||
'http://localhost:8080/coll/20131010/http://example.com/diff_path/other.html'
|
||||
|
||||
@ -55,11 +48,11 @@
|
||||
'http://localhost:8080/coll/20131010/http://example.com/path/other.html'
|
||||
|
||||
# Wrong Host
|
||||
>>> _test_redir('http://example:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
|
||||
>>> _test_redir('http://example.com:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
|
||||
False
|
||||
|
||||
# Right Host
|
||||
>>> _test_redir('http://localhost:8080/', '/other.html', 'http://example.com:8080/coll/20131010/http://example.com/path/page.html', http_host = 'example.com:8080')
|
||||
>>> _test_redir('http://example.com:8080/', '/other.html', 'http://example.com:8080/coll/20131010/http://example.com/path/page.html')
|
||||
'http://example.com:8080/coll/20131010/http://example.com/other.html'
|
||||
|
||||
# With custom SCRIPT_NAME
|
||||
@ -87,6 +80,7 @@ False
|
||||
from pywb.framework.archivalrouter import Route, ReferRedirect, ArchivalRouter
|
||||
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
||||
import pprint
|
||||
import urlparse
|
||||
|
||||
def _test_route_req(route, env, abs_path=False):
|
||||
matcher, coll = route.is_handling(env['REL_REQUEST_URI'])
|
||||
@ -101,17 +95,16 @@ def _test_route_req(route, env, abs_path=False):
|
||||
pprint.pprint(the_dict)
|
||||
|
||||
|
||||
def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'coll', http_host = None):
|
||||
def _test_redir(match_host, request_uri, referrer, script_name='', coll='coll'):
|
||||
env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer, 'SCRIPT_NAME': script_name}
|
||||
|
||||
if http_host:
|
||||
env['HTTP_HOST'] = http_host
|
||||
env['HTTP_HOST'] = urlparse.urlsplit(match_host).netloc
|
||||
|
||||
routes = [Route(coll, WbUrlHandler())]
|
||||
|
||||
the_router = ArchivalRouter(routes)
|
||||
|
||||
redir = ReferRedirect(match_host)
|
||||
redir = ReferRedirect()
|
||||
#req = WbRequest.from_uri(request_uri, env)
|
||||
rep = redir(env, the_router)
|
||||
if not rep:
|
||||
|
@ -85,16 +85,6 @@ error_html: ui/error.html
|
||||
|
||||
# ==== Other Paths ====
|
||||
|
||||
# list of host names that pywb will be running from to detect
|
||||
# 'fallthrough' requests based on referrer
|
||||
#
|
||||
# eg: an incorrect request for http://localhost:8080/image.gif with a referrer
|
||||
# of http://localhost:8080/pywb/index.html, pywb can correctly redirect
|
||||
# to http://localhost:8080/pywb/image.gif
|
||||
#
|
||||
|
||||
#hostpaths: ['http://localhost:8080']
|
||||
|
||||
# Rewrite urls with absolute paths instead of relative
|
||||
absoulte_paths: true
|
||||
|
||||
|
@ -219,16 +219,17 @@ class TestWb:
|
||||
assert '/pywb/20140127171251mp_/http://www.iana.org/domains/example' in resp.body
|
||||
|
||||
def test_redirect_relative_3(self):
|
||||
# webtest uses Host: localhost:80 by default
|
||||
# first two requests should result in same redirect
|
||||
target = 'http://localhost:8080/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css'
|
||||
target = 'http://localhost:80/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css'
|
||||
|
||||
# without timestamp
|
||||
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:8080/pywb/2014mp_/http://iana.org/')])
|
||||
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')])
|
||||
assert resp.status_int == 302
|
||||
assert resp.headers['Location'] == target, resp.headers['Location']
|
||||
|
||||
# with timestamp
|
||||
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:8080/pywb/2014mp_/http://iana.org/')])
|
||||
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')])
|
||||
assert resp.status_int == 302
|
||||
assert resp.headers['Location'] == target, resp.headers['Location']
|
||||
|
||||
@ -313,7 +314,7 @@ class TestWb:
|
||||
|
||||
def test_post_redirect(self):
|
||||
# post handled without redirect (since 307 not allowed)
|
||||
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:8080/pywb/2014mp_/http://httpbin.org/post')])
|
||||
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:80/pywb/2014mp_/http://httpbin.org/post')])
|
||||
assert resp.status_int == 200
|
||||
assert '"foo": "bar"' in resp.body
|
||||
assert '"test": "abc"' in resp.body
|
||||
|
@ -77,3 +77,11 @@ class TestProxyWb:
|
||||
resp = self.testapp.get('/x-ignore-this-x', headers = headers,
|
||||
extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''),
|
||||
status=407)
|
||||
|
||||
|
||||
def test_proxy_connect_unsupported(self):
|
||||
resp = self.testapp.request('/x-ignore-this-x', method='CONNECT',
|
||||
environ=dict(REQUEST_URI='example:443', SCRIPT_NAME=''),
|
||||
status=405)
|
||||
|
||||
assert resp.status_int == 405
|
||||
|
@ -62,7 +62,7 @@ class TestHttpsProxy:
|
||||
def setup(self):
|
||||
self.session = requests.Session()
|
||||
|
||||
def get_url(self, url, headers=None):
|
||||
def get_url(self, url):
|
||||
global sesh_key
|
||||
if sesh_key:
|
||||
self.session.headers.update({'Cookie': '__pywb_proxy_sesh=' + sesh_key})
|
||||
@ -174,16 +174,14 @@ class TestHttpsProxy:
|
||||
def test_replay_static(self):
|
||||
resp = self.get_url('https://pywb.proxy/static/default/wb.js')
|
||||
assert resp.status_code == 200
|
||||
found = u'function init_banner' in resp.text
|
||||
assert found, resp.text
|
||||
assert 'function init_banner' in resp.text
|
||||
|
||||
# download index page and cert downloads
|
||||
def test_replay_dl_page(self):
|
||||
resp = self.get_url('https://pywb.proxy/')
|
||||
assert resp.status_code == 200
|
||||
assert 'text/html' in resp.headers['content-type']
|
||||
found = u'Download' in resp.text
|
||||
assert found, resp.text
|
||||
assert 'Download' in resp.text
|
||||
|
||||
def test_dl_pem(self):
|
||||
resp = self.get_url('https://pywb.proxy/pywb-ca.pem')
|
||||
@ -194,4 +192,3 @@ class TestHttpsProxy:
|
||||
resp = self.get_url('https://pywb.proxy/pywb-ca.p12')
|
||||
|
||||
assert resp.headers['content-type'] == 'application/x-pkcs12'
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user