diff --git a/config.yaml b/config.yaml index fc2290ba..c5e662e5 100644 --- a/config.yaml +++ b/config.yaml @@ -66,16 +66,6 @@ archive_paths: ./sample_archive/warcs/ # ==== Other Paths ==== -# list of host names that pywb will be running from to detect -# 'fallthrough' requests based on referrer -# -# eg: an incorrect request for http://localhost:8080/image.gif with a referrer -# of http://localhost:8080/pywb/index.html, pywb can correctly redirect -# to http://localhost:8080/pywb/image.gif -# - -#hostpaths: ['http://localhost:8080'] - # Rewrite urls with absolute paths instead of relative #absoulte_paths: true diff --git a/pywb/framework/archivalrouter.py b/pywb/framework/archivalrouter.py index 861fad90..df5b7ec6 100644 --- a/pywb/framework/archivalrouter.py +++ b/pywb/framework/archivalrouter.py @@ -16,12 +16,7 @@ class ArchivalRouter(object): # optional port setting may be ignored by wsgi container self.port = kwargs.get('port') - hostpaths = kwargs.get('hostpaths') - - if hostpaths: - self.fallback = ReferRedirect(hostpaths) - else: - self.fallback = None + self.fallback = ReferRedirect() self.abs_path = kwargs.get('abs_path') @@ -133,12 +128,6 @@ class Route(object): # based on the referrer settings #================================================================= class ReferRedirect: - def __init__(self, match_prefixs): - if isinstance(match_prefixs, list): - self.match_prefixs = match_prefixs - else: - self.match_prefixs = [match_prefixs] - def __call__(self, env, the_router): referrer = env.get('HTTP_REFERER') @@ -151,10 +140,10 @@ class ReferRedirect: # get referrer path name ref_split = urlparse.urlsplit(referrer) - # ensure referrer starts with one of allowed hosts - if not any(referrer.startswith(i) for i in self.match_prefixs): - if ref_split.netloc != env.get('HTTP_HOST'): - return None + # require that the referrer's netloc matches the current Host header, if any + curr_host = env.get('HTTP_HOST') + if curr_host and curr_host != ref_split.netloc: + return None path = 
ref_split.path diff --git a/pywb/framework/proxy.py b/pywb/framework/proxy.py index 848489e5..e07a531f 100644 --- a/pywb/framework/proxy.py +++ b/pywb/framework/proxy.py @@ -53,7 +53,6 @@ class ProxyRouter(object): for more details. """ - PAC_PATH = '/proxy.pac' BLOCK_SIZE = 4096 DEF_MAGIC_NAME = 'pywb.proxy' @@ -64,8 +63,6 @@ class ProxyRouter(object): 'p3p': 'CP="NOI ADM DEV COM NAV OUR STP"'} def __init__(self, routes, **kwargs): - self.hostpaths = kwargs.get('hostpaths') - self.error_view = kwargs.get('error_view') proxy_options = kwargs.get('config', {}) @@ -89,9 +86,6 @@ class ProxyRouter(object): self.unaltered = proxy_options.get('unaltered_replay', False) - self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH) - - if not proxy_options.get('enable_https_proxy'): self.ca = None self.proxy_cert_dl_view = None @@ -116,13 +110,10 @@ class ProxyRouter(object): def __call__(self, env): is_https = (env['REQUEST_METHOD'] == 'CONNECT') - # for non-https requests, check pac path and non-proxy urls + # for non-https requests, check non-proxy urls if not is_https: url = env['REL_REQUEST_URI'] - if url == self.proxy_pac_path: - return self.make_pac_response(env) - if not url.startswith(('http://', 'https://')): return None @@ -359,29 +350,3 @@ class ProxyRouter(object): content_type=content_type) else: return None - - # Proxy Auto-Config (PAC) script for the proxy - def make_pac_response(self, env): - hostname = env.get('HTTP_HOST') - if not hostname: - server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT'] - hostonly = env['SERVER_NAME'] - else: - server_hostport = hostname - hostonly = hostname.split(':')[0] - - buff = 'function FindProxyForURL (url, host) {\n' - - direct = ' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n' - - for hostpath in self.hostpaths: - parts = urlparse.urlsplit(hostpath).netloc.split(':') - buff += direct.format(parts[0]) - - buff += direct.format(hostonly) - - buff += '\n return "PROXY 
{0}";\n}}\n'.format(server_hostport) - - content_type = 'application/x-ns-proxy-autoconfig' - - return WbResponse.text_response(buff, content_type=content_type) diff --git a/pywb/framework/test/test_archivalrouter.py b/pywb/framework/test/test_archivalrouter.py index 52009353..4509947c 100644 --- a/pywb/framework/test/test_archivalrouter.py +++ b/pywb/framework/test/test_archivalrouter.py @@ -25,14 +25,7 @@ # not matching route -- skipped >>> _test_route_req(Route('web', BaseHandler()), {'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''}) - -# Referer Redirect Test ->>> ReferRedirect('http://localhost:8080/').match_prefixs -['http://localhost:8080/'] - ->>> ReferRedirect(['http://example:9090/']).match_prefixs -['http://example:9090/'] - +# Test Refer Redirects >>> _test_redir('http://localhost:8080/', '/diff_path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html') 'http://localhost:8080/coll/20131010/http://example.com/diff_path/other.html' @@ -55,11 +48,11 @@ 'http://localhost:8080/coll/20131010/http://example.com/path/other.html' # Wrong Host ->>> _test_redir('http://example:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html') +>>> _test_redir('http://example.com:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html') False # Right Host ->>> _test_redir('http://localhost:8080/', '/other.html', 'http://example.com:8080/coll/20131010/http://example.com/path/page.html', http_host = 'example.com:8080') +>>> _test_redir('http://example.com:8080/', '/other.html', 'http://example.com:8080/coll/20131010/http://example.com/path/page.html') 'http://example.com:8080/coll/20131010/http://example.com/other.html' # With custom SCRIPT_NAME @@ -87,6 +80,7 @@ False from pywb.framework.archivalrouter import Route, ReferRedirect, ArchivalRouter from pywb.framework.basehandlers import BaseHandler, WbUrlHandler import pprint +import urlparse def 
_test_route_req(route, env, abs_path=False): matcher, coll = route.is_handling(env['REL_REQUEST_URI']) @@ -101,17 +95,16 @@ def _test_route_req(route, env, abs_path=False): pprint.pprint(the_dict) -def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'coll', http_host = None): +def _test_redir(match_host, request_uri, referrer, script_name='', coll='coll'): env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer, 'SCRIPT_NAME': script_name} - if http_host: - env['HTTP_HOST'] = http_host + env['HTTP_HOST'] = urlparse.urlsplit(match_host).netloc routes = [Route(coll, WbUrlHandler())] the_router = ArchivalRouter(routes) - redir = ReferRedirect(match_host) + redir = ReferRedirect() #req = WbRequest.from_uri(request_uri, env) rep = redir(env, the_router) if not rep: diff --git a/tests/test_config.yaml b/tests/test_config.yaml index 468a3131..2603bb2a 100644 --- a/tests/test_config.yaml +++ b/tests/test_config.yaml @@ -85,16 +85,6 @@ error_html: ui/error.html # ==== Other Paths ==== -# list of host names that pywb will be running from to detect -# 'fallthrough' requests based on referrer -# -# eg: an incorrect request for http://localhost:8080/image.gif with a referrer -# of http://localhost:8080/pywb/index.html, pywb can correctly redirect -# to http://localhost:8080/pywb/image.gif -# - -#hostpaths: ['http://localhost:8080'] - # Rewrite urls with absolute paths instead of relative absoulte_paths: true diff --git a/tests/test_integration.py b/tests/test_integration.py index 7a360341..a3d9d26c 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -219,16 +219,17 @@ class TestWb: assert '/pywb/20140127171251mp_/http://www.iana.org/domains/example' in resp.body def test_redirect_relative_3(self): + # webtest uses Host: localhost:80 by default # first two requests should result in same redirect - target = 'http://localhost:8080/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css' + target = 
'http://localhost:80/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css' # without timestamp - resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:8080/pywb/2014mp_/http://iana.org/')]) + resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')]) assert resp.status_int == 302 assert resp.headers['Location'] == target, resp.headers['Location'] # with timestamp - resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:8080/pywb/2014mp_/http://iana.org/')]) + resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')]) assert resp.status_int == 302 assert resp.headers['Location'] == target, resp.headers['Location'] @@ -313,7 +314,7 @@ class TestWb: def test_post_redirect(self): # post handled without redirect (since 307 not allowed) - resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:8080/pywb/2014mp_/http://httpbin.org/post')]) + resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:80/pywb/2014mp_/http://httpbin.org/post')]) assert resp.status_int == 200 assert '"foo": "bar"' in resp.body assert '"test": "abc"' in resp.body diff --git a/tests/test_proxy.py b/tests/test_proxy.py index 124b6b1e..cee011af 100644 --- a/tests/test_proxy.py +++ b/tests/test_proxy.py @@ -77,3 +77,11 @@ class TestProxyWb: resp = self.testapp.get('/x-ignore-this-x', headers = headers, extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''), status=407) + + + def test_proxy_connect_unsupported(self): + resp = self.testapp.request('/x-ignore-this-x', method='CONNECT', + environ=dict(REQUEST_URI='example:443', SCRIPT_NAME=''), + status=405) + + assert resp.status_int == 405 diff --git a/tests/test_proxy_https.py b/tests/test_proxy_https.py index 
d5e864fc..940f6e06 100644 --- a/tests/test_proxy_https.py +++ b/tests/test_proxy_https.py @@ -62,7 +62,7 @@ class TestHttpsProxy: def setup(self): self.session = requests.Session() - def get_url(self, url, headers=None): + def get_url(self, url): global sesh_key if sesh_key: self.session.headers.update({'Cookie': '__pywb_proxy_sesh=' + sesh_key}) @@ -174,16 +174,14 @@ class TestHttpsProxy: def test_replay_static(self): resp = self.get_url('https://pywb.proxy/static/default/wb.js') assert resp.status_code == 200 - found = u'function init_banner' in resp.text - assert found, resp.text + assert 'function init_banner' in resp.text # download index page and cert downloads def test_replay_dl_page(self): resp = self.get_url('https://pywb.proxy/') assert resp.status_code == 200 assert 'text/html' in resp.headers['content-type'] - found = u'Download' in resp.text - assert found, resp.text + assert 'Download' in resp.text def test_dl_pem(self): resp = self.get_url('https://pywb.proxy/pywb-ca.pem') @@ -194,4 +192,3 @@ class TestHttpsProxy: resp = self.get_url('https://pywb.proxy/pywb-ca.p12') assert resp.headers['content-type'] == 'application/x-pkcs12' -