From 16489b99d958524be0244eff02c9f267f912b101 Mon Sep 17 00:00:00 2001
From: Vangelis Banos
Date: Mon, 6 May 2019 21:23:10 +0000
Subject: [PATCH] Improve target url validation

In addition to checking for scheme='http', we should also check that
netloc has a value. There are many meaningless URLs that pass the
current check. For instance:

```
In [5]: urlparse("http://")
Out[5]: ParseResult(scheme='http', netloc='', path='', params='', query='', fragment='')
In [6]: urlparse("http:///")
Out[6]: ParseResult(scheme='http', netloc='', path='/', params='', query='', fragment='')
```

netloc should always have a value.
---
 warcprox/mitmproxy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py
index 705589e..f8b49dc 100644
--- a/warcprox/mitmproxy.py
+++ b/warcprox/mitmproxy.py
@@ -233,7 +233,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
         else:
             self.url = self.path
             u = urllib_parse.urlparse(self.url)
-            if u.scheme != 'http':
+            if u.scheme != 'http' or u.netloc == '':
                 raise Exception(
                         'unable to parse request %r as a proxy request' % (
                             self.requestline))