diff --git a/pywb/rewrite/test/test_wburl.py b/pywb/rewrite/test/test_wburl.py index 453cf550..580d1a02 100644 --- a/pywb/rewrite/test/test_wburl.py +++ b/pywb/rewrite/test/test_wburl.py @@ -20,6 +20,9 @@ u""" >>> repr(WbUrl('cs_/example.com')) "('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')" +>>> repr(WbUrl('cs_/example.com/?foo=http://example.com/')) +"('latest_replay', '', 'cs_', 'http://example.com/?foo=http://example.com/', 'cs_/http://example.com/?foo=http://example.com/')" + >>> repr(WbUrl('im_/20130102.org')) "('latest_replay', '', 'im_', 'http://20130102.org', 'im_/http://20130102.org')" diff --git a/pywb/rewrite/wburl.py b/pywb/rewrite/wburl.py index 2d7ec538..df414d22 100644 --- a/pywb/rewrite/wburl.py +++ b/pywb/rewrite/wburl.py @@ -98,6 +98,8 @@ class WbUrl(BaseWbUrl): FIRST_PATH = re.compile('(? http:// # no protocol -> http:// - inx = self.url.find(':/') + #inx = self.url.find('://') + inx = -1 + m = self.SCHEME_RX.match(self.url) + if m: + inx = m.span(1)[0] + #if inx < 0: # check for other partially encoded variants # m = self.PARTIAL_ENC_RX.match(self.url)