mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
wburl: improved scheme detection. Use a regex to match an acceptable scheme before ':/', so that something like 'a.com/?x=http://' is not treated as having a scheme; update tests to check for this.
This commit is contained in:
parent
9a3017bfcd
commit
1bb7aa01ce
@ -20,6 +20,9 @@ u"""
|
|||||||
>>> repr(WbUrl('cs_/example.com'))
|
>>> repr(WbUrl('cs_/example.com'))
|
||||||
"('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')"
|
"('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')"
|
||||||
|
|
||||||
|
>>> repr(WbUrl('cs_/example.com/?foo=http://example.com/'))
|
||||||
|
"('latest_replay', '', 'cs_', 'http://example.com/?foo=http://example.com/', 'cs_/http://example.com/?foo=http://example.com/')"
|
||||||
|
|
||||||
>>> repr(WbUrl('im_/20130102.org'))
|
>>> repr(WbUrl('im_/20130102.org'))
|
||||||
"('latest_replay', '', 'im_', 'http://20130102.org', 'im_/http://20130102.org')"
|
"('latest_replay', '', 'im_', 'http://20130102.org', 'im_/http://20130102.org')"
|
||||||
|
|
||||||
|
@ -98,6 +98,8 @@ class WbUrl(BaseWbUrl):
|
|||||||
|
|
||||||
FIRST_PATH = re.compile('(?<![:/])[/?](?![/])')
|
FIRST_PATH = re.compile('(?<![:/])[/?](?![/])')
|
||||||
|
|
||||||
|
SCHEME_RX = re.compile('[a-zA-Z0-9+-.]+(:/)')
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def percent_encode_host(url):
|
def percent_encode_host(url):
|
||||||
@ -200,7 +202,12 @@ class WbUrl(BaseWbUrl):
|
|||||||
|
|
||||||
# protocol agnostic url -> http://
|
# protocol agnostic url -> http://
|
||||||
# no protocol -> http://
|
# no protocol -> http://
|
||||||
inx = self.url.find(':/')
|
#inx = self.url.find('://')
|
||||||
|
inx = -1
|
||||||
|
m = self.SCHEME_RX.match(self.url)
|
||||||
|
if m:
|
||||||
|
inx = m.span(1)[0]
|
||||||
|
|
||||||
#if inx < 0:
|
#if inx < 0:
|
||||||
# check for other partially encoded variants
|
# check for other partially encoded variants
|
||||||
# m = self.PARTIAL_ENC_RX.match(self.url)
|
# m = self.PARTIAL_ENC_RX.match(self.url)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user