1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

wburl to_uri: when parsing scheme, also look for first '?' in addition to '/' to catch any irregular urls (to be ignored)

This commit is contained in:
Ilya Kreymer 2015-04-11 10:21:52 -07:00
parent d39f26872d
commit b21e288063
2 changed files with 7 additions and 1 deletions

View File

@ -76,6 +76,12 @@ http://xn--e1afmkfd.xn--80akhbyknj4f
>>> print(WbUrl.to_uri('https://xn--e1afmkfd.xn--80akhbyknj4f/foo/bar?abc=def'))
https://xn--e1afmkfd.xn--80akhbyknj4f/foo/bar?abc=def
>>> print(WbUrl.to_uri('somescheme://test?foo=bar%9F'))
somescheme://test?foo=bar%9F
>>> print(WbUrl.to_uri('/test/foo=bar%9F'))
/test/foo=bar%9F
# truncated
>>> print(WbUrl.to_uri('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:]))
http://xn--d0-olcluwd.xn--80akhbyknj4f

View File

@ -91,7 +91,7 @@ class WbUrl(BaseWbUrl):
DEFAULT_SCHEME = 'http://'
FIRST_PATH = re.compile('(?<![:/])/(?![/])')
FIRST_PATH = re.compile('(?<![:/])[/?](?![/])')
@staticmethod