From b21e2880632167d852a5089db819d67ac35c6b02 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 11 Apr 2015 10:21:52 -0700 Subject: [PATCH] wburl to_uri: when parsing scheme, also check for first '?' in addition to '/' to catch any irregular urls (to be ignored) --- pywb/rewrite/test/test_wburl.py | 6 ++++++ pywb/rewrite/wburl.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pywb/rewrite/test/test_wburl.py b/pywb/rewrite/test/test_wburl.py index d012d9ee..dea03a47 100644 --- a/pywb/rewrite/test/test_wburl.py +++ b/pywb/rewrite/test/test_wburl.py @@ -76,6 +76,12 @@ http://xn--e1afmkfd.xn--80akhbyknj4f >>> print(WbUrl.to_uri('https://xn--e1afmkfd.xn--80akhbyknj4f/foo/bar?abc=def')) https://xn--e1afmkfd.xn--80akhbyknj4f/foo/bar?abc=def +>>> print(WbUrl.to_uri('somescheme://test?foo=bar%9F')) +somescheme://test?foo=bar%9F + +>>> print(WbUrl.to_uri('/test/foo=bar%9F')) +/test/foo=bar%9F + # truncated >>> print(WbUrl.to_uri('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:])) http://xn--d0-olcluwd.xn--80akhbyknj4f diff --git a/pywb/rewrite/wburl.py b/pywb/rewrite/wburl.py index d2711956..b7d90449 100644 --- a/pywb/rewrite/wburl.py +++ b/pywb/rewrite/wburl.py @@ -91,7 +91,7 @@ class WbUrl(BaseWbUrl): DEFAULT_SCHEME = 'http://' - FIRST_PATH = re.compile('(?