mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
wburl: detect and decode partially encoded schemes in url, such as http%3A//,
https%3A%2F%2F, before handling further; add additional tests for wburl
This commit is contained in:
parent
d7eb40af20
commit
c996e70a6e
@ -131,7 +131,7 @@ class WbRequest(object):
|
||||
if not self.wb_url:
|
||||
return
|
||||
|
||||
mime = self.env.get('CONTENT_TYPE').split(';')[0]
|
||||
mime = self.env.get('CONTENT_TYPE', '').split(';')[0]
|
||||
length = self.env.get('CONTENT_LENGTH')
|
||||
stream = self.env['wsgi.input']
|
||||
|
||||
|
@ -26,6 +26,13 @@
|
||||
>>> repr(WbUrl('https://example.com/xyz?a=%2f&b=%2E'))
|
||||
"('latest_replay', '', '', 'https://example.com/xyz?a=%2f&b=%2E', 'https://example.com/xyz?a=%2f&b=%2E')"
|
||||
|
||||
# Test scheme partially encoded urls
|
||||
>>> repr(WbUrl('https%3A//example.com/'))
|
||||
"('latest_replay', '', '', 'https://example.com/', 'https://example.com/')"
|
||||
|
||||
>>> repr(WbUrl('2014/http%3A%2F%2Fexample.com/'))
|
||||
"('replay', '2014', '', 'http://example.com/', '2014/http://example.com/')"
|
||||
|
||||
# Query Urls
|
||||
# ======================
|
||||
>>> repr(WbUrl('*/http://example.com/abc?def=a'))
|
||||
@ -57,6 +64,21 @@
|
||||
>>> repr(WbUrl('/example.com/'))
|
||||
"('latest_replay', '', '', 'http://example.com/', 'http://example.com/')"
|
||||
|
||||
# Is_ Tests
|
||||
>>> u = WbUrl('*/http://example.com/abc?def=a*')
|
||||
>>> u.is_url_query()
|
||||
True
|
||||
|
||||
>>> u.is_query()
|
||||
True
|
||||
|
||||
>>> u2 = WbUrl('20130102im_/https:/example.com')
|
||||
>>> u2.is_embed
|
||||
True
|
||||
|
||||
>>> u2.is_replay()
|
||||
True
|
||||
|
||||
|
||||
# Error Urls
|
||||
# ======================
|
||||
|
@ -85,6 +85,9 @@ class WbUrl(BaseWbUrl):
|
||||
REPLAY_REGEX = re.compile('^(\d*)([a-z]+_)?/{0,3}(.+)$')
|
||||
|
||||
DEFAULT_SCHEME = 'http://'
|
||||
|
||||
PARTIAL_ENC_RX = re.compile('(https?%3A)?(%2F%2F)?', re.I)
|
||||
|
||||
# ======================
|
||||
|
||||
def __init__(self, url):
|
||||
@ -99,6 +102,14 @@ class WbUrl(BaseWbUrl):
|
||||
# protocol agnostic url -> http://
|
||||
# no protocol -> http://
|
||||
inx = self.url.find(':/')
|
||||
if inx < 0:
|
||||
# check for other partially encoded variants
|
||||
m = self.PARTIAL_ENC_RX.match(self.url)
|
||||
if m:
|
||||
len_ = len(m.group(0))
|
||||
self.url = urllib.unquote_plus(self.url[:len_]) + self.url[len_:]
|
||||
inx = self.url.find(':/')
|
||||
|
||||
if inx < 0:
|
||||
self.url = self.DEFAULT_SCHEME + self.url
|
||||
else:
|
||||
|
Loading…
x
Reference in New Issue
Block a user