1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

rewrite: fix relative URL resolution to better handle parent-relative ('../') paths.

Explicitly resolve '../' path segments against the preceding segment when possible; drop them only when they occur at the root level.
This commit is contained in:
Ilya Kreymer 2014-07-14 19:13:19 -07:00
parent 1b1a1f8115
commit 7032160cf9
2 changed files with 34 additions and 1 deletions

View File

@ -86,6 +86,9 @@ r"""
>>> _test_css("background: url(file.jpeg)")
'background: url(/web/20131010em_/http://example.com/file.jpeg)'
>>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')")
"background:#abc url('/web/20131010em_/http://example.com/static/images/layout/logo.png')"
>>> _test_css("background: url('')")
"background: url('')"

View File

@ -57,7 +57,7 @@ class UrlRewriter(object):
else:
# optimize: join if not absolute url, otherwise just use that
if not is_abs:
new_url = urlparse.urljoin(wburl.url, url).replace('../', '')
new_url = self.urljoin(wburl.url, url)
else:
new_url = url
@ -92,6 +92,36 @@ class UrlRewriter(object):
def __repr__(self):
    """Return a debug string showing this rewriter's wburl and prefix."""
    template = "UrlRewriter('{0}', '{1}')"
    return template.format(self.wburl, self.prefix)
@staticmethod
def urljoin(orig_url, url):
    """Join ``url`` onto ``orig_url`` and resolve any remaining '..' segments.

    ``urlparse.urljoin`` may return a url whose path still contains
    parent references (``..``). This method post-processes the joined
    url: every ``..`` segment removes the path segment that precedes it,
    and a ``..`` that has nothing left to remove (i.e. it sits at the
    root level) is simply dropped.

    :param orig_url: the base url to resolve against
    :param url: the (possibly relative) url to resolve
    :return: the joined url with '..' path segments resolved
    """
    new_url = urlparse.urljoin(orig_url, url)

    # fast path: nothing that looks like a parent reference
    if '../' not in new_url:
        return new_url

    scheme, netloc, path, query, frag = urlparse.urlsplit(new_url)

    # For a rooted path the first split element is the empty string that
    # stands for the leading '/'; it must never be consumed by a '..'.
    floor = 1 if path.startswith('/') else 0

    # Walk the segments front to back, keeping a stack of resolved ones.
    # A backwards scan would let two adjacent '..' segments cancel each
    # other ('/a/b/../../c' -> '/a/b/c'); a forward scan makes every '..'
    # remove a real preceding segment ('/a/b/../../c' -> '/c').
    resolved = []
    for segment in path.split('/'):
        if segment != '..':
            resolved.append(segment)
        elif len(resolved) > floor:
            resolved.pop()
        # else: '..' at root level with nothing to remove -- drop it

    if resolved == ['']:
        path = '/'
    else:
        path = '/'.join(resolved)

    return urlparse.urlunsplit((scheme, netloc, path, query, frag))
#=================================================================
class HttpsUrlRewriter(object):