1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

rewrite: better fix for multiple '../' segments in URLs; add additional tests

This commit is contained in:
Ilya Kreymer 2014-07-14 20:50:45 -07:00
parent 7032160cf9
commit e858b8faae
3 changed files with 27 additions and 3 deletions

View File

@ -89,6 +89,9 @@ r"""
>>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')")
"background:#abc url('/web/20131010em_/http://example.com/static/images/layout/logo.png')"
>>> _test_css("background:#000 url('/static/styles/../../images/layout/logo.png')")
"background:#000 url('/web/20131010em_/http://example.com/images/layout/logo.png')"
>>> _test_css("background: url('')")
"background: url('')"

View File

@ -1,4 +1,21 @@
"""
# urljoin tests
>>> UrlRewriter.urljoin('http://example.com/test/', '../file.html')
'http://example.com/file.html'
>>> UrlRewriter.urljoin('http://example.com/test/', '../path/../../../file.html')
'http://example.com/file.html'
>>> UrlRewriter.urljoin('http://example.com/test/', '/../file.html')
'http://example.com/file.html'
>>> UrlRewriter.urljoin('http://example.com/', '/abc/../../file.html')
'http://example.com/file.html'
>>> UrlRewriter.urljoin('http://example.com/path/more/', 'abc/../../file.html')
'http://example.com/path/file.html'
# UrlRewriter tests
>>> do_rewrite('other.html', '20131010/http://example.com/path/page.html', 'https://web.archive.org/web/')
'https://web.archive.org/web/20131010/http://example.com/path/other.html'

View File

@ -102,14 +102,18 @@ class UrlRewriter(object):
scheme, netloc, path, query, frag = parts
path_parts = path.split('/')
i = len(path_parts) - 1
while i >= 0:
i = 0
n = len(path_parts) - 1
while i < n:
if path_parts[i] == '..':
del path_parts[i]
n -= 1
if i > 0:
del path_parts[i - 1]
n -= 1
i -= 1
i -= 1
else:
i += 1
if path_parts == ['']:
path = '/'