@staticmethod
def urljoin(orig_url, url):
    """Join ``url`` onto ``orig_url`` and resolve ``..`` path segments.

    The result of ``urlparse.urljoin`` may still contain ``..`` segments,
    e.g. ``/static/styles/../images/layout/logo.png``, which should become
    ``/static/images/layout/logo.png``.  Stripping the ``../`` text (the
    earlier ``.replace('../', '')`` approach) keeps the directory that
    should have been dropped, so the path is resolved segment by segment.

    Only the path component is modified; scheme, netloc, query and
    fragment are passed through as parsed.

    :param orig_url: base url to resolve against
    :param url: url (possibly relative) to join onto ``orig_url``
    :return: the joined url with every ``..`` path segment resolved
    """
    new_url = urlparse.urljoin(orig_url, url)

    # Cheap pre-check: most urls have nothing to resolve.
    if '../' not in new_url:
        return new_url

    scheme, netloc, path, query, frag = urlparse.urlsplit(new_url)
    segments = path.split('/')

    # The '../' was somewhere other than the path (e.g. the query string):
    # return the joined url as-is rather than a split/unsplit round-trip.
    if '..' not in segments:
        return new_url

    # For an absolute path the first segment is '' (the root); a '..' must
    # never pop it, since there is nothing above the root.
    floor = 1 if path.startswith('/') else 0

    # Resolve left to right so that consecutive '..' segments each remove
    # a real parent directory instead of cancelling one another.
    resolved = []
    for segment in segments:
        if segment != '..':
            resolved.append(segment)
        elif len(resolved) > floor:
            resolved.pop()

    # A trailing '..' names a directory: keep the trailing slash.
    if segments[-1] == '..':
        resolved.append('')

    if resolved == ['']:
        path = '/'
    else:
        path = '/'.join(resolved)

    return urlparse.urlunsplit((scheme, netloc, path, query, frag))