diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py index 253328e5..92975a7f 100644 --- a/pywb/rewrite/test/test_regex_rewriters.py +++ b/pywb/rewrite/test/test_regex_rewriters.py @@ -45,6 +45,16 @@ r""" >>> _test_js('document_domain = "anotherdomain.com"; window.document.domain = "example.com"') 'document_domain = "anotherdomain.com"; window.document.WB_wombat_domain = "example.com"' +# protocol-rel escapes +>>> _test_js('"//example.com/"') +'"/web/20131010/http://example.com/"' + +>>> _test_js(r'"\/\/example.com/"') +'"/web/20131010/http:\\/\\/example.com/"' + +>>> _test_js(r'"\\/\\/example.com/"') +'"/web/20131010/http:\\\\/\\\\/example.com/"' + # custom rules added >>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)]) 'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html"; /*some_func(); */' diff --git a/pywb/rewrite/test/test_url_rewriter.py b/pywb/rewrite/test/test_url_rewriter.py index be0ca7da..3d324069 100644 --- a/pywb/rewrite/test/test_url_rewriter.py +++ b/pywb/rewrite/test/test_url_rewriter.py @@ -50,6 +50,21 @@ >>> do_rewrite(r'http:\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/') 'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com' +>>> do_rewrite(r'//some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/') +'localhost:8080/20101226101112/http://some-other-site.com' + +>>> do_rewrite(r'\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/') +'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com' + +>>> do_rewrite(r'\\/\\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/') +'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com' + +>>> do_rewrite(r'http:\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/') +'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com' + +>>> do_rewrite(r'http:\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/') +'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com' + >>> do_rewrite('../../other.html', '2020/http://example.com/index.html', '/') '/2020/http://example.com/other.html' diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index 61a48e50..aa87260c 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -17,7 +17,9 @@ class UrlRewriter(object): PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:'] - def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None, + REL_SCHEME = ('//', r'\/\/', r'\\/\\/') + + def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None, root_path=None, cookie_scope=None): self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl) self.prefix = prefix @@ -45,7 +47,7 @@ class UrlRewriter(object): is_abs = any(url.startswith(x) for x in self.PROTOCOLS) - if url.startswith('//'): + if url.startswith(self.REL_SCHEME): is_abs = True url = 'http:' + url