mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite: properly rewrite scheme-relative, JS-escaped URLs:
'\/\/example.com' and '\\/\\/example.com/' are now treated the same as '//example.com', i.e. an 'http:' prefix is added before rewriting
This commit is contained in:
parent
b8b8c30573
commit
d7eb40af20
@ -45,6 +45,16 @@ r"""
|
|||||||
>>> _test_js('document_domain = "anotherdomain.com"; window.document.domain = "example.com"')
|
>>> _test_js('document_domain = "anotherdomain.com"; window.document.domain = "example.com"')
|
||||||
'document_domain = "anotherdomain.com"; window.document.WB_wombat_domain = "example.com"'
|
'document_domain = "anotherdomain.com"; window.document.WB_wombat_domain = "example.com"'
|
||||||
|
|
||||||
|
# protocol-rel escapes
|
||||||
|
>>> _test_js('"//example.com/"')
|
||||||
|
'"/web/20131010/http://example.com/"'
|
||||||
|
|
||||||
|
>>> _test_js(r'"\/\/example.com/"')
|
||||||
|
'"/web/20131010/http:\\/\\/example.com/"'
|
||||||
|
|
||||||
|
>>> _test_js(r'"\\/\\/example.com/"')
|
||||||
|
'"/web/20131010/http:\\\\/\\\\/example.com/"'
|
||||||
|
|
||||||
# custom rules added
|
# custom rules added
|
||||||
>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)])
|
>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)])
|
||||||
'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html"; /*some_func(); */'
|
'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html"; /*some_func(); */'
|
||||||
|
@ -50,6 +50,21 @@
|
|||||||
>>> do_rewrite(r'http:\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
>>> do_rewrite(r'http:\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||||
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
||||||
|
|
||||||
|
>>> do_rewrite(r'//some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||||
|
'localhost:8080/20101226101112/http://some-other-site.com'
|
||||||
|
|
||||||
|
>>> do_rewrite(r'\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||||
|
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
||||||
|
|
||||||
|
>>> do_rewrite(r'\\/\\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||||
|
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
||||||
|
|
||||||
|
>>> do_rewrite(r'http:\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||||
|
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
||||||
|
|
||||||
|
>>> do_rewrite(r'http:\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||||
|
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
||||||
|
|
||||||
>>> do_rewrite('../../other.html', '2020/http://example.com/index.html', '/')
|
>>> do_rewrite('../../other.html', '2020/http://example.com/index.html', '/')
|
||||||
'/2020/http://example.com/other.html'
|
'/2020/http://example.com/other.html'
|
||||||
|
|
||||||
|
@ -17,7 +17,9 @@ class UrlRewriter(object):
|
|||||||
|
|
||||||
PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:']
|
PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:']
|
||||||
|
|
||||||
def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None,
|
REL_SCHEME = ('//', r'\/\/', r'\\/\\/')
|
||||||
|
|
||||||
|
def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None,
|
||||||
root_path=None, cookie_scope=None):
|
root_path=None, cookie_scope=None):
|
||||||
self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl)
|
self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl)
|
||||||
self.prefix = prefix
|
self.prefix = prefix
|
||||||
@ -45,7 +47,7 @@ class UrlRewriter(object):
|
|||||||
|
|
||||||
is_abs = any(url.startswith(x) for x in self.PROTOCOLS)
|
is_abs = any(url.startswith(x) for x in self.PROTOCOLS)
|
||||||
|
|
||||||
if url.startswith('//'):
|
if url.startswith(self.REL_SCHEME):
|
||||||
is_abs = True
|
is_abs = True
|
||||||
url = 'http:' + url
|
url = 'http:' + url
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user