diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py index 2d32711d..6c7c5ff6 100644 --- a/pywb/rewrite/regex_rewriters.py +++ b/pywb/rewrite/regex_rewriters.py @@ -113,7 +113,7 @@ class JSLinkRewriterMixin(object): """ #JS_HTTPX = r'(?:(?:(?<=["\';])https?:)|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+.*(?=["\s\';&\\])' #JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.\-/\\?&#]+(?=["\';&\\])' - JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]' + JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-][^"\s\';&\\]*(?=["\';&\\])' def __init__(self, rewriter, rules=[]): rules = rules + [ diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py index 8b0d7c22..d08406b5 100644 --- a/pywb/rewrite/test/test_regex_rewriters.py +++ b/pywb/rewrite/test/test_regex_rewriters.py @@ -32,6 +32,13 @@ r""" >>> _test_js(r'location = "http://example.com/abc.html?^foo=http://abc.example.com"') 'WB_wombat_location = "/web/20131010/http://example.com/abc.html?^foo=http://abc.example.com"' +# don't rewrite comments, only scheme rel urls +>>> _test_js(r'location = "http://example.com/abc.html?^foo=http://abc.example.com";//some comments') +'WB_wombat_location = "/web/20131010/http://example.com/abc.html?^foo=http://abc.example.com";//some comments' + +>>> _test_js(r'location = "//example.com/abc.html?^foo=http://abc.example.com"//some comments') +'WB_wombat_location = "/web/20131010/http://example.com/abc.html?^foo=http://abc.example.com"//some comments' + # not rewritten -- to be handled on client side >>> _test_js(r'location = "/abc.html"') 'WB_wombat_location = "/abc.html"'