From 6acac67d3cbd97cff1df2061222380ac98fa5afb Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 23 Mar 2015 11:49:24 -0700 Subject: [PATCH] rewrite: fix js rewrite again to ensure '// comments' are not rewritten as scheme-rel urls add tests --- pywb/rewrite/regex_rewriters.py | 2 +- pywb/rewrite/test/test_regex_rewriters.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py index 2d32711d..6c7c5ff6 100644 --- a/pywb/rewrite/regex_rewriters.py +++ b/pywb/rewrite/regex_rewriters.py @@ -113,7 +113,7 @@ class JSLinkRewriterMixin(object): """ #JS_HTTPX = r'(?:(?:(?<=["\';])https?:)|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+.*(?=["\s\';&\\])' #JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.\-/\\?&#]+(?=["\';&\\])' - JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]' + JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-][^"\s\';&\\]*(?=["\';&\\])' def __init__(self, rewriter, rules=[]): rules = rules + [ diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py index 8b0d7c22..d08406b5 100644 --- a/pywb/rewrite/test/test_regex_rewriters.py +++ b/pywb/rewrite/test/test_regex_rewriters.py @@ -32,6 +32,13 @@ r""" >>> _test_js(r'location = "http://example.com/abc.html?^foo=http://abc.example.com"') 'WB_wombat_location = "/web/20131010/http://example.com/abc.html?^foo=http://abc.example.com"' +# don't rewrite comments, only scheme rel urls +>>> _test_js(r'location = "http://example.com/abc.html?^foo=http://abc.example.com";//some comments') +'WB_wombat_location = "/web/20131010/http://example.com/abc.html?^foo=http://abc.example.com";//some comments' + +>>> _test_js(r'location = "//example.com/abc.html?^foo=http://abc.example.com"//some comments') +'WB_wombat_location = "/web/20131010/http://example.com/abc.html?^foo=http://abc.example.com"//some comments' + # not rewritten -- to be handled on client side >>> _test_js(r'location = "/abc.html"') 'WB_wombat_location = "/abc.html"'