From a3b931b45e690fd4d1f4ce86b37c22fbbb64fd3b Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Sat, 1 Nov 2014 15:39:51 -0700
Subject: [PATCH] regex rewrite: fix js regex (dashes), add additional test case

---
 pywb/rewrite/regex_rewriters.py           | 3 ++-
 pywb/rewrite/test/test_regex_rewriters.py | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py
index 544e12e9..179e06fd 100644
--- a/pywb/rewrite/regex_rewriters.py
+++ b/pywb/rewrite/regex_rewriters.py
@@ -111,7 +111,8 @@ class JSLinkOnlyRewriter(RegexRewriter):
     JS Rewriter which rewrites absolute http://, https:// and // urls
     at the beginning of a string
     """
-    JS_HTTPX = r'(?<="|\'|;)(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+'
+    #JS_HTTPX = r'(?:(?:(?<=["\';])https?:)|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+.*(?=["\s\';&\\])'
+    JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.\-/\\?&#]+(?=["\';&\\])'
 
     def __init__(self, rewriter, rules=[]):
         rules = rules + [
diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py
index e0a95a84..253328e5 100644
--- a/pywb/rewrite/test/test_regex_rewriters.py
+++ b/pywb/rewrite/test/test_regex_rewriters.py
@@ -61,6 +61,9 @@ r"""
 >>> _test_js('"http:\\/\\/www.example.com\\/some\\/path\\/?query=1"')
 '"/web/20131010/http:\\/\\/www.example.com\\/some\\/path\\/?query=1"'
 
+>>> _test_js('"http:\/\/sub-site.example.com\/path-dashes\/path_other\/foo_bar.txt"')
+'"/web/20131010/http:\\/\\/sub-site.example.com\\/path-dashes\\/path_other\\/foo_bar.txt"'
+
 
 #=================================================================
 # XML Rewriting