1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

RegexRewriter Optimization (#354)

* bump version to 2.0.5

* regexrewriter: work on splitting rules into separate class hierarchy from rewriter.
rules logic and regexes can be initialized once, while the rewriter is created per response being rewritten

* regexrewriter: refactor remaining rewriters to use a shared rules factory to avoid re-initializing rules

* fix spacing

* fixes: ensure custom rules added first, fix fb rewrite_dash
content_rewriter tests: update tests to check with location-only and js obj proxy rewriter, check fb dash rewriter

* simplify JSNoneRewriter
This commit is contained in:
Ilya Kreymer 2018-08-05 16:40:19 -07:00 committed by GitHub
parent 2f062cf5c7
commit 973a2dcff9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 250 additions and 176 deletions

View File

@ -5,52 +5,118 @@ from six.moves.urllib.parse import unquote
# =================================================================
class RegexRewriter(StreamingRewriter):
# @staticmethod
# def comment_out(string):
# return '/*' + string + '*/'
class RxRules(object):
HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
@staticmethod
def format(template):
return lambda string: template.format(string)
@staticmethod
def fixed(string):
return lambda _: string
@staticmethod
def remove_https(string):
def remove_https(string, _):
return string.replace("https", "http")
@staticmethod
def replace_str(replacer):
return lambda x, _: x.replace('this', replacer)
@staticmethod
def format(template):
return lambda string, _: template.format(string)
@staticmethod
def fixed(string):
return lambda _, _2: string
@staticmethod
def archival_rewrite():
return lambda string, rewriter: rewriter.rewrite(string)
@staticmethod
def add_prefix(prefix):
return lambda string: prefix + string
return lambda string, _: prefix + string
@staticmethod
def add_suffix(suffix):
return lambda string: string + suffix
return lambda string, _: string + suffix
@staticmethod
def archival_rewrite(rewriter):
return lambda string: rewriter.rewrite(string)
HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
# DEFAULT_OP = add_prefix
def __init__(self, rewriter, rules):
super(RegexRewriter, self).__init__(rewriter)
# rules = self.create_rules(http_prefix)
def compile_rules(rules):
# Build regexstr, concatenating regex list
regex_str = '|'.join(['(' + rx + ')' for rx, op, count in rules])
# ensure it's not middle of a word, wrap in non-capture group
regex_str = '(?<!\w)(?:' + regex_str + ')'
self.regex = re.compile(regex_str, re.M)
self.rules = rules
return re.compile(regex_str, re.M)
def __init__(self, rules=None):
self.rules = rules or []
self.regex = self.compile_rules(self.rules)
def __call__(self, extra_rules=None):
    """
    Return the ``(rules, regex)`` pair to use for one rewriter.

    With no (or empty) ``extra_rules`` the rule list and regex stored on
    this instance are returned as-is. Otherwise ``extra_rules`` are placed
    in front of the stored rules and a new regex is compiled for the
    combined list.
    """
    if extra_rules:
        combined = extra_rules + self.rules
        return combined, self.compile_rules(combined)

    return self.rules, self.regex
# =================================================================
class JSWombatProxyRules(RxRules):
    """
    Rules for the Wombat JS object-proxy rewriter.

    Besides the regex rules passed to ``RxRules``, the instance exposes:

    * ``local_objs``  -- names of the globals re-declared inside the wrapper
    * ``first_buff``  -- JS text built from the init function template plus
      one ``let`` declaration per entry in ``local_objs``; it ends by
      opening a ``{`` block
    * ``last_buff``   -- JS text that closes that block
    """

    def __init__(self):
        # NOTE: the continuation lines below must stay at column 0 --
        # a backslash-newline inside a string literal keeps any leading
        # whitespace of the following line as part of the string.
        local_init_func = '\nvar {0} = function(name) {{\
return (self._wb_wombat && self._wb_wombat.local_init &&\
self._wb_wombat.local_init(name)) || self[name]; }};\n\
if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
{{\n'

        local_init_func_name = '_____WB$wombat$assign$function_____'

        # {0} is the object name, {1} the init function name
        local_var_line = 'let {0} = {1}("{0}");'

        # replacement text substituted for a matched 'this'
        this_rw = '(this && this._WB_wombat_obj_proxy || this)'

        # text appended after a matched 'location =' assignment
        check_loc = '(self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = '

        self.local_objs = [
            'window',
            'self',
            'document',
            'location',
            'top',
            'parent',
            'frames',
            'opener']

        local_declares = '\n'.join([local_var_line.format(obj, local_init_func_name) for obj in self.local_objs])

        # alternation of the wrapped names, used in the 'this.<name>' rules
        prop_str = '|'.join(self.local_objs)

        # each rule is (pattern, replacement op, group index)
        rules = [
            (r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0),
            (r'(?<!\.)\blocation\b\s*[=]\s*(?![=])', self.add_suffix(check_loc), 0),
            (r'\breturn\s+this\b\s*(?![.$])', self.replace_str(this_rw), 0),
            (r'(?<=[\n])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(';' + this_rw), 0),
            (r'(?<![$.])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(this_rw), 0),
            (r'(?<=[=])\s*this\b\s*(?![.$])', self.replace_str(this_rw), 0),
            # raw string: same pattern text as before, without relying on
            # Python passing unknown escapes (\}, \s, \(, \)) through
            (r'\}(?:\s*\))?\s*\(this\)', self.replace_str(this_rw), 0),
            (r'(?<=[^|&][|&]{2})\s*this\b\s*(?![|&.$]([^|&]|$))', self.replace_str(this_rw), 0),
        ]

        super(JSWombatProxyRules, self).__init__(rules)

        self.first_buff = local_init_func.format(local_init_func_name) + local_declares

        self.last_buff = '\n\n}'
# =================================================================
class RegexRewriter(StreamingRewriter):
rules_factory = RxRules()
def __init__(self, rewriter, extra_rules=None, first_buff=''):
super(RegexRewriter, self).__init__(rewriter, first_buff=first_buff)
# rules = self.create_rules(http_prefix)
self.rules, self.regex = self.rules_factory(extra_rules)
def filter(self, m):
return True
@ -79,7 +145,7 @@ class RegexRewriter(StreamingRewriter):
# if not hasattr(op, '__call__'):
# op = RegexRewriter.DEFAULT_OP(op)
result = op(m.group(i))
result = op(m.group(i), self.url_rewriter)
final_str = result
# if extracting partial match
@ -96,11 +162,11 @@ class RegexRewriter(StreamingRewriter):
def parse_rule(obj):
match = obj.get('match')
if 'rewrite' in obj:
replace = RegexRewriter.archival_rewrite(rewriter)
replace = RxRules.archival_rewrite()
elif 'function' in obj:
replace = load_py_name(obj['function'])
else:
replace = RegexRewriter.format(obj.get('replace', '{0}'))
replace = RxRules.format(obj.get('replace', '{0}'))
group = obj.get('group', 0)
result = (match, replace, group)
return result
@ -111,9 +177,32 @@ class RegexRewriter(StreamingRewriter):
# =================================================================
class JSLinkRewriterMixin(object):
class JSLocationRewriterRules(RxRules):
"""
JS Rewriter which rewrites absolute http://, https:// and // urls
JS Rewriter mixin which rewrites location and domain to the
specified prefix (default: ``WB_wombat_``)
"""
def __init__(self, prefix='WB_wombat_'):
super(JSLocationRewriterRules, self).__init__(self.get_rules(prefix))
def get_rules(self, prefix):
    """
    Build the list of ``(pattern, op, group)`` rules for this class.

    ``prefix`` is prepended to the text matched by the location/top,
    ternary-location and frameElement patterns; the postMessage pattern
    always gets the fixed ``__WB_pmw(self.window).`` prefix. A new list
    is returned on every call.
    """
    with_prefix = self.add_prefix(prefix)
    with_pmw = self.add_prefix('__WB_pmw(self.window).')

    return [
        (r'(?<![$\'"])\b(?:location|top)\b(?![$\'":])', with_prefix, 0),
        (r'(?<=[?])\s*(?:\w+[.])?(location)\s*(?=[:])', with_prefix, 1),
        (r'(?<=\.)postMessage\b\(', with_pmw, 0),
        (r'(?<=\.)frameElement\b', with_prefix, 0),
    ]
# =================================================================
class JSLinkAndLocationRewriterRules(JSLocationRewriterRules):
"""
JS Rewriter rules which also rewrite absolute http://, https:// and // urls
at the beginning of a string
"""
# JS_HTTPX = r'(?:(?:(?<=["\';])https?:)|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+.*(?=["\s\';&\\])'
@ -122,94 +211,44 @@ class JSLinkRewriterMixin(object):
# JS_HTTPX = r'(?:(?<=["\';])https?:|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-][^"\s\';&\\]*(?=["\';&\\])'
JS_HTTPX = r'(?:(?<=["\';])https?:|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@%.\\-]+/'
def __init__(self, rewriter, rules=[]):
rules = rules + [
(self.JS_HTTPX, RegexRewriter.archival_rewrite(rewriter), 0)
]
super(JSLinkRewriterMixin, self).__init__(rewriter, rules)
def get_rules(self, prefix):
    """
    Return the parent class's rules for ``prefix`` with one extra rule
    appended: text matching ``JS_HTTPX`` is passed through the archival
    rewrite op.
    """
    link_rule = (self.JS_HTTPX, RxRules.archival_rewrite(), 0)

    combined = super(JSLinkAndLocationRewriterRules, self).get_rules(prefix)
    combined.append(link_rule)
    return combined
# =================================================================
class JSLocationRewriterMixin(object):
"""
JS Rewriter mixin which rewrites location and domain to the
specified prefix (default: ``WB_wombat_``)
"""
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
rules = rules + [
(r'(?<![$\'"])\b(?:location|top)\b(?![$\'":])', RegexRewriter.add_prefix(prefix), 0),
(r'(?<=[?])\s*(?:\w+[.])?(location)\s*(?=[:])', RegexRewriter.add_prefix(prefix), 1),
(r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(self.window).'), 0),
(r'(?<=\.)frameElement\b', RegexRewriter.add_prefix(prefix), 0),
]
super(JSLocationRewriterMixin, self).__init__(rewriter, rules)
# Regex rewriter whose rules are supplied by a JSLocationRewriterRules instance.
class JSLocationOnlyRewriter(RegexRewriter):
# Created once, when the class body is executed, and stored on the class.
rules_factory = JSLocationRewriterRules()
# =================================================================
class JSWombatProxyRewriterMixin(object):
# Regex rewriter whose rules are supplied by a JSLinkAndLocationRewriterRules instance.
class JSLinkAndLocationRewriter(RegexRewriter):
# Created once, when the class body is executed, and stored on the class.
rules_factory = JSLinkAndLocationRewriterRules()
# JSRewriter is an alias bound to the same class object.
JSRewriter = JSLinkAndLocationRewriter
# =================================================================
class JSWombatProxyRewriter(RegexRewriter):
"""
JS Rewriter mixin which wraps the contents of the
script in an anonymous block scope and inserts
Wombat js-proxy setup
"""
local_init_func = '\nvar {0} = function(name) {{\
return (self._wb_wombat && self._wb_wombat.local_init &&\
self._wb_wombat.local_init(name)) || self[name]; }};\n\
if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
{{\n'
rules_factory = JSWombatProxyRules()
local_init_func_name = '_____WB$wombat$assign$function_____'
def __init__(self, rewriter, extra_rules=None):
super(JSWombatProxyRewriter, self).__init__(rewriter, extra_rules=extra_rules)
local_var_line = 'let {0} = {1}("{0}");'
local_objs = ['window',
'self',
'document',
'location',
'top',
'parent',
'frames',
'opener']
THIS_RW = '(this && this._WB_wombat_obj_proxy || this)'
CHECK_LOC = '(self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = '
@classmethod
def replace_str(cls, replacer):
return lambda x: x.replace('this', replacer)
def __init__(self, rewriter, rules=[]):
#func_rw = 'Function("return {0}")'.format(self.THIS_RW)
prop_str = '|'.join(self.local_objs)
rules = rules + [
(r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0),
(r'(?<!\.)\blocation\b\s*[=]\s*(?![=])', self.add_suffix(self.CHECK_LOC), 0),
(r'\breturn\s+this\b\s*(?![.$])', self.replace_str(self.THIS_RW), 0),
(r'(?<=[\n])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(';' + self.THIS_RW), 0),
(r'(?<![$.])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(self.THIS_RW), 0),
(r'(?<=[=])\s*this\b\s*(?![.$])', self.replace_str(self.THIS_RW), 0),
('\}(?:\s*\))?\s*\(this\)', self.replace_str(self.THIS_RW), 0),
(r'(?<=[^|&][|&]{2})\s*this\b\s*(?![|&.$]([^|&]|$))', self.replace_str(self.THIS_RW), 0),
]
super(JSWombatProxyRewriterMixin, self).__init__(rewriter, rules)
local_declares = '\n'.join([self.local_var_line.format(obj, self.local_init_func_name) for obj in self.local_objs])
self.first_buff = self.local_init_func.format(self.local_init_func_name) + local_declares
self.last_buff = '\n\n}'
self.first_buff = self.rules_factory.first_buff
self.last_buff = self.rules_factory.last_buff
self.local_objs = self.rules_factory.local_objs
def rewrite_complete(self, string, **kwargs):
if not kwargs.get('inline_attr'):
return super(JSWombatProxyRewriterMixin, self).rewrite_complete(string)
return super(JSWombatProxyRewriter, self).rewrite_complete(string)
# check if any of the wrapped objects are used in the script
# if not, don't rewrite
@ -231,31 +270,8 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
return self.last_buff
# =================================================================
class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter):
pass
# =================================================================
class JSLinkOnlyRewriter(JSLinkRewriterMixin, RegexRewriter):
pass
# =================================================================
class JSLinkAndLocationRewriter(JSLocationRewriterMixin,
JSLinkRewriterMixin,
RegexRewriter):
pass
# =================================================================
class JSNoneRewriter(RegexRewriter):
def __init__(self, rewriter, rules=[]):
super(JSNoneRewriter, self).__init__(rewriter, rules)
# =================================================================
class JSWombatProxyRewriter(JSWombatProxyRewriterMixin, RegexRewriter):
pass
@ -287,16 +303,40 @@ class JSReplaceFuzzy(object):
# =================================================================
# Set 'default' JSRewriter
JSRewriter = JSLinkAndLocationRewriter
class CSSRules(RxRules):
    """
    Rules for CSS content: the first capture group of a ``url(...)``
    match and of an ``@import`` match without ``url`` is passed to the
    archival rewrite op.
    """

    CSS_URL_REGEX = "url\\s*\\(\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*([^)'\"]+)\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*\\)"

    # '\\w' is spelled with an escaped backslash like the rest of this
    # literal; the resulting pattern text is unchanged, but no longer
    # depends on Python passing the unknown escape '\w' through.
    CSS_IMPORT_NO_URL_REGEX = ("@import\\s+(?!url)\\(?\\s*['\"]?" +
                               "(?!url[\\s\\(])([\\w.:/\\\\-]+)")

    def __init__(self):
        # each rule is (pattern, replacement op, group index); group 1 is
        # the only capture group in both patterns
        rules = [
            (self.CSS_URL_REGEX, self.archival_rewrite(), 1),
            (self.CSS_IMPORT_NO_URL_REGEX, self.archival_rewrite(), 1),
        ]

        super(CSSRules, self).__init__(rules)
# =================================================================
# Regex rewriter whose rules are supplied by a CSSRules instance.
class CSSRewriter(RegexRewriter):
# Created once, when the class body is executed, and stored on the class.
rules_factory = CSSRules()
# =================================================================
class XMLRules(RxRules):
    """
    Rules for XML content: a single rule whose second capture group, an
    ``HTTPX_MATCH_STR`` match, is passed to the archival rewrite op.
    """

    def __init__(self):
        # Group 1 is an optional run of letters/colons followed by
        # whitespace or '='; group 2 is the url prefix that gets rewritten.
        # '\\s' is written with an escaped backslash so the literal has no
        # invalid escape sequences; the pattern text is unchanged.
        rules = [
            ('([A-Za-z:]+[\\s=]+)?["\'\\s]*(' +
             self.HTTPX_MATCH_STR + ')',
             self.archival_rewrite(), 2),
        ]

        super(XMLRules, self).__init__(rules)
# =================================================================
class XMLRewriter(RegexRewriter):
def __init__(self, rewriter, extra=[]):
rules = self._create_rules(rewriter)
super(XMLRewriter, self).__init__(rewriter, rules)
rules_factory = XMLRules()
# custom filter to reject 'xmlns' attr
def filter(self, m):
@ -306,30 +346,5 @@ class XMLRewriter(RegexRewriter):
return True
def _create_rules(self, rewriter):
return [
('([A-Za-z:]+[\s=]+)?["\'\s]*(' +
RegexRewriter.HTTPX_MATCH_STR + ')',
RegexRewriter.archival_rewrite(rewriter), 2),
]
# =================================================================
class CSSRewriter(RegexRewriter):
CSS_URL_REGEX = "url\\s*\\(\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*([^)'\"]+)\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*\\)"
CSS_IMPORT_NO_URL_REGEX = ("@import\\s+(?!url)\\(?\\s*['\"]?" +
"(?!url[\\s\\(])([\w.:/\\\\-]+)")
def __init__(self, rewriter):
rules = self._create_rules(rewriter)
super(CSSRewriter, self).__init__(rewriter, rules)
def _create_rules(self, rewriter):
return [
(CSSRewriter.CSS_URL_REGEX,
RegexRewriter.archival_rewrite(rewriter), 1),
(CSSRewriter.CSS_IMPORT_NO_URL_REGEX,
RegexRewriter.archival_rewrite(rewriter), 1),
]

View File

@ -58,7 +58,7 @@ class RewriteDASH(BufferedRewriter):
# ============================================================================
def rewrite_fb_dash(string):
def rewrite_fb_dash(string, *args):
DASH_SPLIT = r'\n",dash_prefetched_representation_ids:'
inx = string.find(DASH_SPLIT)
if inx < 0:

View File

@ -10,7 +10,7 @@ from pywb.utils.io import chunk_encode_iter
from pywb.rewrite.wburl import WbUrl
from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.rewrite.default_rewriter import DefaultRewriter
from pywb.rewrite.default_rewriter import DefaultRewriter, RewriterWithJSProxy
from pywb import get_test_dir
@ -35,6 +35,7 @@ class TestContentRewriter(object):
@classmethod
def setup_class(self):
self.content_rewriter = DefaultRewriter()
self.js_proxy_content_rewriter = RewriterWithJSProxy()
def _create_response_record(self, url, headers, payload, warc_headers):
writer = BufferWARCWriter()
@ -53,7 +54,7 @@ class TestContentRewriter(object):
def rewrite_record(self, headers, content, ts, url='http://example.com/',
prefix='http://localhost:8080/prefix/', warc_headers=None,
request_url=None, is_live=None):
request_url=None, is_live=None, use_js_proxy=True):
record = self._create_response_record(url, headers, content, warc_headers)
@ -68,7 +69,10 @@ class TestContentRewriter(object):
cdx['is_fuzzy'] = '1'
cdx['is_live'] = is_live
return self.content_rewriter(record, url_rewriter, None, cdx=cdx)
if use_js_proxy:
return self.js_proxy_content_rewriter(record, url_rewriter, None, cdx=cdx)
else:
return self.content_rewriter(record, url_rewriter, None, cdx=cdx)
def test_rewrite_html(self, headers):
content = '<html><body><a href="http://example.com/"></a></body></html>'
@ -109,17 +113,34 @@ class TestContentRewriter(object):
assert ('Content-Type', 'text/html') in headers.headers
exp = '<html><body><a href="http://localhost:8080/prefix/201701/http://example.com/"></a></body></html>'
assert b''.join(gen).decode('utf-8') == exp
result = b''.join(gen).decode('utf-8')
assert exp == result
def test_rewrite_js_mod(self, headers):
content = 'function() { location.href = "http://example.com/"; }'
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_')
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_', use_js_proxy=False)
assert ('Content-Type', 'text/javascript') in headers.headers
exp = 'function() { WB_wombat_location.href = "http://example.com/"; }'
assert b''.join(gen).decode('utf-8') == exp
result = b''.join(gen).decode('utf-8')
assert exp == result
def test_rewrite_js_mod_with_obj_proxy(self, headers):
    """
    With the JS proxy rewriter the script text itself is expected to be
    left unchanged and to appear inside output containing 'let window '.
    """
    script = 'function() { location.href = "http://example.com/"; }'

    rw_headers, chunks, _ = self.rewrite_record(headers, script, ts='201701js_', use_js_proxy=True)

    assert ('Content-Type', 'text/javascript') in rw_headers.headers

    body = b''.join(chunks).decode('utf-8')

    assert 'let window ' in body
    # the expected text is identical to the input script
    assert script in body
def test_rewrite_cs_mod(self, headers):
content = '.foo { background: url(http://localhost:8080/prefix/201701cs_/http://example.com/) }'
@ -136,7 +157,7 @@ class TestContentRewriter(object):
headers = {'Content-Type': 'application/x-javascript'}
content = 'function() { location.href = "http://example.com/"; }'
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_')
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_', use_js_proxy=False)
assert ('Content-Type', 'application/x-javascript') in headers.headers
@ -281,11 +302,14 @@ class TestContentRewriter(object):
content = '/**/ jQuery_ABC({"foo": "bar"});'
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_',
url='http://example.com/path/file')
url='http://example.com/path/file',
use_js_proxy=True)
assert ('Content-Type', 'text/javascript') in headers.headers
assert b''.join(gen).decode('utf-8') == content
result = b''.join(gen).decode('utf-8')
assert 'let window' in result
assert content in result
def test_rewrite_text_no_type(self):
headers = {}
@ -307,7 +331,9 @@ class TestContentRewriter(object):
assert headers.headers == [('Content-Type', 'text/javascript')]
assert b''.join(gen).decode('utf-8') == content
result = b''.join(gen).decode('utf-8')
assert 'let window ' in result
assert content in result
def test_custom_fuzzy_replace(self):
headers = {'Content-Type': 'application/octet-stream'}
@ -329,7 +355,7 @@ class TestContentRewriter(object):
content = '{"foo":"bar", "dash": {"on": "true"}, "some": ["list"]'
# is_live
rw_headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701mp_',
rw_headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_',
url='https://player.vimeo.com/video/123445/config/config?A=B',
is_live='1')
@ -342,6 +368,21 @@ class TestContentRewriter(object):
assert b''.join(gen).decode('utf-8') == content
def test_custom_live_js_obj_proxy(self):
    """
    Live vimeo config response served as text/javascript through the JS
    proxy rewriter: the "dash" and "hls" keys are expected to come back
    as "__dash" and "__hls".
    """
    original = '{"foo":"bar", "dash": {"on": "true"}, "some": ["list"], "hls": {"A": "B"}'
    expected = '{"foo":"bar", "__dash": {"on": "true"}, "some": ["list"], "__hls": {"A": "B"}'

    # is_live
    _, chunks, _ = self.rewrite_record({'Content-Type': 'text/javascript'},
                                       original,
                                       ts='201701js_',
                                       url='https://player.vimeo.com/video/123445/config/config?A=B',
                                       is_live='1',
                                       use_js_proxy=True)

    # rewritten
    body = b''.join(chunks).decode('utf-8')
    assert expected in body
def test_custom_ajax_rewrite(self):
headers = {'Content-Type': 'application/json',
'X-Pywb-Requested-With': 'XMLHttpRequest'}
@ -451,6 +492,26 @@ http://example.com/video_4.m3u8
</MPD>"""
assert b''.join(gen).decode('utf-8') == filtered
def test_dash_fb_in_js(self):
    """
    A unicode-escaped DASH manifest embedded in a JS response for a
    facebook.com url: the prefetched representation ids ["4","5"] are
    expected to be replaced with ["1", "7"].
    """
    manifest_path = os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd')
    with open(manifest_path, 'rt') as fh:
        escaped_manifest = fh.read().encode('unicode-escape').decode('utf-8')

    rep_ids = r'\n",dash_prefetched_representation_ids:["4","5"]'
    content = 'dash_manifest:"' + escaped_manifest + rep_ids

    rw_headers, chunks, _ = self.rewrite_record({'Content-Type': 'text/javascript'},
                                                content,
                                                ts='201701js_',
                                                url='http://facebook.com/example/dash/manifest.mpd')

    assert rw_headers.headers == [('Content-Type', 'text/javascript')]

    body = b''.join(chunks).decode('utf-8')

    # 4, 5 representations removed, replaced with default 1, 7
    assert 'dash_prefetched_representation_ids:["1", "7"]' in body
    assert rep_ids not in body
def test_dash_custom_max_resolution(self):
headers = {'Content-Type': 'application/dash+xml'}
with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
@ -533,5 +594,3 @@ http://example.com/video_4.m3u8
assert b''.join(gen).decode('utf-8') == filtered

View File

@ -3,7 +3,7 @@ r"""
# Custom Regex
#=================================================================
# Test https->http converter (other tests below in subclasses)
>>> RegexRewriter(urlrewriter, [(RegexRewriter.HTTPX_MATCH_STR, RegexRewriter.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com')
>>> RegexRewriter(urlrewriter, [(RxRules.HTTPX_MATCH_STR, RxRules.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com')
'a = http://example.com; b = http://example.com; c = http://some-url/path/http://embedded.example.com'
@ -101,7 +101,7 @@ r"""
'"/web/20131010/\\\\/\\\\/example.com/"'
# custom rules added
>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)])
>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RxRules.format('/*{0}*/'), 0)])
'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html"; /*some_func(); */'
# scheme-agnostic
@ -274,7 +274,7 @@ r"""
#=================================================================
from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.rewrite.regex_rewriters import RegexRewriter, JSRewriter, CSSRewriter, XMLRewriter
from pywb.rewrite.regex_rewriters import RegexRewriter, JSRewriter, CSSRewriter, XMLRewriter, RxRules
from pywb.rewrite.regex_rewriters import JSWombatProxyRewriter