RegexRewriter Optimization (#354)

* bump version to 2.0.5
* regexrewriter: work on splitting rules into a separate class hierarchy from the rewriter. Rules logic and regexes can be initialized once, while a rewriter exists per response being rewritten
* regexrewriter: refactor remaining rewriters to use a shared rules factory to avoid reinitializing rules
* fix spacing
* fixes: ensure custom rules are added first, fix fb rewrite_dash. content_rewriter tests: update tests to check with location-only and js obj proxy rewriter, check fb dash rewriter
* simplify JSNoneRewriter
2025-03-24 06:59:52 +01:00 · 2018-08-05 16:40:19 -07:00 · 2018-08-05 16:40:19 -07:00 · 973a2dcff9
commit 973a2dcff9
parent 2f062cf5c7
4 changed files with 250 additions and 176 deletions
--- a/pywb/rewrite/regex_rewriters.py
+++ b/pywb/rewrite/regex_rewriters.py
@ -5,52 +5,118 @@ from six.moves.urllib.parse import unquote
 # =================================================================
-class RegexRewriter(StreamingRewriter):
+class RxRules(object):
-    # @staticmethod
+    HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
    # def comment_out(string):
    #    return '/*' + string + '*/'
    @staticmethod
-    def format(template):
+    def remove_https(string, _):
        return lambda string: template.format(string)
    @staticmethod
    def fixed(string):
        return lambda _: string
    @staticmethod
    def remove_https(string):
        return string.replace("https", "http")
    @staticmethod
    def replace_str(replacer):
        return lambda x, _: x.replace('this', replacer)
    @staticmethod
    def format(template):
        return lambda string, _: template.format(string)
    @staticmethod
    def fixed(string):
        return lambda _, _2: string
    @staticmethod
    def archival_rewrite():
        return lambda string, rewriter: rewriter.rewrite(string)
    @staticmethod
    def add_prefix(prefix):
-        return lambda string: prefix + string
+        return lambda string, _: prefix + string
    @staticmethod
    def add_suffix(suffix):
-        return lambda string: string + suffix
+        return lambda string, _: string + suffix
    @staticmethod
-    def archival_rewrite(rewriter):
+    def compile_rules(rules):
        return lambda string: rewriter.rewrite(string)
    HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
    # DEFAULT_OP = add_prefix
    def __init__(self, rewriter, rules):
        super(RegexRewriter, self).__init__(rewriter)
        # rules = self.create_rules(http_prefix)
        # Build regexstr, concatenating regex list
        regex_str = '|'.join(['(' + rx + ')' for rx, op, count in rules])
        # ensure it's not middle of a word, wrap in non-capture group
        regex_str = '(?<!\w)(?:' + regex_str + ')'
-        self.regex = re.compile(regex_str, re.M)
+        return re.compile(regex_str, re.M)
-        self.rules = rules
+
    def __init__(self, rules=None):
        self.rules = rules or []
        self.regex = self.compile_rules(self.rules)
    def __call__(self, extra_rules=None):
        if not extra_rules:
            return self.rules, self.regex
        all_rules = extra_rules + self.rules
        regex = self.compile_rules(all_rules)
        return all_rules, regex
 # =================================================================
 class JSWombatProxyRules(RxRules):
    def __init__(self):
        local_init_func = '\nvar {0} = function(name) {{\
 return (self._wb_wombat && self._wb_wombat.local_init &&\
 self._wb_wombat.local_init(name)) || self[name]; }};\n\
 if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
 {{\n'
        local_init_func_name = '_____WB$wombat$assign$function_____'
        local_var_line = 'let {0} = {1}("{0}");'
        this_rw = '(this && this._WB_wombat_obj_proxy || this)'
        check_loc = '(self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = '
        self.local_objs = [
                      'window',
                      'self',
                      'document',
                      'location',
                      'top',
                      'parent',
                      'frames',
                      'opener']
        local_declares = '\n'.join([local_var_line.format(obj, local_init_func_name) for obj in self.local_objs])
        prop_str = '|'.join(self.local_objs)
        rules = [
           (r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0),
           (r'(?<!\.)\blocation\b\s*[=]\s*(?![=])', self.add_suffix(check_loc), 0),
           (r'\breturn\s+this\b\s*(?![.$])', self.replace_str(this_rw), 0),
           (r'(?<=[\n])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(';' + this_rw), 0),
           (r'(?<![$.])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(this_rw), 0),
           (r'(?<=[=])\s*this\b\s*(?![.$])', self.replace_str(this_rw), 0),
           ('\}(?:\s*\))?\s*\(this\)', self.replace_str(this_rw), 0),
           (r'(?<=[^|&][|&]{2})\s*this\b\s*(?![|&.$]([^|&]|$))', self.replace_str(this_rw), 0),
        ]
        super(JSWombatProxyRules, self).__init__(rules)
        self.first_buff = local_init_func.format(local_init_func_name) + local_declares
        self.last_buff = '\n\n}'
 # =================================================================
 class RegexRewriter(StreamingRewriter):
    rules_factory = RxRules()
    def __init__(self, rewriter, extra_rules=None, first_buff=''):
        super(RegexRewriter, self).__init__(rewriter, first_buff=first_buff)
        # rules = self.create_rules(http_prefix)
        self.rules, self.regex = self.rules_factory(extra_rules)
    def filter(self, m):
        return True
@ -79,7 +145,7 @@ class RegexRewriter(StreamingRewriter):
            # if not hasattr(op, '__call__'):
            #    op = RegexRewriter.DEFAULT_OP(op)
-            result = op(m.group(i))
+            result = op(m.group(i), self.url_rewriter)
            final_str = result
            # if extracting partial match
@ -96,11 +162,11 @@ class RegexRewriter(StreamingRewriter):
            def parse_rule(obj):
                match = obj.get('match')
                if 'rewrite' in obj:
-                    replace = RegexRewriter.archival_rewrite(rewriter)
+                    replace = RxRules.archival_rewrite()
                elif 'function' in obj:
                    replace = load_py_name(obj['function'])
                else:
-                    replace = RegexRewriter.format(obj.get('replace', '{0}'))
+                    replace = RxRules.format(obj.get('replace', '{0}'))
                group = obj.get('group', 0)
                result = (match, replace, group)
                return result
@ -111,9 +177,32 @@ class RegexRewriter(StreamingRewriter):
 # =================================================================
-class JSLinkRewriterMixin(object):
+class JSLocationRewriterRules(RxRules):
    """
-    JS Rewriter which rewrites absolute http://, https:// and // urls
+    JS Rewriter mixin which rewrites location and domain to the
    specified prefix (default: ``WB_wombat_``)
    """
    def __init__(self, prefix='WB_wombat_'):
        super(JSLocationRewriterRules, self).__init__(self.get_rules(prefix))
    def get_rules(self, prefix):
        rules = [
            (r'(?<![$\'"])\b(?:location|top)\b(?![$\'":])', self.add_prefix(prefix), 0),
            (r'(?<=[?])\s*(?:\w+[.])?(location)\s*(?=[:])', self.add_prefix(prefix), 1),
            (r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self.window).'), 0),
            (r'(?<=\.)frameElement\b', self.add_prefix(prefix), 0),
        ]
        return rules
 # =================================================================
 class JSLinkAndLocationRewriterRules(JSLocationRewriterRules):
    """
    JS Rewriter rules which also rewrite absolute http://, https:// and // urls
    at the beginning of a string
    """
    # JS_HTTPX = r'(?:(?:(?<=["\';])https?:)|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+.*(?=["\s\';&\\])'
@ -122,94 +211,44 @@ class JSLinkRewriterMixin(object):
    # JS_HTTPX = r'(?:(?<=["\';])https?:|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-][^"\s\';&\\]*(?=["\';&\\])'
    JS_HTTPX = r'(?:(?<=["\';])https?:|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@%.\\-]+/'
-    def __init__(self, rewriter, rules=[]):
+    def get_rules(self, prefix):
-        rules = rules + [
+        rules = super(JSLinkAndLocationRewriterRules, self).get_rules(prefix)
-            (self.JS_HTTPX, RegexRewriter.archival_rewrite(rewriter), 0)
+        rules.append((self.JS_HTTPX, RxRules.archival_rewrite(), 0))
-        ]
+        return rules
        super(JSLinkRewriterMixin, self).__init__(rewriter, rules)
 # =================================================================
-class JSLocationRewriterMixin(object):
+class JSLocationOnlyRewriter(RegexRewriter):
-    """
+    rules_factory = JSLocationRewriterRules()
    JS Rewriter mixin which rewrites location and domain to the
    specified prefix (default: ``WB_wombat_``)
    """
    def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
        rules = rules + [
            (r'(?<![$\'"])\b(?:location|top)\b(?![$\'":])', RegexRewriter.add_prefix(prefix), 0),
            (r'(?<=[?])\s*(?:\w+[.])?(location)\s*(?=[:])', RegexRewriter.add_prefix(prefix), 1),
            (r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(self.window).'), 0),
            (r'(?<=\.)frameElement\b', RegexRewriter.add_prefix(prefix), 0),
        ]
        super(JSLocationRewriterMixin, self).__init__(rewriter, rules)
 # =================================================================
-class JSWombatProxyRewriterMixin(object):
+class JSLinkAndLocationRewriter(RegexRewriter):
    rules_factory = JSLinkAndLocationRewriterRules()
 JSRewriter = JSLinkAndLocationRewriter
 # =================================================================
 class JSWombatProxyRewriter(RegexRewriter):
    """
    JS Rewriter mixin which wraps the contents of the
    script in an anonymous block scope and inserts
    Wombat js-proxy setup
    """
-    local_init_func = '\nvar {0} = function(name) {{\
+    rules_factory = JSWombatProxyRules()
 return (self._wb_wombat && self._wb_wombat.local_init &&\
 self._wb_wombat.local_init(name)) || self[name]; }};\n\
 if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
 {{\n'
-    local_init_func_name = '_____WB$wombat$assign$function_____'
+    def __init__(self, rewriter, extra_rules=None):
        super(JSWombatProxyRewriter, self).__init__(rewriter, extra_rules=extra_rules)
-    local_var_line = 'let {0} = {1}("{0}");'
+        self.first_buff = self.rules_factory.first_buff
-
+        self.last_buff = self.rules_factory.last_buff
-    local_objs = ['window',
+        self.local_objs = self.rules_factory.local_objs
                  'self',
                  'document',
                  'location',
                  'top',
                  'parent',
                  'frames',
                  'opener']
    THIS_RW = '(this && this._WB_wombat_obj_proxy || this)'
    CHECK_LOC = '(self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = '
    @classmethod
    def replace_str(cls, replacer):
        return lambda x: x.replace('this', replacer)
    def __init__(self, rewriter, rules=[]):
        #func_rw = 'Function("return {0}")'.format(self.THIS_RW)
        prop_str = '|'.join(self.local_objs)
        rules = rules + [
           (r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0),
           (r'(?<!\.)\blocation\b\s*[=]\s*(?![=])', self.add_suffix(self.CHECK_LOC), 0),
           (r'\breturn\s+this\b\s*(?![.$])', self.replace_str(self.THIS_RW), 0),
           (r'(?<=[\n])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(';' + self.THIS_RW), 0),
           (r'(?<![$.])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(self.THIS_RW), 0),
           (r'(?<=[=])\s*this\b\s*(?![.$])', self.replace_str(self.THIS_RW), 0),
           ('\}(?:\s*\))?\s*\(this\)', self.replace_str(self.THIS_RW), 0),
           (r'(?<=[^|&][|&]{2})\s*this\b\s*(?![|&.$]([^|&]|$))', self.replace_str(self.THIS_RW), 0),
        ]
        super(JSWombatProxyRewriterMixin, self).__init__(rewriter, rules)
        local_declares = '\n'.join([self.local_var_line.format(obj, self.local_init_func_name) for obj in self.local_objs])
        self.first_buff = self.local_init_func.format(self.local_init_func_name) + local_declares
        self.last_buff = '\n\n}'
    def rewrite_complete(self, string, **kwargs):
        if not kwargs.get('inline_attr'):
-            return super(JSWombatProxyRewriterMixin, self).rewrite_complete(string)
+            return super(JSWombatProxyRewriter, self).rewrite_complete(string)
        # check if any of the wrapped objects are used in the script
        # if not, don't rewrite
@ -231,31 +270,8 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
        return self.last_buff
 # =================================================================
 class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter):
    pass
 # =================================================================
 class JSLinkOnlyRewriter(JSLinkRewriterMixin, RegexRewriter):
    pass
 # =================================================================
 class JSLinkAndLocationRewriter(JSLocationRewriterMixin,
                                JSLinkRewriterMixin,
                                RegexRewriter):
    pass
 # =================================================================
 class JSNoneRewriter(RegexRewriter):
    def __init__(self, rewriter, rules=[]):
        super(JSNoneRewriter, self).__init__(rewriter, rules)
 # =================================================================
 class JSWombatProxyRewriter(JSWombatProxyRewriterMixin, RegexRewriter):
    pass
@ -287,16 +303,40 @@ class JSReplaceFuzzy(object):
 # =================================================================
-# Set 'default' JSRewriter
+class CSSRules(RxRules):
-JSRewriter = JSLinkAndLocationRewriter
+    CSS_URL_REGEX = "url\\s*\\(\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*([^)'\"]+)\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*\\)"
    CSS_IMPORT_NO_URL_REGEX = ("@import\\s+(?!url)\\(?\\s*['\"]?" +
                               "(?!url[\\s\\(])([\w.:/\\\\-]+)")
    def __init__(self):
        rules = [
            (self.CSS_URL_REGEX, self.archival_rewrite(), 1),
            (self.CSS_IMPORT_NO_URL_REGEX, self.archival_rewrite(), 1),
        ]
        super(CSSRules, self).__init__(rules)
 # =================================================================
 class CSSRewriter(RegexRewriter):
    rules_factory = CSSRules()
 # =================================================================
 class XMLRules(RxRules):
    def __init__(self):
        rules = [
            ('([A-Za-z:]+[\s=]+)?["\'\s]*(' +
             self.HTTPX_MATCH_STR + ')',
             self.archival_rewrite(), 2),
        ]
        super(XMLRules, self).__init__(rules)
 # =================================================================
 class XMLRewriter(RegexRewriter):
-    def __init__(self, rewriter, extra=[]):
+    rules_factory = XMLRules()
        rules = self._create_rules(rewriter)
        super(XMLRewriter, self).__init__(rewriter, rules)
    # custom filter to reject 'xmlns' attr
    def filter(self, m):
@ -306,30 +346,5 @@ class XMLRewriter(RegexRewriter):
        return True
    def _create_rules(self, rewriter):
        return [
            ('([A-Za-z:]+[\s=]+)?["\'\s]*(' +
             RegexRewriter.HTTPX_MATCH_STR + ')',
             RegexRewriter.archival_rewrite(rewriter), 2),
        ]
 # =================================================================
 class CSSRewriter(RegexRewriter):
    CSS_URL_REGEX = "url\\s*\\(\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*([^)'\"]+)\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*\\)"
    CSS_IMPORT_NO_URL_REGEX = ("@import\\s+(?!url)\\(?\\s*['\"]?" +
                               "(?!url[\\s\\(])([\w.:/\\\\-]+)")
    def __init__(self, rewriter):
        rules = self._create_rules(rewriter)
        super(CSSRewriter, self).__init__(rewriter, rules)
    def _create_rules(self, rewriter):
        return [
            (CSSRewriter.CSS_URL_REGEX,
             RegexRewriter.archival_rewrite(rewriter), 1),
            (CSSRewriter.CSS_IMPORT_NO_URL_REGEX,
             RegexRewriter.archival_rewrite(rewriter), 1),
        ]
--- a/pywb/rewrite/rewrite_dash.py
+++ b/pywb/rewrite/rewrite_dash.py
@ -58,7 +58,7 @@ class RewriteDASH(BufferedRewriter):
 # ============================================================================
-def rewrite_fb_dash(string):
+def rewrite_fb_dash(string, *args):
    DASH_SPLIT = r'\n",dash_prefetched_representation_ids:'
    inx = string.find(DASH_SPLIT)
    if inx < 0:
--- a/pywb/rewrite/test/test_content_rewriter.py
+++ b/pywb/rewrite/test/test_content_rewriter.py
@ -10,7 +10,7 @@ from pywb.utils.io import chunk_encode_iter
 from pywb.rewrite.wburl import WbUrl
 from pywb.rewrite.url_rewriter import UrlRewriter
-from pywb.rewrite.default_rewriter import DefaultRewriter
+from pywb.rewrite.default_rewriter import DefaultRewriter, RewriterWithJSProxy
 from pywb import get_test_dir
@ -35,6 +35,7 @@ class TestContentRewriter(object):
    @classmethod
    def setup_class(self):
        self.content_rewriter = DefaultRewriter()
        self.js_proxy_content_rewriter = RewriterWithJSProxy()
    def _create_response_record(self, url, headers, payload, warc_headers):
        writer = BufferWARCWriter()
@ -53,7 +54,7 @@ class TestContentRewriter(object):
    def rewrite_record(self, headers, content, ts, url='http://example.com/',
                       prefix='http://localhost:8080/prefix/', warc_headers=None,
-                       request_url=None, is_live=None):
+                       request_url=None, is_live=None, use_js_proxy=True):
        record = self._create_response_record(url, headers, content, warc_headers)
@ -68,7 +69,10 @@ class TestContentRewriter(object):
            cdx['is_fuzzy'] = '1'
        cdx['is_live'] = is_live
-        return self.content_rewriter(record, url_rewriter, None, cdx=cdx)
+        if use_js_proxy:
            return self.js_proxy_content_rewriter(record, url_rewriter, None, cdx=cdx)
        else:
            return self.content_rewriter(record, url_rewriter, None, cdx=cdx)
    def test_rewrite_html(self, headers):
        content = '<html><body><a href="http://example.com/"></a></body></html>'
@ -109,17 +113,34 @@ class TestContentRewriter(object):
        assert ('Content-Type', 'text/html') in headers.headers
        exp = '<html><body><a href="http://localhost:8080/prefix/201701/http://example.com/"></a></body></html>'
-        assert b''.join(gen).decode('utf-8') == exp
+
        result = b''.join(gen).decode('utf-8')
        assert exp == result
    def test_rewrite_js_mod(self, headers):
        content = 'function() { location.href = "http://example.com/"; }'
-        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_')
+        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_', use_js_proxy=False)
        assert ('Content-Type', 'text/javascript') in headers.headers
        exp = 'function() { WB_wombat_location.href = "http://example.com/"; }'
-        assert b''.join(gen).decode('utf-8') == exp
+        result = b''.join(gen).decode('utf-8')
        assert exp == result
    def test_rewrite_js_mod_with_obj_proxy(self, headers):
        content = 'function() { location.href = "http://example.com/"; }'
        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_', use_js_proxy=True)
        assert ('Content-Type', 'text/javascript') in headers.headers
        exp = 'function() { location.href = "http://example.com/"; }'
        result = b''.join(gen).decode('utf-8')
        assert 'let window ' in result
        assert exp in result
    def test_rewrite_cs_mod(self, headers):
        content = '.foo { background: url(http://localhost:8080/prefix/201701cs_/http://example.com/) }'
@ -136,7 +157,7 @@ class TestContentRewriter(object):
        headers = {'Content-Type': 'application/x-javascript'}
        content = 'function() { location.href = "http://example.com/"; }'
-        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_')
+        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_', use_js_proxy=False)
        assert ('Content-Type', 'application/x-javascript') in headers.headers
@ -281,11 +302,14 @@ class TestContentRewriter(object):
        content = '/**/ jQuery_ABC({"foo": "bar"});'
        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_',
-                                                  url='http://example.com/path/file')
+                                                  url='http://example.com/path/file',
                                                  use_js_proxy=True)
        assert ('Content-Type', 'text/javascript') in headers.headers
-        assert b''.join(gen).decode('utf-8') == content
+        result = b''.join(gen).decode('utf-8')
        assert 'let window' in result
        assert content in result
    def test_rewrite_text_no_type(self):
        headers = {}
@ -307,7 +331,9 @@ class TestContentRewriter(object):
        assert headers.headers == [('Content-Type', 'text/javascript')]
-        assert b''.join(gen).decode('utf-8') == content
+        result = b''.join(gen).decode('utf-8')
        assert 'let window ' in result
        assert content in result
    def test_custom_fuzzy_replace(self):
        headers = {'Content-Type': 'application/octet-stream'}
@ -329,7 +355,7 @@ class TestContentRewriter(object):
        content = '{"foo":"bar", "dash": {"on": "true"}, "some": ["list"]'
        # is_live
-        rw_headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701mp_',
+        rw_headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_',
                                                  url='https://player.vimeo.com/video/123445/config/config?A=B',
                                                  is_live='1')
@ -342,6 +368,21 @@ class TestContentRewriter(object):
        assert b''.join(gen).decode('utf-8') == content
    def test_custom_live_js_obj_proxy(self):
        headers = {'Content-Type': 'text/javascript'}
        content = '{"foo":"bar", "dash": {"on": "true"}, "some": ["list"], "hls": {"A": "B"}'
        # is_live
        rw_headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_',
                                                  url='https://player.vimeo.com/video/123445/config/config?A=B',
                                                  is_live='1',
                                                  use_js_proxy=True)
        # rewritten
        rw_content = '{"foo":"bar", "__dash": {"on": "true"}, "some": ["list"], "__hls": {"A": "B"}'
        assert rw_content in b''.join(gen).decode('utf-8')
    def test_custom_ajax_rewrite(self):
        headers = {'Content-Type': 'application/json',
                   'X-Pywb-Requested-With': 'XMLHttpRequest'}
@ -451,6 +492,26 @@ http://example.com/video_4.m3u8
 </MPD>"""
        assert b''.join(gen).decode('utf-8') == filtered
    def test_dash_fb_in_js(self):
        headers = {'Content-Type': 'text/javascript'}
        with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
            content = 'dash_manifest:"' + fh.read().encode('unicode-escape').decode('utf-8')
        rep_ids = r'\n",dash_prefetched_representation_ids:["4","5"]'
        content += rep_ids
        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_',
                                                  url='http://facebook.com/example/dash/manifest.mpd')
        assert headers.headers == [('Content-Type', 'text/javascript')]
        result = b''.join(gen).decode('utf-8')
        # 4, 5 representations removed, replaced with default 1, 7
        assert 'dash_prefetched_representation_ids:["1", "7"]' in result
        assert rep_ids not in result
    def test_dash_custom_max_resolution(self):
        headers = {'Content-Type': 'application/dash+xml'}
        with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
@ -533,5 +594,3 @@ http://example.com/video_4.m3u8
        assert b''.join(gen).decode('utf-8') == filtered
--- a/pywb/rewrite/test/test_regex_rewriters.py
+++ b/pywb/rewrite/test/test_regex_rewriters.py
@ -3,7 +3,7 @@ r"""
 # Custom Regex
 #=================================================================
 # Test https->http converter (other tests below in subclasses)
->>> RegexRewriter(urlrewriter, [(RegexRewriter.HTTPX_MATCH_STR, RegexRewriter.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com')
+>>> RegexRewriter(urlrewriter, [(RxRules.HTTPX_MATCH_STR, RxRules.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com')
 'a = http://example.com; b = http://example.com; c = http://some-url/path/http://embedded.example.com'
@ -101,7 +101,7 @@ r"""
 '"/web/20131010/\\\\/\\\\/example.com/"'
 # custom rules added
->>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)])
+>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RxRules.format('/*{0}*/'), 0)])
 'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html"; /*some_func(); */'
 # scheme-agnostic
@ -274,7 +274,7 @@ r"""
 #=================================================================
 from pywb.rewrite.url_rewriter import UrlRewriter
-from pywb.rewrite.regex_rewriters import RegexRewriter, JSRewriter, CSSRewriter, XMLRewriter
+from pywb.rewrite.regex_rewriters import RegexRewriter, JSRewriter, CSSRewriter, XMLRewriter, RxRules
 from pywb.rewrite.regex_rewriters import JSWombatProxyRewriter