diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py index 571cd196..69dbef32 100644 --- a/pywb/rewrite/regex_rewriters.py +++ b/pywb/rewrite/regex_rewriters.py @@ -5,52 +5,118 @@ from six.moves.urllib.parse import unquote # ================================================================= -class RegexRewriter(StreamingRewriter): - # @staticmethod - # def comment_out(string): - # return '/*' + string + '*/' +class RxRules(object): + HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+' @staticmethod - def format(template): - return lambda string: template.format(string) - - @staticmethod - def fixed(string): - return lambda _: string - - @staticmethod - def remove_https(string): + def remove_https(string, _): return string.replace("https", "http") + @staticmethod + def replace_str(replacer): + return lambda x, _: x.replace('this', replacer) + + @staticmethod + def format(template): + return lambda string, _: template.format(string) + + @staticmethod + def fixed(string): + return lambda _, _2: string + + @staticmethod + def archival_rewrite(): + return lambda string, rewriter: rewriter.rewrite(string) + @staticmethod def add_prefix(prefix): - return lambda string: prefix + string + return lambda string, _: prefix + string @staticmethod def add_suffix(suffix): - return lambda string: string + suffix + return lambda string, _: string + suffix @staticmethod - def archival_rewrite(rewriter): - return lambda string: rewriter.rewrite(string) - - - HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+' - - # DEFAULT_OP = add_prefix - - def __init__(self, rewriter, rules): - super(RegexRewriter, self).__init__(rewriter) - # rules = self.create_rules(http_prefix) - + def compile_rules(rules): # Build regexstr, concatenating regex list regex_str = '|'.join(['(' + rx + ')' for rx, op, count in rules]) # ensure it's not middle of a word, wrap in non-capture group regex_str = '(?