1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-25 23:47:47 +01:00
pywb/pywb/rewrite/rewriterules.py
2014-03-16 23:12:04 -07:00

58 lines
2.1 KiB
Python

from pywb.utils.dsrules import BaseRule
from regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
from html_rewriter import HTMLRewriter
from lxml_parser import LXMLHTMLRewriter
from header_rewriter import HeaderRewriter
import itertools
#=================================================================
class RewriteRules(BaseRule):
    """Rule selecting which rewriter to use for each content type.

    ``self.rewriters`` maps a content type name ('header', 'css', 'xml',
    'html', 'js') to the rewriter class configured for it, or -- when
    custom regexs are configured for that type -- to a factory function
    wrapping that class.
    """

    def __init__(self, url_prefix, config=None):
        """Populate ``self.rewriters`` from ``config``, falling back to defaults.

        :param url_prefix: passed through unchanged to ``BaseRule.__init__``.
        :param config: mapping of optional overrides; ``None`` is treated as
            an empty mapping. Keys read here: 'header_class', 'css_class',
            'xml_class', 'html_class', 'js_class', 'js_rewrite_location'
            and 'js_regexs'.
        """
        # Build a fresh dict per call rather than using a mutable default
        # argument, which would be a single dict shared by every instance
        # created without an explicit config.
        if config is None:
            config = {}

        super(RewriteRules, self).__init__(url_prefix, config)

        # The lxml-based html rewriter is the default; HTMLRewriter (or any
        # other class) can be selected through the 'html_class' config key.
        self.rewriters = {
            'header': config.get('header_class', HeaderRewriter),
            'css': config.get('css_class', CSSRewriter),
            'xml': config.get('xml_class', XMLRewriter),
            'html': config.get('html_class', LXMLHTMLRewriter),
        }

        # Custom handling for js rewriting, often the most complex:
        # the flag toggles which js rewriter class is the default.
        self.js_rewrite_location = bool(
            config.get('js_rewrite_location', True))

        if self.js_rewrite_location:
            js_default_class = JSLinkAndLocationRewriter
        else:
            js_default_class = JSLinkOnlyRewriter

        # An explicit 'js_class' in config wins over the flag-based default.
        self.rewriters['js'] = config.get('js_class', js_default_class)

        # Wrap the js rewriter with any custom regexs from config.
        self._add_custom_regexs('js', config)

    def _add_custom_regexs(self, field, config):
        """Wrap ``self.rewriters[field]`` so it is built with extra rules.

        Looks up ``config[field + '_regexs']``; when that entry is missing
        or empty, nothing is changed. Otherwise the entry is converted with
        ``RegexRewriter.parse_rules_from_config`` and ``self.rewriters[field]``
        is replaced by a one-argument factory that calls the previously
        registered class with ``(urlrewriter, parsed_rules)``.

        :param field: key into ``self.rewriters`` (e.g. 'js').
        :param config: mapping that may hold the ``<field>_regexs`` entry.
        """
        regexs = config.get(field + '_regexs')
        if not regexs:
            return

        # Capture the current class and the parsed rules now, so the factory
        # keeps using them even after the dict entry is overwritten below.
        rewriter_cls = self.rewriters[field]
        rule_def_tuples = RegexRewriter.parse_rules_from_config(regexs)

        def extend_rewriter_with_regex(urlrewriter):
            return rewriter_cls(urlrewriter, rule_def_tuples)

        self.rewriters[field] = extend_rewriter_with_regex