1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-04-01 19:51:28 +02:00
pywb/pywb/rewrite/rewriterules.py
Ilya Kreymer 181c18a1b8 pep8 pass: fix spacing, line length, issues
also remove references to obsolete cached_replay, hostnames in pywb_init
2014-12-23 15:14:03 -08:00

65 lines
2.4 KiB
Python

from pywb.utils.dsrules import BaseRule
from regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
from regex_rewriters import JSLocationOnlyRewriter
from header_rewriter import HeaderRewriter
from html_rewriter import HTMLRewriter
import itertools
#=================================================================
class RewriteRules(BaseRule):
    """Rewrite settings for a rule.

    ``url_prefix`` and ``config`` are handed to ``BaseRule``; this class
    additionally builds ``self.rewriters``, a dict with the keys
    'header', 'css', 'xml', 'html', 'json' and 'js' whose values are the
    rewriter classes (or, for 'js' with custom regexs, a factory
    function) selected by ``config``.
    """

    def __init__(self, url_prefix, config=None):
        """Select the rewriters and rewrite options described by ``config``.

        :param url_prefix: passed through unchanged to ``BaseRule``.
        :param config: dict-like settings object; ``None`` (the default)
            is treated as an empty dict. Keys read here:

            * ``header_class``, ``css_class``, ``xml_class``,
              ``html_class``, ``json_class``, ``js_class`` -- override the
              rewriter stored under the matching ``self.rewriters`` key.
            * ``parse_comments`` -- stored as-is, defaults to ``False``.
            * ``js_rewrite_location`` -- ``'all'`` (default),
              ``'location'``, or anything else; picks the default js
              rewriter class.
            * ``js_regexs`` -- optional extra regex rules for the js
              rewriter (see ``_add_custom_regexs``).
            * ``cookie_scope`` -- stored as-is, defaults to ``'default'``.
        """
        # Create a fresh dict per call. A literal ``{}`` default would be a
        # single object shared by every instance built without a config
        # (and it is handed to BaseRule, which may keep a reference).
        if config is None:
            config = {}

        super(RewriteRules, self).__init__(url_prefix, config)

        self.rewriters = {}

        self.rewriters['header'] = config.get('header_class', HeaderRewriter)
        self.rewriters['css'] = config.get('css_class', CSSRewriter)
        self.rewriters['xml'] = config.get('xml_class', XMLRewriter)
        self.rewriters['html'] = config.get('html_class', HTMLRewriter)
        self.rewriters['json'] = config.get('json_class', JSLinkOnlyRewriter)

        self.parse_comments = config.get('parse_comments', False)

        # Custom handling for js rewriting, often the most complex
        self.js_rewrite_location = config.get('js_rewrite_location', 'all')

        # ability to toggle rewriting: the setting only chooses the
        # *default* js class; an explicit 'js_class' below still wins.
        if self.js_rewrite_location == 'all':
            js_default_class = JSLinkAndLocationRewriter
        elif self.js_rewrite_location == 'location':
            js_default_class = JSLocationOnlyRewriter
        else:
            js_default_class = JSLinkOnlyRewriter

        # set js class, using either default or override from config
        self.rewriters['js'] = config.get('js_class', js_default_class)

        # add any regexs for js rewriter (must run after the js class is
        # chosen, since it wraps whatever is currently registered)
        self._add_custom_regexs('js', config)

        # cookie rewrite scope
        self.cookie_scope = config.get('cookie_scope', 'default')

    def _add_custom_regexs(self, field, config):
        """Wrap the rewriter for ``field`` so it receives extra regex rules.

        Looks up ``config[field + '_regexs']``. If that entry is missing
        or empty, nothing changes. Otherwise ``self.rewriters[field]`` is
        replaced by a factory function that takes a ``urlrewriter``,
        builds the rule tuples for it, and returns an instance of the
        previously registered rewriter class.

        :param field: key into ``self.rewriters`` (e.g. ``'js'``); must
            already be present when custom regexs are configured.
        :param config: dict-like settings object to read the regexs from.
        """
        regexs = config.get(field + '_regexs')
        if not regexs:
            return

        # Capture the class registered *now*; the closure below must keep
        # using it after self.rewriters[field] is overwritten.
        rewriter_cls = self.rewriters[field]

        # parse_rules_from_config returns a callable that produces the
        # rule tuples once a concrete urlrewriter is available.
        parse_rules_func = RegexRewriter.parse_rules_from_config(regexs)

        def extend_rewriter_with_regex(urlrewriter):
            rule_def_tuples = parse_rules_func(urlrewriter)
            return rewriter_cls(urlrewriter, rule_def_tuples)

        self.rewriters[field] = extend_rewriter_with_regex