1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-14 15:53:28 +01:00
pywb/pywb/rewrite/default_rewriter.py
John Berlin 22b4297fc5 pywb:
- Fix: a few broken tests due to iana.org requiring a user agent in its requests
rewrite:
  - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via
  - ensured rewriter app correctly sets the static prefix
wombat:
 - add wombat as submodule!
2019-07-02 19:24:11 -07:00

147 lines
4.0 KiB
Python

from pywb.rewrite.content_rewriter import BaseContentRewriter
from pywb.rewrite.html_rewriter import HTMLRewriter
from pywb.rewrite.html_insert_rewriter import HTMLInsertOnlyRewriter
from pywb.rewrite.regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
from pywb.rewrite.regex_rewriters import JSLocationOnlyRewriter, JSNoneRewriter, JSWombatProxyRewriter
from pywb.rewrite.header_rewriter import DefaultHeaderRewriter
from pywb.rewrite.cookie_rewriter import HostScopeCookieRewriter
from pywb.rewrite.jsonp_rewriter import JSONPRewriter
from pywb.rewrite.rewrite_dash import RewriteDASH
from pywb.rewrite.rewrite_hls import RewriteHLS
from pywb.rewrite.rewrite_amf import RewriteAMF
from pywb.rewrite.rewrite_js_workers import JSWorkerRewriter
from pywb import DEFAULT_RULES_FILE
import copy
from werkzeug.useragents import UserAgent
# ============================================================================
class DefaultRewriter(BaseContentRewriter):
DEFAULT_REWRITERS = {
'header': DefaultHeaderRewriter,
'cookie': HostScopeCookieRewriter,
'html': HTMLRewriter,
'html-banner-only': HTMLInsertOnlyRewriter,
'css': CSSRewriter,
'js': JSLocationOnlyRewriter,
'js-proxy': JSNoneRewriter,
'js-worker': JSWorkerRewriter,
'json': JSONPRewriter,
'xml': XMLRewriter,
'dash': RewriteDASH,
'hls': RewriteHLS,
'amf': RewriteAMF,
}
rewrite_types = {
# HTML
'text/html': 'guess-html',
'application/xhtml': 'html',
'application/xhtml+xml': 'html',
# CSS
'text/css': 'css',
# JS
'text/javascript': 'js',
'application/javascript': 'js',
'application/x-javascript': 'js',
# JSON
'application/json': 'json',
# HLS
'application/x-mpegURL': 'hls',
'application/vnd.apple.mpegurl': 'hls',
# DASH
'application/dash+xml': 'dash',
# AMF
'application/x-amf': 'amf',
# XML -- don't rewrite xml
#'text/xml': 'xml',
#'application/xml': 'xml',
#'application/rss+xml': 'xml',
# PLAIN
'text/plain': 'guess-text',
# DEFAULT or octet-stream
'': 'guess-text',
'application/octet-stream': 'guess-bin'
}
default_content_types = {
'html': 'text/html',
'css': 'text/css',
'js': 'text/javascript'
}
def __init__(self, replay_mod='', config=None):
config = config or {}
rules_file = config.get('rules_file', DEFAULT_RULES_FILE)
super(DefaultRewriter, self).__init__(rules_file, replay_mod)
self.all_rewriters = copy.copy(self.DEFAULT_REWRITERS)
def init_js_regex(self, regexs):
return RegexRewriter.parse_rules_from_config(regexs)
def get_rewrite_types(self):
return self.rewrite_types
# ============================================================================
class RewriterWithJSProxy(DefaultRewriter):
def __init__(self, *args, **kwargs):
super(RewriterWithJSProxy, self).__init__(*args, **kwargs)
def get_rewriter(self, rw_type, rwinfo=None):
if rw_type == 'js' and rwinfo:
# check if UA allows this
if self.ua_allows_obj_proxy(rwinfo.url_rewriter.rewrite_opts):
return JSWombatProxyRewriter
# otherwise, return default rewriter
return super(RewriterWithJSProxy, self).get_rewriter(rw_type, rwinfo)
def ua_allows_obj_proxy(self, opts):
ua = opts.get('ua')
if not ua:
ua_string = opts.get('ua_string')
if ua_string:
ua = UserAgent(ua_string)
if ua is None:
return True
supported = {
'chrome': '49.0',
'firefox': '44.0',
'safari': '10.0',
'opera': '36.0',
'edge': '12.0',
'msie': None,
}
min_vers = supported.get(ua.browser)
return (min_vers and ua.version >= min_vers)