mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
new rewriting system!
- new header rewriter - new extensible content rewriter in urlrewrite.rewriter!
This commit is contained in:
parent
331320b17a
commit
db9d0ae41a
@ -1,4 +1,4 @@
|
|||||||
FROM python:3.5.2
|
FROM python:3.5.3
|
||||||
|
|
||||||
MAINTAINER Ilya Kreymer <ikreymer at gmail.com>
|
MAINTAINER Ilya Kreymer <ikreymer at gmail.com>
|
||||||
|
|
||||||
|
@ -88,8 +88,10 @@ class HTMLRewriterMixin(object):
|
|||||||
# ===========================
|
# ===========================
|
||||||
def __init__(self, url_rewriter,
|
def __init__(self, url_rewriter,
|
||||||
head_insert=None,
|
head_insert=None,
|
||||||
js_rewriter_class=JSRewriter,
|
js_rewriter_class=None,
|
||||||
css_rewriter_class=CSSRewriter,
|
js_rewriter=None,
|
||||||
|
css_rewriter=None,
|
||||||
|
css_rewriter_class=None,
|
||||||
url = '',
|
url = '',
|
||||||
defmod='',
|
defmod='',
|
||||||
parse_comments=False):
|
parse_comments=False):
|
||||||
@ -97,8 +99,19 @@ class HTMLRewriterMixin(object):
|
|||||||
self.url_rewriter = url_rewriter
|
self.url_rewriter = url_rewriter
|
||||||
self._wb_parse_context = None
|
self._wb_parse_context = None
|
||||||
|
|
||||||
self.js_rewriter = js_rewriter_class(url_rewriter)
|
if js_rewriter:
|
||||||
self.css_rewriter = css_rewriter_class(url_rewriter)
|
self.js_rewriter = js_rewriter
|
||||||
|
elif js_rewriter_class:
|
||||||
|
self.js_rewriter = js_rewriter_class(url_rewriter)
|
||||||
|
else:
|
||||||
|
self.js_rewriter = JSRewriter(url_rewriter)
|
||||||
|
|
||||||
|
if css_rewriter:
|
||||||
|
self.css_rewriter = css_rewriter
|
||||||
|
elif css_rewriter_class:
|
||||||
|
self.css_rewriter = css_rewriter_class(url_rewriter)
|
||||||
|
else:
|
||||||
|
self.css_rewriter = CSSRewriter(url_rewriter)
|
||||||
|
|
||||||
self.head_insert = head_insert
|
self.head_insert = head_insert
|
||||||
self.parse_comments = parse_comments
|
self.parse_comments = parse_comments
|
||||||
|
102
pywb/urlrewrite/header_rewriter.py
Normal file
102
pywb/urlrewrite/header_rewriter.py
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
from warcio.statusandheaders import StatusAndHeaders
|
||||||
|
from warcio.timeutils import datetime_to_http_date
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
|
||||||
|
#=============================================================================
|
||||||
|
class PrefixHeaderRewriter(object):
    """Rewrite the HTTP headers of a record, one header at a time.

    Each header name (lower-cased) is looked up in ``header_rules`` to pick
    a rule; headers without an entry use ``default_rule``. The rules are:

    ``keep``
        pass the header through unchanged
    ``url-rewrite``
        pass the value through ``rwinfo.url_rewriter.rewrite()``
    ``keep-if-no-content-rewrite``
        keep only when ``rwinfo.is_content_rw()`` is false
    ``content-length``
        keep ``'0'`` always, otherwise keep a valid non-negative length
        only when the content is not being rewritten
    ``cookie``
        delegate to ``rwinfo.cookie_rewriter`` when one is set
    ``prefix``
        rename the header to ``header_prefix + name``

    Any rule that does not produce a result falls back to ``prefix``.
    """

    header_rules = {
        'content-type': 'keep',
        'content-disposition': 'keep',
        'content-range': 'keep',
        # was misspelled 'accept-rangees', so Accept-Ranges was never kept
        'accept-ranges': 'keep',
        'www-authenticate': 'keep',
        'proxy-authenticate': 'keep',

        'location': 'url-rewrite',
        'content-location': 'url-rewrite',
        'content-base': 'url-rewrite',

        'content-encoding': 'keep-if-no-content-rewrite',
        'content-length': 'content-length',

        'set-cookie': 'cookie',
        'cookie': 'cookie',
    }

    default_rule = 'prefix'

    def __init__(self, rwinfo, header_prefix='X-Archive-Orig-'):
        """
        :param rwinfo: rewrite state object; must expose ``record.http_headers``,
            ``url_rewriter``, ``cookie_rewriter`` and ``is_content_rw()``
        :param header_prefix: string prepended to the name of every header
            handled by the ``prefix`` rule
        """
        self.header_prefix = header_prefix
        self.rwinfo = rwinfo
        self.http_headers = rwinfo.record.http_headers

    def __call__(self):
        """Apply the rules to every header and return a new StatusAndHeaders.

        The status line and protocol are copied from the original headers.
        """
        new_headers_list = []

        for name, value in self.http_headers.headers:
            rule = self.header_rules.get(name.lower(), self.default_rule)
            new_header = self.rewrite_header(name, value, rule)
            if not new_header:
                continue

            # a rewriter (e.g. the cookie rewriter) may return several headers
            if isinstance(new_header, list):
                new_headers_list.extend(new_header)
            else:
                new_headers_list.append(new_header)

        return StatusAndHeaders(self.http_headers.statusline,
                                headers=new_headers_list,
                                protocol=self.http_headers.protocol)

    def rewrite_header(self, name, value, rule):
        """Rewrite a single header according to ``rule``.

        :param name: header name as it appears in the record
        :param value: header value
        :param rule: one of the rule names described on the class
        :return: a ``(name, value)`` tuple, or whatever the cookie rewriter
            returns for the ``cookie`` rule
        """
        if rule == 'keep':
            return (name, value)

        if rule == 'url-rewrite':
            return (name, self.rwinfo.url_rewriter.rewrite(value))

        if rule == 'keep-if-no-content-rewrite':
            if not self.rwinfo.is_content_rw():
                return (name, value)

        elif rule == 'content-length':
            if value == '0':
                return (name, value)

            # the length is only still accurate if the body is untouched
            if not self.rwinfo.is_content_rw():
                try:
                    if int(value) >= 0:
                        return (name, value)
                except (TypeError, ValueError):
                    # malformed length: fall through and prefix it
                    pass

        elif rule == 'cookie':
            if self.rwinfo.cookie_rewriter:
                return self.rwinfo.cookie_rewriter.rewrite(value)

            return (name, value)

        # default 'prefix', also used when a rule above declined to keep
        return (self.header_prefix + name, value)

    def _add_cache_headers(self, new_headers, http_cache):
        """Append caching headers to ``new_headers`` (modified in place).

        :param new_headers: list of ``(name, value)`` tuples to extend
        :param http_cache: max age in seconds; anything that is not a
            positive integer disables caching
        """
        try:
            age = int(http_cache)
        except (TypeError, ValueError):
            age = 0

        if age <= 0:
            # Cache-Control directives are comma-separated, not ';'-separated
            new_headers.append(('Cache-Control', 'no-cache, no-store'))
            return

        expires = datetime.utcnow() + timedelta(seconds=age)
        new_headers.append(('Cache-Control', 'max-age=' + str(age)))
        new_headers.append(('Expires', datetime_to_http_date(expires)))
|
||||||
|
|
||||||
|
|
||||||
|
#=============================================================================
|
||||||
|
class ProxyHeaderRewriter(PrefixHeaderRewriter):
    """Header rewriter that keeps headers by default.

    Only the headers listed in ``header_rules`` are renamed with the
    header prefix; every other header uses the ``keep`` rule.
    """

    # anything not listed below is passed through unchanged
    default_rule = 'keep'

    header_rules = dict.fromkeys(('transfer-encoding', 'connection'),
                                 'prefix')
|
@ -89,6 +89,8 @@ class RewriteInputRequest(DirectWSGIInputRequest):
|
|||||||
return headers
|
return headers
|
||||||
|
|
||||||
def _req_cookie_rewrite(self, value):
|
def _req_cookie_rewrite(self, value):
|
||||||
|
return value
|
||||||
|
|
||||||
rule = self.rewriter.ruleset.get_first_match(self.urlkey)
|
rule = self.rewriter.ruleset.get_first_match(self.urlkey)
|
||||||
if not rule or not rule.req_cookie_rewrite:
|
if not rule or not rule.req_cookie_rewrite:
|
||||||
return value
|
return value
|
||||||
|
363
pywb/urlrewrite/rewriter.py
Normal file
363
pywb/urlrewrite/rewriter.py
Normal file
@ -0,0 +1,363 @@
|
|||||||
|
from warcio.utils import to_native_str
|
||||||
|
from warcio.bufferedreaders import BufferedReader
|
||||||
|
|
||||||
|
import webencodings
|
||||||
|
import re
|
||||||
|
|
||||||
|
from pywb.utils.loaders import load_yaml_config
|
||||||
|
|
||||||
|
from pywb.rewrite.html_rewriter import HTMLRewriter
|
||||||
|
|
||||||
|
from pywb.rewrite.regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
||||||
|
from pywb.rewrite.regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
||||||
|
from pywb.rewrite.regex_rewriters import JSLocationOnlyRewriter, JSNoneRewriter
|
||||||
|
|
||||||
|
from pywb.rewrite.cookie_rewriter import ExactPathCookieRewriter
|
||||||
|
|
||||||
|
from pywb.urlrewrite.header_rewriter import PrefixHeaderRewriter, ProxyHeaderRewriter
|
||||||
|
|
||||||
|
from pywb.rewrite.jsonp_rewriter import JSONPRewriter
|
||||||
|
|
||||||
|
from pywb.webagg.utils import StreamIter, BUFF_SIZE
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class Rewriter(object):
    """Rewrite the headers and content of a record.

    ``rewrite_types`` maps a response mime type to a text type, and
    ``all_rewriters`` maps a rewriter type to the class implementing it.
    Rules loaded from the rules file can remap a text type to a different
    rewriter type and supply extra JS regex rules.

    Calling the instance returns ``(headers, body_iterator, was_rewritten)``.
    """

    # raw bytes literal (avoids invalid-escape warnings); case-insensitive
    # because HTML tag and attribute names are case-insensitive
    CHARSET_REGEX = re.compile(
        br'''<meta[^>]*?[\s;"']charset\s*=[\s"']*([^\s"'/>]*)''',
        re.IGNORECASE)

    all_rewriters = {
        'header': PrefixHeaderRewriter,
        'header-proxy': ProxyHeaderRewriter,

        'html': HTMLRewriter,

        'css': CSSRewriter,

        'js': JSLocationOnlyRewriter,
        'js-proxy': JSNoneRewriter,

        'json': JSONPRewriter,

        'xml': XMLRewriter,
    }

    rewrite_types = {
        # HTML
        'text/html': 'html',
        'application/xhtml': 'html',
        'application/xhtml+xml': 'html',

        # CSS
        'text/css': 'css',

        # JS
        'text/javascript': 'js',
        'application/javascript': 'js',
        'application/x-javascript': 'js',

        # JSON
        'application/json': 'json',

        # HLS
        'application/x-mpegURL': 'hls',

        # DASH
        'application/dash+xml': 'dash',

        # XML
        'text/xml': 'xml',
        'application/xml': 'xml',
        'application/rss+xml': 'xml',

        # PLAIN
        'text/plain': 'plain',
    }

    def __init__(self, rules_file, replay_mod=''):
        """
        :param rules_file: path passed to ``load_yaml_config`` to load rules
        :param replay_mod: default modifier handed to the HTML rewriter
        """
        self.rules = []

        # per-instance copy, so add_rewriter() does not modify the
        # class-level registry shared by every Rewriter instance
        self.all_rewriters = dict(self.all_rewriters)

        self.load_rules(rules_file)
        self.replay_mod = replay_mod

    def add_rewriter(self, rw):
        """Register the rewriter class ``rw`` under its ``name`` attribute."""
        self.all_rewriters[rw.name] = rw

    def get_rewriter(self, url, text_type):
        """Return the rewriter class registered for ``text_type``, or None.

        ``url`` is accepted but not used by this implementation.
        """
        return self.all_rewriters.get(text_type)

    def load_rules(self, filename):
        """Load ``filename`` and append every usable rule to ``self.rules``."""
        config = load_yaml_config(filename)

        # a config without a 'rules' key simply contributes no rules
        for rule_config in config.get('rules') or []:
            rule = self.parse_rewrite_rule(rule_config)
            if rule:
                self.rules.append(rule)

    def parse_rewrite_rule(self, config):
        """Build a rule dict from one rules-file entry.

        The entry's ``rewrite`` mapping is used as the rule itself (it is
        updated in place) and gains a ``url_prefix`` list and, when
        ``js_regexs`` is present, a ``js_regex_func`` factory.

        :return: the rule dict, or None if the entry has no ``rewrite`` block
        """
        rule = config.get('rewrite')
        if not rule:
            return None

        # NOTE(review): an entry without 'url_prefix' yields [None], which
        # makes get_rule() raise on startswith(None) -- confirm whether such
        # entries can occur in the rules file
        url_prefix = config.get('url_prefix')
        if not isinstance(url_prefix, list):
            url_prefix = [url_prefix]

        rule['url_prefix'] = url_prefix

        regexs = rule.get('js_regexs')
        if regexs:
            rule['js_regex_func'] = RegexRewriter.parse_rules_from_config(regexs)

        return rule

    def get_rule(self, cdx):
        """Return the first rule whose url prefix matches ``cdx['urlkey']``.

        :return: the matching rule dict, or an empty dict if none matches
        """
        urlkey = to_native_str(cdx['urlkey'])

        for rule in self.rules:
            if urlkey.startswith(tuple(rule['url_prefix'])):
                return rule

        return {}

    def get_rw_class(self, rule, text_type, rwinfo):
        """Resolve the rewriter type and class for ``text_type``.

        JS is switched to ``js-proxy`` when urls are not being rewritten,
        and ``rule`` may remap the text type to another rewriter type.

        :return: ``(rw_type, rw_class)``; ``rw_class`` is None when no
            rewriter is registered for the resolved type
        """
        if text_type == 'js' and not rwinfo.is_url_rw():
            text_type = 'js-proxy'

        rw_type = rule.get(text_type, text_type)
        rw_class = self.all_rewriters.get(rw_type)

        return rw_type, rw_class

    def create_rewriter(self, text_type, rule, rwinfo, cdx, head_insert_func=None):
        """Instantiate the content rewriter for ``text_type``.

        :param text_type: text type from ``rewrite_types``
        :param rule: rule dict from ``get_rule()`` (may be empty)
        :param rwinfo: rewrite state for the current record
        :param cdx: index entry; ``cdx['url']`` is read for HTML
        :param head_insert_func: optional callable ``(rule, cdx)`` returning
            the text to insert into the HTML head
        :return: a rewriter instance, or None when no rewriter class is
            registered for the resolved type (e.g. 'hls', 'dash', 'plain')
        """
        rw_type, rw_class = self.get_rw_class(rule, text_type, rwinfo)

        # no registered rewriter: let the caller stream the content as-is
        # instead of failing on calling None
        if rw_class is None:
            return None

        # must match the 'js-proxy' key set in get_rw_class()
        # (was 'js_proxy', which never matched)
        if rw_type in ('js', 'js-proxy'):
            extra_rules = []
            if 'js_regex_func' in rule:
                extra_rules = rule['js_regex_func'](rwinfo.url_rewriter)

            return rw_class(rwinfo.url_rewriter, extra_rules)

        if rw_type != 'html':
            return rw_class(rwinfo.url_rewriter)

        # HTML Rewriter
        head_insert_str = self.get_head_insert(rwinfo, rule, head_insert_func, cdx)

        js_rewriter = self.create_rewriter('js', rule, rwinfo, cdx)
        css_rewriter = self.create_rewriter('css', rule, rwinfo, cdx)

        return rw_class(rwinfo.url_rewriter,
                        js_rewriter=js_rewriter,
                        css_rewriter=css_rewriter,
                        head_insert=head_insert_str,
                        url=cdx['url'],
                        defmod=self.replay_mod,
                        parse_comments=rule.get('parse_comments', False))

    def get_head_insert(self, rwinfo, rule, head_insert_func, cdx):
        """Return the head insert text, encoded for the page's charset.

        The text from ``head_insert_func`` is encoded with the page charset
        (falling back to utf-8) and then decoded as iso-8859-1, matching the
        iso-8859-1 round trip applied to content in
        ``rewrite_text_stream_to_gen``.

        :return: the insert string, or ``''`` when there is no insert func
        """
        head_insert_str = ''
        charset = rwinfo.charset

        # if no charset set, attempt to extract from first 1024
        if not charset:
            first_buff = rwinfo.read_and_keep(1024)
            charset = self.extract_html_charset(first_buff)

        if head_insert_func:
            head_insert_orig = head_insert_func(rule, cdx)

            if charset:
                try:
                    head_insert_str = webencodings.encode(head_insert_orig, charset)
                except (LookupError, UnicodeError):
                    # unknown charset label or unencodable text:
                    # fall back to utf-8 below
                    pass

            if not head_insert_str:
                charset = 'utf-8'
                head_insert_str = head_insert_orig.encode(charset)

            head_insert_str = head_insert_str.decode('iso-8859-1')

        return head_insert_str

    def extract_html_charset(self, buff):
        """Return the charset declared in a ``<meta>`` tag in ``buff``.

        :param buff: bytes from the start of the document
        :return: the charset as a native string, or None if not found
        """
        m = self.CHARSET_REGEX.search(buff)
        if not m:
            return None

        return to_native_str(m.group(1))

    def rewrite_headers(self, rwinfo):
        """Rewrite the record headers and store them on ``rwinfo``.

        Uses the 'header' rewriter when urls are rewritten and the
        'header-proxy' rewriter otherwise.
        """
        if rwinfo.is_url_rw():
            header_rw_name = 'header'
        else:
            header_rw_name = 'header-proxy'

        header_rw_class = self.all_rewriters.get(header_rw_name)
        rwinfo.rw_http_headers = header_rw_class(rwinfo)()

    def __call__(self, record, url_rewriter, cookie_rewriter,
                 head_insert_func=None,
                 cdx=None):
        """Rewrite ``record`` for replay.

        :return: ``(headers, body_iterator, was_rewritten)``; the last item
            is False when the content is streamed without rewriting
        """
        rwinfo = RewriteInfo(record, self, url_rewriter, cookie_rewriter)

        self.rewrite_headers(rwinfo)

        content_rewriter = None
        if rwinfo.is_content_rw():
            rule = self.get_rule(cdx)
            content_rewriter = self.create_rewriter(rwinfo.text_type, rule,
                                                    rwinfo, cdx,
                                                    head_insert_func)

        if not content_rewriter:
            return rwinfo.rw_http_headers, StreamIter(rwinfo.content_stream), False

        # align to line end for all non-html rewriting
        align = (rwinfo.text_type != 'html')

        # Create rewriting generator
        gen = self.rewrite_text_stream_to_gen(rwinfo.content_stream,
                                              rewrite_func=content_rewriter.rewrite,
                                              final_read_func=content_rewriter.close,
                                              align_to_line=align)

        return rwinfo.rw_http_headers, gen, True

    @staticmethod
    def rewrite_text_stream_to_gen(stream,
                                   rewrite_func,
                                   final_read_func,
                                   align_to_line):
        """
        Convert stream to a generator by applying the rewriting func
        to each portion of the stream.
        Align to line boundaries if needed.

        Each chunk is decoded as iso-8859-1 before rewriting and encoded
        back afterwards. The stream is always closed when the generator
        finishes or is discarded.

        :param stream: binary stream with ``read``, ``readline``, ``close``
        :param rewrite_func: callable taking and returning a text chunk
        :param final_read_func: callable returning any trailing text
        :param align_to_line: if true, extend each chunk to the next line end
        """
        try:
            while True:
                buff = stream.read(BUFF_SIZE)
                if not buff:
                    break

                if align_to_line:
                    buff += stream.readline()

                buff = rewrite_func(buff.decode('iso-8859-1'))
                yield buff.encode('iso-8859-1')

            # For adding a tail/handling final buffer
            buff = final_read_func()
            if buff:
                yield buff.encode('iso-8859-1')

        finally:
            stream.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class RewriteInfo(object):
    """Per-record state used while rewriting headers and content.

    On construction the record's Content-Type header is parsed into
    ``text_type`` (via ``rewriter.rewrite_types``) and ``charset``.
    Both stay None when the type is missing or not a known rewritable type.
    """

    # leading whitespace followed by '<' -- content that looks like markup
    TAG_REGEX = re.compile(br'^\s*<')

    def __init__(self, record, rewriter, url_rewriter, cookie_rewriter):
        """
        :param record: record exposing ``http_headers`` and ``content_stream()``
        :param rewriter: object exposing a ``rewrite_types`` mapping
        :param url_rewriter: url rewriter exposing ``wburl.mod``, ``prefix``
            and ``rewrite_opts``
        :param cookie_rewriter: cookie rewriter; when falsy an
            ``ExactPathCookieRewriter`` is created for ``url_rewriter``
        """
        self.record = record

        self.rw_http_headers = record.http_headers
        self.content_stream = record.content_stream()

        self.rewriter = rewriter

        self.text_type = None
        self.charset = None

        self.url_rewriter = url_rewriter

        if not cookie_rewriter:
            cookie_rewriter = ExactPathCookieRewriter(url_rewriter)

        self.cookie_rewriter = cookie_rewriter

        self._fill_text_type_and_charset()
        self._resolve_text_type()

    def _fill_text_type_and_charset(self):
        """Set ``text_type`` and ``charset`` from the Content-Type header."""
        content_type = self.record.http_headers.get_header('Content-Type')
        if not content_type:
            return

        mime, _, params = content_type.partition(';')

        # tolerate whitespace around the mime type ('text/html ; charset=..')
        mime = mime.strip()

        # exact match first (some keys are mixed-case), then
        # case-insensitively, since mime types are not case sensitive
        rewrite_types = self.rewriter.rewrite_types
        self.text_type = (rewrite_types.get(mime) or
                          rewrite_types.get(mime.lower()))
        if not self.text_type:
            return

        _, found, charset = params.lower().partition('charset=')
        if found:
            # drop any following parameters and surrounding quotes
            charset = charset.split(';', 1)[0].strip().strip('"\'').strip()
            self.charset = charset or None

    def _resolve_text_type(self):
        """Decide between html and js/css for content served as html.

        Only applies when the type is 'html' and the request modifier is
        ``js_`` or ``cs_``: the first 128 bytes are sniffed, and content
        that does not start with a tag is treated as js or css to match the
        modifier. Any other text type is left as detected, so css served
        for a ``js_`` request stays css.
        """
        mod = self.url_rewriter.wburl.mod

        # only attempt to resolve between html and other text types
        if self.text_type != 'html':
            return

        if mod != 'js_' and mod != 'cs_':
            return

        buff = self.read_and_keep(128)

        # check if starts with a tag, then likely html
        if self.TAG_REGEX.match(buff):
            return

        # NOTE(review): previously the sniff result was never used and the
        # type always stayed 'html'; the js/css fallback is inferred from the
        # comments above -- confirm this is the intended behavior
        self.text_type = 'js' if mod == 'js_' else 'css'

    def read_and_keep(self, size):
        """Read up to ``size`` bytes without losing them from the stream.

        The content stream is replaced by a ``BufferedReader`` that starts
        with the bytes just read.

        :return: the bytes read
        """
        buff = self.content_stream.read(size)
        self.content_stream = BufferedReader(self.content_stream,
                                             starting_data=buff)
        return buff

    def is_content_rw(self):
        """Return True if the content of this record should be rewritten."""
        if not self.url_rewriter.prefix:
            return False

        if self.url_rewriter.wburl.mod == 'id_':
            return False

        if self.text_type == 'html':
            if self.url_rewriter.rewrite_opts.get('is_ajax'):
                return False

        elif self.text_type == 'plain':
            # plain text is only rewritten when requested as js or css
            if self.url_rewriter.wburl.mod not in ('js_', 'cs_'):
                return False

        elif not self.text_type:
            return False

        return True

    def is_url_rw(self):
        """Return True if urls should be rewritten for this record."""
        if not self.url_rewriter:
            return False

        if self.url_rewriter.wburl.mod == 'id_':
            return False

        return True
|
||||||
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
from pywb.rewrite.rewrite_amf import RewriteAMFMixin
|
#from pywb.rewrite.rewrite_amf import RewriteAMFMixin
|
||||||
from pywb.rewrite.rewrite_dash import RewriteDASHMixin
|
#from pywb.rewrite.rewrite_dash import RewriteDASHMixin
|
||||||
from pywb.rewrite.rewrite_content import RewriteContent
|
#from pywb.rewrite.rewrite_content import RewriteContent
|
||||||
|
from pywb.urlrewrite.rewriter import Rewriter
|
||||||
|
|
||||||
from pywb.rewrite.wburl import WbUrl
|
from pywb.rewrite.wburl import WbUrl
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter, SchemeOnlyUrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter, SchemeOnlyUrlRewriter
|
||||||
@ -44,8 +45,8 @@ class UpstreamException(WbException):
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class Rewriter(RewriteDASHMixin, RewriteAMFMixin, RewriteContent):
|
#class Rewriter(RewriteDASHMixin, RewriteAMFMixin, RewriteContent):
|
||||||
pass
|
# pass
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@ -67,9 +68,10 @@ class RewriterApp(object):
|
|||||||
self.frame_mod = None
|
self.frame_mod = None
|
||||||
self.replay_mod = ''
|
self.replay_mod = ''
|
||||||
|
|
||||||
frame_type = 'inverse' if framed_replay else False
|
#frame_type = 'inverse' if framed_replay else False
|
||||||
|
|
||||||
self.content_rewriter = Rewriter(is_framed_replay=frame_type)
|
#self.content_rewriter = Rewriter(is_framed_replay=frame_type)
|
||||||
|
self.content_rw = Rewriter('pkg://pywb/rules.yaml', self.replay_mod)
|
||||||
|
|
||||||
if not jinja_env:
|
if not jinja_env:
|
||||||
jinja_env = JinjaEnv(globals={'static_path': 'static'})
|
jinja_env = JinjaEnv(globals={'static_path': 'static'})
|
||||||
@ -149,7 +151,7 @@ class RewriterApp(object):
|
|||||||
urlkey = canonicalize(wb_url.url)
|
urlkey = canonicalize(wb_url.url)
|
||||||
|
|
||||||
inputreq = RewriteInputRequest(environ, urlkey, wb_url.url,
|
inputreq = RewriteInputRequest(environ, urlkey, wb_url.url,
|
||||||
self.content_rewriter)
|
self.content_rw)
|
||||||
|
|
||||||
inputreq.include_post_query(wb_url.url)
|
inputreq.include_post_query(wb_url.url)
|
||||||
|
|
||||||
@ -267,14 +269,15 @@ class RewriterApp(object):
|
|||||||
cookie_rewriter = self.cookie_tracker.get_rewriter(urlrewriter,
|
cookie_rewriter = self.cookie_tracker.get_rewriter(urlrewriter,
|
||||||
cookie_key)
|
cookie_key)
|
||||||
|
|
||||||
result = self.content_rewriter.rewrite_content(urlrewriter,
|
#result = self.content_rewriter.rewrite_content(urlrewriter,
|
||||||
record.http_headers,
|
# record.http_headers,
|
||||||
record.raw_stream,
|
# record.raw_stream,
|
||||||
head_insert_func,
|
# head_insert_func,
|
||||||
urlkey,
|
# urlkey,
|
||||||
cdx,
|
# cdx,
|
||||||
cookie_rewriter,
|
# cookie_rewriter,
|
||||||
environ)
|
# environ)
|
||||||
|
result = self.content_rw(record, urlrewriter, cookie_rewriter, head_insert_func, cdx)
|
||||||
|
|
||||||
status_headers, gen, is_rw = result
|
status_headers, gen, is_rw = result
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user