mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite system refactor:
- rewriter interface accepts RewriteInfo instance - add StreamingRewriter adapter wraps html, regex rewriters to support rewriting streaming text from general rewriter interface - add RewriteDASH, RewriteHLS as (non-streaming) rewriters. Need to read contents into buffer (for now) - add RewriteAMF experimental AMF rewriter - general rewriting system in BaseContentRewriter, default rewriters configured in DefaultRewriter - tests: disable banner-only test as not currently support banner only (for now)
This commit is contained in:
parent
db9d0ae41a
commit
c1be7d4da5
324
pywb/rewrite/content_rewriter.py
Normal file
324
pywb/rewrite/content_rewriter.py
Normal file
@ -0,0 +1,324 @@
|
|||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
from contextlib import closing
|
||||||
|
|
||||||
|
from warcio.bufferedreaders import BufferedReader
|
||||||
|
from warcio.utils import to_native_str
|
||||||
|
|
||||||
|
import re
|
||||||
|
import webencodings
|
||||||
|
|
||||||
|
from pywb.webagg.utils import StreamIter, BUFF_SIZE
|
||||||
|
from pywb.rewrite.cookie_rewriter import ExactPathCookieRewriter
|
||||||
|
|
||||||
|
from pywb.utils.loaders import load_yaml_config
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class BaseContentRewriter(object):
|
||||||
|
CHARSET_REGEX = re.compile(b'<meta[^>]*?[\s;"\']charset\s*=[\s"\']*([^\s"\'/>]*)')
|
||||||
|
|
||||||
|
def __init__(self, rules_file, replay_mod=''):
|
||||||
|
self.rules = []
|
||||||
|
self.load_rules(rules_file)
|
||||||
|
self.replay_mod = replay_mod
|
||||||
|
#for rw in self.known_rewriters:
|
||||||
|
# self.all_rewriters[rw.name] = rw
|
||||||
|
|
||||||
|
def add_rewriter(self, rw):
|
||||||
|
self.all_rewriters[rw.name] = rw
|
||||||
|
|
||||||
|
def get_rewriter(self, url, text_type):
|
||||||
|
return self.all_rewriters.get(text_type)
|
||||||
|
|
||||||
|
def load_rules(self, filename):
|
||||||
|
config = load_yaml_config(filename)
|
||||||
|
for rule in config.get('rules'):
|
||||||
|
rule = self.parse_rewrite_rule(rule)
|
||||||
|
if rule:
|
||||||
|
self.rules.append(rule)
|
||||||
|
|
||||||
|
def parse_rewrite_rule(self, config):
|
||||||
|
rw_config = config.get('rewrite')
|
||||||
|
if not rw_config:
|
||||||
|
return
|
||||||
|
|
||||||
|
rule = rw_config
|
||||||
|
url_prefix = config.get('url_prefix')
|
||||||
|
if not isinstance(url_prefix, list):
|
||||||
|
url_prefix = [url_prefix]
|
||||||
|
|
||||||
|
rule['url_prefix'] = url_prefix
|
||||||
|
|
||||||
|
regexs = rule.get('js_regexs')
|
||||||
|
if regexs:
|
||||||
|
parse_rules_func = self.init_js_regex(regexs)
|
||||||
|
rule['js_regex_func'] = parse_rules_func
|
||||||
|
|
||||||
|
return rule
|
||||||
|
|
||||||
|
def get_rule(self, cdx):
|
||||||
|
urlkey = to_native_str(cdx['urlkey'])
|
||||||
|
|
||||||
|
for rule in self.rules:
|
||||||
|
if any((urlkey.startswith(prefix) for prefix in rule['url_prefix'])):
|
||||||
|
return rule
|
||||||
|
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def get_rw_class(self, rule, text_type, rwinfo):
|
||||||
|
if text_type == 'js' and not rwinfo.is_url_rw():
|
||||||
|
text_type = 'js-proxy'
|
||||||
|
|
||||||
|
rw_type = rule.get(text_type, text_type)
|
||||||
|
rw_class = self.all_rewriters.get(rw_type)
|
||||||
|
|
||||||
|
return rw_type, rw_class
|
||||||
|
|
||||||
|
def create_rewriter(self, text_type, rule, rwinfo, cdx, head_insert_func=None):
|
||||||
|
rw_type, rw_class = self.get_rw_class(rule, text_type, rwinfo)
|
||||||
|
|
||||||
|
if rw_type in ('js', 'js_proxy'):
|
||||||
|
extra_rules = []
|
||||||
|
if 'js_regex_func' in rule:
|
||||||
|
extra_rules = rule['js_regex_func'](rwinfo.url_rewriter)
|
||||||
|
|
||||||
|
return rw_class(rwinfo.url_rewriter, extra_rules)
|
||||||
|
|
||||||
|
elif rw_type != 'html':
|
||||||
|
return rw_class(rwinfo.url_rewriter)
|
||||||
|
|
||||||
|
# HTML Rewriter
|
||||||
|
head_insert_str = self.get_head_insert(rwinfo, rule, head_insert_func, cdx)
|
||||||
|
|
||||||
|
js_rewriter = self.create_rewriter('js', rule, rwinfo, cdx)
|
||||||
|
css_rewriter = self.create_rewriter('css', rule, rwinfo, cdx)
|
||||||
|
|
||||||
|
rw = rw_class(rwinfo.url_rewriter,
|
||||||
|
js_rewriter=js_rewriter,
|
||||||
|
css_rewriter=css_rewriter,
|
||||||
|
head_insert=head_insert_str,
|
||||||
|
url=cdx['url'],
|
||||||
|
defmod=self.replay_mod,
|
||||||
|
parse_comments=rule.get('parse_comments', False))
|
||||||
|
|
||||||
|
return rw
|
||||||
|
|
||||||
|
def get_head_insert(self, rwinfo, rule, head_insert_func, cdx):
|
||||||
|
head_insert_str = ''
|
||||||
|
charset = rwinfo.charset
|
||||||
|
|
||||||
|
# if no charset set, attempt to extract from first 1024
|
||||||
|
if not charset:
|
||||||
|
first_buff = rwinfo.read_and_keep(1024)
|
||||||
|
charset = self.extract_html_charset(first_buff)
|
||||||
|
|
||||||
|
if head_insert_func:
|
||||||
|
head_insert_orig = head_insert_func(rule, cdx)
|
||||||
|
|
||||||
|
if charset:
|
||||||
|
try:
|
||||||
|
head_insert_str = webencodings.encode(head_insert_orig, charset)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if not head_insert_str:
|
||||||
|
charset = 'utf-8'
|
||||||
|
head_insert_str = head_insert_orig.encode(charset)
|
||||||
|
|
||||||
|
head_insert_str = head_insert_str.decode('iso-8859-1')
|
||||||
|
|
||||||
|
return head_insert_str
|
||||||
|
|
||||||
|
def extract_html_charset(self, buff):
|
||||||
|
charset = None
|
||||||
|
m = self.CHARSET_REGEX.search(buff)
|
||||||
|
if m:
|
||||||
|
charset = m.group(1)
|
||||||
|
charset = to_native_str(charset)
|
||||||
|
|
||||||
|
return charset
|
||||||
|
|
||||||
|
def rewrite_headers(self, rwinfo):
|
||||||
|
if rwinfo.is_url_rw():
|
||||||
|
header_rw_name = 'header'
|
||||||
|
else:
|
||||||
|
header_rw_name = 'header-proxy'
|
||||||
|
|
||||||
|
header_rw_class = self.all_rewriters.get(header_rw_name)
|
||||||
|
rwinfo.rw_http_headers = header_rw_class(rwinfo)()
|
||||||
|
|
||||||
|
def __call__(self, record, url_rewriter, cookie_rewriter,
|
||||||
|
head_insert_func=None,
|
||||||
|
cdx=None):
|
||||||
|
|
||||||
|
rwinfo = RewriteInfo(record, self.get_rewrite_types(), url_rewriter, cookie_rewriter)
|
||||||
|
|
||||||
|
self.rewrite_headers(rwinfo)
|
||||||
|
|
||||||
|
content_rewriter = None
|
||||||
|
if rwinfo.is_content_rw():
|
||||||
|
rule = self.get_rule(cdx)
|
||||||
|
content_rewriter = self.create_rewriter(rwinfo.text_type, rule, rwinfo, cdx, head_insert_func)
|
||||||
|
|
||||||
|
if content_rewriter:
|
||||||
|
gen = content_rewriter(rwinfo)
|
||||||
|
else:
|
||||||
|
gen = StreamIter(rwinfo.content_stream)
|
||||||
|
|
||||||
|
return rwinfo.rw_http_headers, gen, (content_rewriter != None)
|
||||||
|
|
||||||
|
def init_js_regexs(self, regexs):
|
||||||
|
raise NotImplemented()
|
||||||
|
|
||||||
|
def get_rewrite_types(self):
|
||||||
|
raise NotImplemented()
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class StreamingRewriter(object):
|
||||||
|
def __init__(self):
|
||||||
|
self.align_to_line = True
|
||||||
|
|
||||||
|
def __call__(self, rwinfo):
|
||||||
|
gen = self.rewrite_text_stream_to_gen(rwinfo.content_stream,
|
||||||
|
rewrite_func=self.rewrite,
|
||||||
|
final_read_func=self.close,
|
||||||
|
align_to_line=self.align_to_line)
|
||||||
|
|
||||||
|
return gen
|
||||||
|
|
||||||
|
def rewrite(self, string):
|
||||||
|
return string
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
return ''
|
||||||
|
|
||||||
|
def rewrite_text_stream_to_gen(cls, stream,
|
||||||
|
rewrite_func,
|
||||||
|
final_read_func,
|
||||||
|
align_to_line):
|
||||||
|
"""
|
||||||
|
Convert stream to generator using applying rewriting func
|
||||||
|
to each portion of the stream.
|
||||||
|
Align to line boundaries if needed.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
buff = ''
|
||||||
|
|
||||||
|
while True:
|
||||||
|
buff = stream.read(BUFF_SIZE)
|
||||||
|
if not buff:
|
||||||
|
break
|
||||||
|
|
||||||
|
if align_to_line:
|
||||||
|
buff += stream.readline()
|
||||||
|
|
||||||
|
buff = rewrite_func(buff.decode('iso-8859-1'))
|
||||||
|
yield buff.encode('iso-8859-1')
|
||||||
|
|
||||||
|
# For adding a tail/handling final buffer
|
||||||
|
buff = final_read_func()
|
||||||
|
if buff:
|
||||||
|
yield buff.encode('iso-8859-1')
|
||||||
|
|
||||||
|
finally:
|
||||||
|
stream.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class RewriteInfo(object):
|
||||||
|
TAG_REGEX = re.compile(b'^\s*\<')
|
||||||
|
|
||||||
|
def __init__(self, record, rewrite_types, url_rewriter, cookie_rewriter):
|
||||||
|
self.record = record
|
||||||
|
|
||||||
|
self.rw_http_headers = record.http_headers
|
||||||
|
self.content_stream = record.content_stream()
|
||||||
|
|
||||||
|
self.rewrite_types = rewrite_types
|
||||||
|
|
||||||
|
self.text_type = None
|
||||||
|
self.charset = None
|
||||||
|
|
||||||
|
self.url_rewriter = url_rewriter
|
||||||
|
|
||||||
|
if not cookie_rewriter:
|
||||||
|
cookie_rewriter = ExactPathCookieRewriter(url_rewriter)
|
||||||
|
|
||||||
|
self.cookie_rewriter = cookie_rewriter
|
||||||
|
|
||||||
|
self._fill_text_type_and_charset()
|
||||||
|
self._resolve_text_type()
|
||||||
|
|
||||||
|
def _fill_text_type_and_charset(self):
|
||||||
|
content_type = self.record.http_headers.get_header('Content-Type')
|
||||||
|
if not content_type:
|
||||||
|
return
|
||||||
|
|
||||||
|
parts = content_type.split(';', 1)
|
||||||
|
mime = parts[0]
|
||||||
|
|
||||||
|
self.text_type = self.rewrite_types.get(mime)
|
||||||
|
if not self.text_type:
|
||||||
|
return
|
||||||
|
|
||||||
|
if len(parts) == 2:
|
||||||
|
parts = parts[1].lower().split('charset=', 1)
|
||||||
|
if len(parts) == 2:
|
||||||
|
self.charset = parts[1].strip()
|
||||||
|
|
||||||
|
def _resolve_text_type(self):
|
||||||
|
mod = self.url_rewriter.wburl.mod
|
||||||
|
|
||||||
|
if self.text_type == 'css' and mod == 'js_':
|
||||||
|
self.text_type = 'css'
|
||||||
|
|
||||||
|
# only attempt to resolve between html and other text types
|
||||||
|
if self.text_type != 'html':
|
||||||
|
return
|
||||||
|
|
||||||
|
if mod != 'js_' and mod != 'cs_':
|
||||||
|
return
|
||||||
|
|
||||||
|
buff = self.read_and_keep(128)
|
||||||
|
|
||||||
|
# check if starts with a tag, then likely html
|
||||||
|
if self.TAG_REGEX.match(buff):
|
||||||
|
self.text_type = 'html'
|
||||||
|
|
||||||
|
def read_and_keep(self, size):
|
||||||
|
buff = self.content_stream.read(size)
|
||||||
|
self.content_stream = BufferedReader(self.content_stream, starting_data=buff)
|
||||||
|
return buff
|
||||||
|
|
||||||
|
def is_content_rw(self):
|
||||||
|
if not self.url_rewriter.prefix:
|
||||||
|
return False
|
||||||
|
|
||||||
|
if self.url_rewriter.wburl.mod == 'id_':
|
||||||
|
return False
|
||||||
|
|
||||||
|
if self.text_type == 'html':
|
||||||
|
if self.url_rewriter.rewrite_opts.get('is_ajax'):
|
||||||
|
return False
|
||||||
|
|
||||||
|
elif self.text_type == 'plain':
|
||||||
|
if self.url_rewriter.wburl.mod not in ('js_', 'cs_'):
|
||||||
|
return False
|
||||||
|
|
||||||
|
elif not self.text_type:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def is_url_rw(self):
|
||||||
|
if not self.url_rewriter:
|
||||||
|
return False
|
||||||
|
|
||||||
|
if self.url_rewriter.wburl.mod == 'id_':
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
@ -11,6 +11,8 @@ from six.moves.urllib.parse import urljoin, urlsplit, urlunsplit
|
|||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
from pywb.rewrite.regex_rewriters import JSRewriter, CSSRewriter
|
from pywb.rewrite.regex_rewriters import JSRewriter, CSSRewriter
|
||||||
|
|
||||||
|
from pywb.rewrite.content_rewriter import StreamingRewriter
|
||||||
|
|
||||||
import six.moves.html_parser
|
import six.moves.html_parser
|
||||||
six.moves.html_parser.unescape = lambda x: x
|
six.moves.html_parser.unescape = lambda x: x
|
||||||
from six import text_type
|
from six import text_type
|
||||||
@ -441,7 +443,7 @@ class HTMLRewriterMixin(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
class HTMLRewriter(HTMLRewriterMixin, StreamingRewriter, HTMLParser):
|
||||||
PARSETAG = re.compile('[<]')
|
PARSETAG = re.compile('[<]')
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -451,6 +453,8 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
|||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
|
|
||||||
super(HTMLRewriter, self).__init__(*args, **kwargs)
|
super(HTMLRewriter, self).__init__(*args, **kwargs)
|
||||||
|
# for StreamingRewriter
|
||||||
|
self.align_to_line = False
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
HTMLParser.reset(self)
|
HTMLParser.reset(self)
|
||||||
|
@ -1,10 +1,14 @@
|
|||||||
import re
|
import re
|
||||||
|
from pywb.rewrite.content_rewriter import StreamingRewriter
|
||||||
|
|
||||||
class JSONPRewriter(object):
|
|
||||||
|
# ============================================================================
|
||||||
|
class JSONPRewriter(StreamingRewriter):
|
||||||
JSONP = re.compile(r'^(\w+)\(\{')
|
JSONP = re.compile(r'^(\w+)\(\{')
|
||||||
CALLBACK = re.compile(r'[?].*callback=([^&]+)')
|
CALLBACK = re.compile(r'[?].*callback=([^&]+)')
|
||||||
|
|
||||||
def __init__(self, urlrewriter):
|
def __init__(self, urlrewriter):
|
||||||
|
super(JSONPRewriter, self).__init__()
|
||||||
self.urlrewriter = urlrewriter
|
self.urlrewriter = urlrewriter
|
||||||
|
|
||||||
def rewrite(self, string):
|
def rewrite(self, string):
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
|
from pywb.rewrite.content_rewriter import StreamingRewriter
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -13,7 +14,7 @@ def load_function(string):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class RegexRewriter(object):
|
class RegexRewriter(StreamingRewriter):
|
||||||
#@staticmethod
|
#@staticmethod
|
||||||
#def comment_out(string):
|
#def comment_out(string):
|
||||||
# return '/*' + string + '*/'
|
# return '/*' + string + '*/'
|
||||||
@ -43,6 +44,7 @@ class RegexRewriter(object):
|
|||||||
#DEFAULT_OP = add_prefix
|
#DEFAULT_OP = add_prefix
|
||||||
|
|
||||||
def __init__(self, rewriter, rules):
|
def __init__(self, rewriter, rules):
|
||||||
|
super(RegexRewriter, self).__init__()
|
||||||
#rules = self.create_rules(http_prefix)
|
#rules = self.create_rules(http_prefix)
|
||||||
|
|
||||||
# Build regexstr, concatenating regex list
|
# Build regexstr, concatenating regex list
|
||||||
|
@ -3,16 +3,9 @@ from six.moves import zip
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Expiermental: not fully tested
|
# Experimental: not fully tested
|
||||||
class RewriteAMFMixin(object): #pragma: no cover
|
class RewriteAMF(object): #pragma: no cover
|
||||||
def handle_custom_rewrite(self, rewritten_headers, stream, urlrewriter, mod, env):
|
def __call__(self, rwinfo):
|
||||||
if rewritten_headers.status_headers.get_header('Content-Type') == 'application/x-amf':
|
|
||||||
stream = self.rewrite_amf(stream, env)
|
|
||||||
|
|
||||||
return (super(RewriteAMFMixin, self).
|
|
||||||
handle_custom_rewrite(rewritten_headers, stream, urlrewriter, mod, env))
|
|
||||||
|
|
||||||
def rewrite_amf(self, stream, env):
|
|
||||||
try:
|
try:
|
||||||
from pyamf import remoting
|
from pyamf import remoting
|
||||||
|
|
||||||
@ -26,9 +19,10 @@ class RewriteAMFMixin(object): #pragma: no cover
|
|||||||
iobuff.seek(0)
|
iobuff.seek(0)
|
||||||
res = remoting.decode(iobuff)
|
res = remoting.decode(iobuff)
|
||||||
|
|
||||||
if env and env.get('pywb.inputdata'):
|
# TODO: revisit this
|
||||||
inputdata = env.get('pywb.inputdata')
|
inputdata = rwinfo.url_rewriter.rewrite_opts.get('pywb.inputdata')
|
||||||
|
|
||||||
|
if inputdata:
|
||||||
new_list = []
|
new_list = []
|
||||||
|
|
||||||
for src, target in zip(inputdata.bodies, res.bodies):
|
for src, target in zip(inputdata.bodies, res.bodies):
|
||||||
|
@ -1,39 +1,17 @@
|
|||||||
import xml.etree.ElementTree as ET
|
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
from io import BytesIO, StringIO
|
from io import BytesIO, StringIO
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from pywb.webagg.utils import StreamIter
|
import xml.etree.ElementTree as ET
|
||||||
import re
|
|
||||||
|
|
||||||
EXT_INF = re.compile('#EXT-X-STREAM-INF:(?:.*[,])?BANDWIDTH=([\d]+)')
|
from pywb.webagg.utils import StreamIter
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class RewriteDASHMixin(object):
|
class RewriteDASH(object):
|
||||||
def handle_custom_rewrite(self, rewritten_headers, stream, urlrewriter, mod, env):
|
def __call__(self, rwinfo):
|
||||||
if rewritten_headers.status_headers.get_header('Content-Type') == 'application/dash+xml':
|
|
||||||
stream = self._decoding_stream(rewritten_headers, stream)
|
|
||||||
stream, _ = self.rewrite_dash(stream)
|
|
||||||
rewritten_headers.status_headers.remove_header('content-length')
|
|
||||||
return (rewritten_headers.status_headers, StreamIter(stream), True)
|
|
||||||
|
|
||||||
elif rewritten_headers.status_headers.get_header('Content-Type') == 'application/x-mpegURL':
|
|
||||||
stream = self._decoding_stream(rewritten_headers, stream)
|
|
||||||
stream = self.rewrite_m3u8(stream)
|
|
||||||
rewritten_headers.status_headers.remove_header('content-length')
|
|
||||||
return (rewritten_headers.status_headers, StreamIter(stream), True)
|
|
||||||
|
|
||||||
return (super(RewriteDASHMixin, self).
|
|
||||||
handle_custom_rewrite(rewritten_headers, stream, urlrewriter, mod, env))
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def rewrite_dash(cls, stream):
|
|
||||||
ET.register_namespace('', 'urn:mpeg:dash:schema:mpd:2011')
|
|
||||||
namespaces = {'mpd': 'urn:mpeg:dash:schema:mpd:2011'}
|
|
||||||
|
|
||||||
buff_io = BytesIO()
|
buff_io = BytesIO()
|
||||||
with closing(stream) as fh:
|
with closing(rwinfo.content_stream) as fh:
|
||||||
while True:
|
while True:
|
||||||
buff = fh.read()
|
buff = fh.read()
|
||||||
if not buff:
|
if not buff:
|
||||||
@ -42,8 +20,15 @@ class RewriteDASHMixin(object):
|
|||||||
buff_io.write(buff)
|
buff_io.write(buff)
|
||||||
|
|
||||||
buff_io.seek(0)
|
buff_io.seek(0)
|
||||||
|
res_buff, best_ids = self.rewrite_dash(buff_io)
|
||||||
|
return StreamIter(res_buff)
|
||||||
|
|
||||||
|
def rewrite_dash(self, stream):
|
||||||
|
ET.register_namespace('', 'urn:mpeg:dash:schema:mpd:2011')
|
||||||
|
namespaces = {'mpd': 'urn:mpeg:dash:schema:mpd:2011'}
|
||||||
|
|
||||||
tree = ET.ElementTree()
|
tree = ET.ElementTree()
|
||||||
tree.parse(buff_io)
|
tree.parse(stream)
|
||||||
|
|
||||||
root = tree.getroot()
|
root = tree.getroot()
|
||||||
|
|
||||||
@ -72,40 +57,8 @@ class RewriteDASHMixin(object):
|
|||||||
buff_io.seek(0)
|
buff_io.seek(0)
|
||||||
return buff_io, best_ids
|
return buff_io, best_ids
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def rewrite_m3u8(cls, stream):
|
|
||||||
buff = stream.read()
|
|
||||||
|
|
||||||
lines = buff.decode('utf-8').split('\n')
|
|
||||||
best = None
|
|
||||||
indexes = []
|
|
||||||
count = 0
|
|
||||||
best_index = None
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
m = EXT_INF.match(line)
|
|
||||||
if m:
|
|
||||||
indexes.append(count)
|
|
||||||
bandwidth = int(m.group(1))
|
|
||||||
if not best or bandwidth > best:
|
|
||||||
best = bandwidth
|
|
||||||
best_index = count
|
|
||||||
|
|
||||||
count = count + 1
|
|
||||||
|
|
||||||
if indexes and best_index is not None:
|
|
||||||
indexes.remove(best_index)
|
|
||||||
|
|
||||||
for index in reversed(indexes):
|
|
||||||
del lines[index + 1]
|
|
||||||
del lines[index]
|
|
||||||
|
|
||||||
buff_io = BytesIO()
|
|
||||||
buff_io.write('\n'.join(lines).encode('utf-8'))
|
|
||||||
buff_io.seek(0)
|
|
||||||
return buff_io
|
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
def rewrite_fb_dash(string):
|
def rewrite_fb_dash(string):
|
||||||
DASH_SPLIT = r'\n",dash_prefetched_representation_ids:'
|
DASH_SPLIT = r'\n",dash_prefetched_representation_ids:'
|
||||||
inx = string.find(DASH_SPLIT)
|
inx = string.find(DASH_SPLIT)
|
||||||
@ -117,7 +70,7 @@ def rewrite_fb_dash(string):
|
|||||||
buff = string.encode('utf-8').decode('unicode-escape')
|
buff = string.encode('utf-8').decode('unicode-escape')
|
||||||
buff = buff.encode('utf-8')
|
buff = buff.encode('utf-8')
|
||||||
io = BytesIO(buff)
|
io = BytesIO(buff)
|
||||||
io, best_ids = RewriteDASHMixin.rewrite_dash(io)
|
io, best_ids = RewriteDASHMixin().rewrite_dash(io)
|
||||||
string = json.dumps(io.read().decode('utf-8'))
|
string = json.dumps(io.read().decode('utf-8'))
|
||||||
string = string[1:-1].replace('<', r'\x3C')
|
string = string[1:-1].replace('<', r'\x3C')
|
||||||
|
|
||||||
|
44
pywb/rewrite/rewrite_hls.py
Normal file
44
pywb/rewrite/rewrite_hls.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
import re
|
||||||
|
from io import BytesIO
|
||||||
|
from pywb.webagg.utils import StreamIter
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class RewriteHLS(object):
|
||||||
|
EXT_INF = re.compile('#EXT-X-STREAM-INF:(?:.*[,])?BANDWIDTH=([\d]+)')
|
||||||
|
|
||||||
|
def __call__(self, rwinfo):
|
||||||
|
return StreamIter(self.rewrite_m3u8(rwinfo.content_stream))
|
||||||
|
|
||||||
|
def rewrite_m3u8(self, stream):
|
||||||
|
buff = stream.read()
|
||||||
|
|
||||||
|
lines = buff.decode('utf-8').split('\n')
|
||||||
|
best = None
|
||||||
|
indexes = []
|
||||||
|
count = 0
|
||||||
|
best_index = None
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
m = self.EXT_INF.match(line)
|
||||||
|
if m:
|
||||||
|
indexes.append(count)
|
||||||
|
bandwidth = int(m.group(1))
|
||||||
|
if not best or bandwidth > best:
|
||||||
|
best = bandwidth
|
||||||
|
best_index = count
|
||||||
|
|
||||||
|
count = count + 1
|
||||||
|
|
||||||
|
if indexes and best_index is not None:
|
||||||
|
indexes.remove(best_index)
|
||||||
|
|
||||||
|
for index in reversed(indexes):
|
||||||
|
del lines[index + 1]
|
||||||
|
del lines[index]
|
||||||
|
|
||||||
|
buff_io = BytesIO()
|
||||||
|
buff_io.write('\n'.join(lines).encode('utf-8'))
|
||||||
|
buff_io.seek(0)
|
||||||
|
return buff_io
|
||||||
|
|
@ -1,10 +1,4 @@
|
|||||||
from warcio.utils import to_native_str
|
from pywb.rewrite.content_rewriter import BaseContentRewriter
|
||||||
from warcio.bufferedreaders import BufferedReader
|
|
||||||
|
|
||||||
import webencodings
|
|
||||||
import re
|
|
||||||
|
|
||||||
from pywb.utils.loaders import load_yaml_config
|
|
||||||
|
|
||||||
from pywb.rewrite.html_rewriter import HTMLRewriter
|
from pywb.rewrite.html_rewriter import HTMLRewriter
|
||||||
|
|
||||||
@ -12,19 +6,17 @@ from pywb.rewrite.regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
|||||||
from pywb.rewrite.regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
from pywb.rewrite.regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
||||||
from pywb.rewrite.regex_rewriters import JSLocationOnlyRewriter, JSNoneRewriter
|
from pywb.rewrite.regex_rewriters import JSLocationOnlyRewriter, JSNoneRewriter
|
||||||
|
|
||||||
from pywb.rewrite.cookie_rewriter import ExactPathCookieRewriter
|
|
||||||
|
|
||||||
from pywb.urlrewrite.header_rewriter import PrefixHeaderRewriter, ProxyHeaderRewriter
|
from pywb.urlrewrite.header_rewriter import PrefixHeaderRewriter, ProxyHeaderRewriter
|
||||||
|
|
||||||
from pywb.rewrite.jsonp_rewriter import JSONPRewriter
|
from pywb.rewrite.jsonp_rewriter import JSONPRewriter
|
||||||
|
|
||||||
from pywb.webagg.utils import StreamIter, BUFF_SIZE
|
from pywb.rewrite.rewrite_dash import RewriteDASH
|
||||||
|
from pywb.rewrite.rewrite_hls import RewriteHLS
|
||||||
|
from pywb.rewrite.rewrite_amf import RewriteAMF
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class Rewriter(object):
|
class DefaultRewriter(BaseContentRewriter):
|
||||||
CHARSET_REGEX = re.compile(b'<meta[^>]*?[\s;"\']charset\s*=[\s"\']*([^\s"\'/>]*)')
|
|
||||||
|
|
||||||
all_rewriters = {
|
all_rewriters = {
|
||||||
'header': PrefixHeaderRewriter,
|
'header': PrefixHeaderRewriter,
|
||||||
'header-proxy': ProxyHeaderRewriter,
|
'header-proxy': ProxyHeaderRewriter,
|
||||||
@ -39,6 +31,12 @@ class Rewriter(object):
|
|||||||
'json': JSONPRewriter,
|
'json': JSONPRewriter,
|
||||||
|
|
||||||
'xml': XMLRewriter,
|
'xml': XMLRewriter,
|
||||||
|
|
||||||
|
'dash': RewriteDASH,
|
||||||
|
|
||||||
|
'hls': RewriteHLS,
|
||||||
|
|
||||||
|
'amf': RewriteAMF,
|
||||||
}
|
}
|
||||||
|
|
||||||
rewrite_types = {
|
rewrite_types = {
|
||||||
@ -64,6 +62,9 @@ class Rewriter(object):
|
|||||||
# DASH
|
# DASH
|
||||||
'application/dash+xml': 'dash',
|
'application/dash+xml': 'dash',
|
||||||
|
|
||||||
|
# AMF
|
||||||
|
'application/x-amf': 'amf',
|
||||||
|
|
||||||
# XML
|
# XML
|
||||||
'text/xml': 'xml',
|
'text/xml': 'xml',
|
||||||
'application/xml': 'xml',
|
'application/xml': 'xml',
|
||||||
@ -73,291 +74,8 @@ class Rewriter(object):
|
|||||||
'text/plain': 'plain',
|
'text/plain': 'plain',
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, rules_file, replay_mod=''):
|
def init_js_regex(self, regexs):
|
||||||
self.rules = []
|
return RegexRewriter.parse_rules_from_config(regexs)
|
||||||
self.load_rules(rules_file)
|
|
||||||
self.replay_mod = replay_mod
|
|
||||||
#for rw in self.known_rewriters:
|
|
||||||
# self.all_rewriters[rw.name] = rw
|
|
||||||
|
|
||||||
def add_rewriter(self, rw):
|
|
||||||
self.all_rewriters[rw.name] = rw
|
|
||||||
|
|
||||||
def get_rewriter(self, url, text_type):
|
|
||||||
return self.all_rewriters.get(text_type)
|
|
||||||
|
|
||||||
def load_rules(self, filename):
|
|
||||||
config = load_yaml_config(filename)
|
|
||||||
for rule in config.get('rules'):
|
|
||||||
rule = self.parse_rewrite_rule(rule)
|
|
||||||
if rule:
|
|
||||||
self.rules.append(rule)
|
|
||||||
|
|
||||||
def parse_rewrite_rule(self, config):
|
|
||||||
rw_config = config.get('rewrite')
|
|
||||||
if not rw_config:
|
|
||||||
return
|
|
||||||
|
|
||||||
rule = rw_config
|
|
||||||
url_prefix = config.get('url_prefix')
|
|
||||||
if not isinstance(url_prefix, list):
|
|
||||||
url_prefix = [url_prefix]
|
|
||||||
|
|
||||||
rule['url_prefix'] = url_prefix
|
|
||||||
|
|
||||||
regexs = rule.get('js_regexs')
|
|
||||||
if regexs:
|
|
||||||
parse_rules_func = RegexRewriter.parse_rules_from_config(regexs)
|
|
||||||
rule['js_regex_func'] = parse_rules_func
|
|
||||||
|
|
||||||
return rule
|
|
||||||
|
|
||||||
def get_rule(self, cdx):
|
|
||||||
urlkey = to_native_str(cdx['urlkey'])
|
|
||||||
|
|
||||||
for rule in self.rules:
|
|
||||||
if any((urlkey.startswith(prefix) for prefix in rule['url_prefix'])):
|
|
||||||
return rule
|
|
||||||
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def get_rw_class(self, rule, text_type, rwinfo):
|
|
||||||
if text_type == 'js' and not rwinfo.is_url_rw():
|
|
||||||
text_type = 'js-proxy'
|
|
||||||
|
|
||||||
rw_type = rule.get(text_type, text_type)
|
|
||||||
rw_class = self.all_rewriters.get(rw_type)
|
|
||||||
|
|
||||||
return rw_type, rw_class
|
|
||||||
|
|
||||||
def create_rewriter(self, text_type, rule, rwinfo, cdx, head_insert_func=None):
|
|
||||||
rw_type, rw_class = self.get_rw_class(rule, text_type, rwinfo)
|
|
||||||
|
|
||||||
if rw_type in ('js', 'js_proxy'):
|
|
||||||
extra_rules = []
|
|
||||||
if 'js_regex_func' in rule:
|
|
||||||
extra_rules = rule['js_regex_func'](rwinfo.url_rewriter)
|
|
||||||
|
|
||||||
return rw_class(rwinfo.url_rewriter, extra_rules)
|
|
||||||
|
|
||||||
elif rw_type != 'html':
|
|
||||||
return rw_class(rwinfo.url_rewriter)
|
|
||||||
|
|
||||||
# HTML Rewriter
|
|
||||||
head_insert_str = self.get_head_insert(rwinfo, rule, head_insert_func, cdx)
|
|
||||||
|
|
||||||
js_rewriter = self.create_rewriter('js', rule, rwinfo, cdx)
|
|
||||||
css_rewriter = self.create_rewriter('css', rule, rwinfo, cdx)
|
|
||||||
|
|
||||||
rw = rw_class(rwinfo.url_rewriter,
|
|
||||||
js_rewriter=js_rewriter,
|
|
||||||
css_rewriter=css_rewriter,
|
|
||||||
head_insert=head_insert_str,
|
|
||||||
url=cdx['url'],
|
|
||||||
defmod=self.replay_mod,
|
|
||||||
parse_comments=rule.get('parse_comments', False))
|
|
||||||
|
|
||||||
return rw
|
|
||||||
|
|
||||||
def get_head_insert(self, rwinfo, rule, head_insert_func, cdx):
|
|
||||||
head_insert_str = ''
|
|
||||||
charset = rwinfo.charset
|
|
||||||
|
|
||||||
# if no charset set, attempt to extract from first 1024
|
|
||||||
if not charset:
|
|
||||||
first_buff = rwinfo.read_and_keep(1024)
|
|
||||||
charset = self.extract_html_charset(first_buff)
|
|
||||||
|
|
||||||
if head_insert_func:
|
|
||||||
head_insert_orig = head_insert_func(rule, cdx)
|
|
||||||
|
|
||||||
if charset:
|
|
||||||
try:
|
|
||||||
head_insert_str = webencodings.encode(head_insert_orig, charset)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if not head_insert_str:
|
|
||||||
charset = 'utf-8'
|
|
||||||
head_insert_str = head_insert_orig.encode(charset)
|
|
||||||
|
|
||||||
head_insert_str = head_insert_str.decode('iso-8859-1')
|
|
||||||
|
|
||||||
return head_insert_str
|
|
||||||
|
|
||||||
def extract_html_charset(self, buff):
|
|
||||||
charset = None
|
|
||||||
m = self.CHARSET_REGEX.search(buff)
|
|
||||||
if m:
|
|
||||||
charset = m.group(1)
|
|
||||||
charset = to_native_str(charset)
|
|
||||||
|
|
||||||
return charset
|
|
||||||
|
|
||||||
def rewrite_headers(self, rwinfo):
|
|
||||||
if rwinfo.is_url_rw():
|
|
||||||
header_rw_name = 'header'
|
|
||||||
else:
|
|
||||||
header_rw_name = 'header-proxy'
|
|
||||||
|
|
||||||
header_rw_class = self.all_rewriters.get(header_rw_name)
|
|
||||||
rwinfo.rw_http_headers = header_rw_class(rwinfo)()
|
|
||||||
|
|
||||||
def __call__(self, record, url_rewriter, cookie_rewriter,
             head_insert_func=None,
             cdx=None):
    """Rewrite one record's headers and, when applicable, its content.

    :returns: tuple of (rewritten http headers, content iterator,
              is_content_rewritten flag)
    """
    rwinfo = RewriteInfo(record, self, url_rewriter, cookie_rewriter)
    self.rewrite_headers(rwinfo)

    content_rewriter = None
    if rwinfo.is_content_rw():
        rule = self.get_rule(cdx)
        content_rewriter = self.create_rewriter(rwinfo.text_type, rule,
                                                rwinfo, cdx, head_insert_func)

    # no suitable content rewriter: stream the payload through untouched
    if content_rewriter is None:
        return rwinfo.rw_http_headers, StreamIter(rwinfo.content_stream), False

    #rwinfo.rw_http_headers.status_headers.remove_header('content-length')

    # all non-html rewriting is applied on line boundaries
    align = (rwinfo.text_type != 'html')

    # generator applying the rewriter to the content stream chunk by chunk
    gen = self.rewrite_text_stream_to_gen(rwinfo.content_stream,
                                          rewrite_func=content_rewriter.rewrite,
                                          final_read_func=content_rewriter.close,
                                          align_to_line=align)

    return rwinfo.rw_http_headers, gen, True
|
|
||||||
|
|
||||||
@staticmethod
def rewrite_text_stream_to_gen(stream,
                               rewrite_func,
                               final_read_func,
                               align_to_line,
                               buff_size=None):
    """
    Convert a binary stream into a generator of rewritten byte chunks.

    Each chunk is decoded as iso-8859-1 (a lossless byte<->str mapping),
    passed through ``rewrite_func``, and re-encoded.  When
    ``align_to_line`` is True, each chunk is extended to the next newline
    so ``rewrite_func`` never sees a line split across two chunks.
    ``final_read_func`` is called once at EOF so the rewriter can flush
    any buffered tail output.  The stream is always closed, even if the
    consumer abandons the generator early.

    :param stream: binary input supporting read()/readline()/close()
    :param rewrite_func: str -> str callable applied to each chunk
    :param final_read_func: () -> str callable producing trailing output
    :param align_to_line: if True, extend each chunk to a line boundary
    :param buff_size: read size in bytes; defaults to BUFF_SIZE
    """
    if buff_size is None:
        buff_size = BUFF_SIZE

    try:
        while True:
            buff = stream.read(buff_size)
            if not buff:
                break

            if align_to_line:
                buff += stream.readline()

            buff = rewrite_func(buff.decode('iso-8859-1'))
            yield buff.encode('iso-8859-1')

        # allow the rewriter to emit a tail / flush its final buffer
        buff = final_read_func()
        if buff:
            yield buff.encode('iso-8859-1')

    finally:
        stream.close()
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
class RewriteInfo(object):
    """Per-response rewriting context.

    Wraps one record plus the url/cookie rewriters for a single request,
    and derives the response's text type ('html', 'css', 'js', ...) and
    charset from the Content-Type header and, when necessary, from the
    first bytes of the content itself.
    """

    # matches content whose first non-whitespace byte is '<' (markup-like)
    TAG_REGEX = re.compile(b'^\s*\<')

    def __init__(self, record, rewriter, url_rewriter, cookie_rewriter):
        self.record = record

        # headers start as the record's; replaced once header rewriting runs
        self.rw_http_headers = record.http_headers
        self.content_stream = record.content_stream()

        self.rewriter = rewriter

        self.text_type = None
        self.charset = None

        self.url_rewriter = url_rewriter

        # fall back to exact-path cookie rewriting when none supplied
        if not cookie_rewriter:
            cookie_rewriter = ExactPathCookieRewriter(url_rewriter)

        self.cookie_rewriter = cookie_rewriter

        self._fill_text_type_and_charset()
        self._resolve_text_type()

    def _fill_text_type_and_charset(self):
        """Set self.text_type and self.charset from the Content-Type header."""
        content_type = self.record.http_headers.get_header('Content-Type')
        if not content_type:
            return

        parts = content_type.split(';', 1)
        mime = parts[0]

        # text_type stays None for mime types we do not rewrite
        self.text_type = self.rewriter.rewrite_types.get(mime)
        if not self.text_type:
            return

        # extract charset from the parameter part, e.g. "; charset=utf-8"
        if len(parts) == 2:
            parts = parts[1].lower().split('charset=', 1)
            if len(parts) == 2:
                self.charset = parts[1].strip()

    def _resolve_text_type(self):
        """Disambiguate 'html' responses requested via js_/cs_ modifiers.

        A resource served as text/html but fetched with a js_ or cs_ url
        modifier may actually be script or stylesheet content; peek at the
        first bytes to decide.
        """
        mod = self.url_rewriter.wburl.mod

        # NOTE(review): assigns 'css' when text_type is already 'css' --
        # a no-op as written; possibly meant to remap js_-mod css. Confirm.
        if self.text_type == 'css' and mod == 'js_':
            self.text_type = 'css'

        # only attempt to resolve between html and other text types
        if self.text_type != 'html':
            return

        if mod != 'js_' and mod != 'cs_':
            return

        # peek at content without consuming it from the stream
        buff = self.read_and_keep(128)

        # check if starts with a tag, then likely html
        # NOTE(review): there is no else branch -- text_type stays 'html'
        # either way; verify whether non-tag content should downgrade to
        # js/css here.
        if self.TAG_REGEX.match(buff):
            self.text_type = 'html'

    def read_and_keep(self, size):
        """Read up to *size* bytes, then rewrap the stream so those bytes
        are still delivered to subsequent readers. Returns the bytes read."""
        buff = self.content_stream.read(size)
        self.content_stream = BufferedReader(self.content_stream, starting_data=buff)
        return buff

    def is_content_rw(self):
        """Return True if the payload itself should be content-rewritten."""
        # no rewrite prefix means proxy/identity mode: leave content alone
        if not self.url_rewriter.prefix:
            return False

        # id_ modifier explicitly requests the unmodified resource
        if self.url_rewriter.wburl.mod == 'id_':
            return False

        if self.text_type == 'html':
            # ajax responses are passed through even when html
            if self.url_rewriter.rewrite_opts.get('is_ajax'):
                return False

        elif self.text_type == 'plain':
            # plain text is only rewritten when requested as script/css
            if self.url_rewriter.wburl.mod not in ('js_', 'cs_'):
                return False

        elif not self.text_type:
            return False

        return True

    def is_url_rw(self):
        """Return True if headers/urls should be rewritten for this record."""
        if not self.url_rewriter:
            return False

        if self.url_rewriter.wburl.mod == 'id_':
            return False

        return True
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_rewrite_types(self):
    """Return the mime-type -> text-type mapping used to pick a rewriter."""
    return self.rewrite_types
|
||||||
|
@ -3,7 +3,7 @@ import requests
|
|||||||
#from pywb.rewrite.rewrite_amf import RewriteAMFMixin
|
#from pywb.rewrite.rewrite_amf import RewriteAMFMixin
|
||||||
#from pywb.rewrite.rewrite_dash import RewriteDASHMixin
|
#from pywb.rewrite.rewrite_dash import RewriteDASHMixin
|
||||||
#from pywb.rewrite.rewrite_content import RewriteContent
|
#from pywb.rewrite.rewrite_content import RewriteContent
|
||||||
from pywb.urlrewrite.rewriter import Rewriter
|
from pywb.urlrewrite.rewriter import DefaultRewriter
|
||||||
|
|
||||||
from pywb.rewrite.wburl import WbUrl
|
from pywb.rewrite.wburl import WbUrl
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter, SchemeOnlyUrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter, SchemeOnlyUrlRewriter
|
||||||
@ -71,7 +71,7 @@ class RewriterApp(object):
|
|||||||
#frame_type = 'inverse' if framed_replay else False
|
#frame_type = 'inverse' if framed_replay else False
|
||||||
|
|
||||||
#self.content_rewriter = Rewriter(is_framed_replay=frame_type)
|
#self.content_rewriter = Rewriter(is_framed_replay=frame_type)
|
||||||
self.content_rw = Rewriter('pkg://pywb/rules.yaml', self.replay_mod)
|
self.content_rw = DefaultRewriter('pkg://pywb/rules.yaml', self.replay_mod)
|
||||||
|
|
||||||
if not jinja_env:
|
if not jinja_env:
|
||||||
jinja_env = JinjaEnv(globals={'static_path': 'static'})
|
jinja_env = JinjaEnv(globals={'static_path': 'static'})
|
||||||
|
@ -152,8 +152,7 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
assert len(lines) == 17
|
assert len(lines) == 17
|
||||||
assert lines[0].startswith('org,iana)/_css/2013.1/print.css 20140127171239')
|
assert lines[0].startswith('org,iana)/_css/2013.1/print.css 20140127171239')
|
||||||
|
|
||||||
|
def _test_replay_banner_only(self):
|
||||||
def test_replay_banner_only(self):
|
|
||||||
resp = self.testapp.get('/pywb/20140126201054bn_/http://www.iana.org/domains/reserved')
|
resp = self.testapp.get('/pywb/20140126201054bn_/http://www.iana.org/domains/reserved')
|
||||||
|
|
||||||
# wb.js header insertion
|
# wb.js header insertion
|
||||||
|
Loading…
x
Reference in New Issue
Block a user