mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite refactoring:
- rewrite headers after content, to ensure content-length/content-encoding are rewritten if the content was modified
- header rewriter: remove ProxyHeaderRewriter; set the default rule to 'prefix' when url rewriting, or 'keep' when not
- set is_content_rw when record.content_stream() is accessed, assuming the content is then modified
- add BufferedRewriter as a base for DASH, HLS and AMF rewriting, which processes the full stream
- should_rw_content() determines whether content rewriting should be attempted
- support banner-only insert mode: added HTMLInsertOnlyRewriter, enabled if there are no custom JS rules
- test: enable banner-only test mode
This commit is contained in:
parent
c1be7d4da5
commit
d8b67319e1
@ -7,6 +7,7 @@ from warcio.utils import to_native_str
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import webencodings
|
import webencodings
|
||||||
|
import tempfile
|
||||||
|
|
||||||
from pywb.webagg.utils import StreamIter, BUFF_SIZE
|
from pywb.webagg.utils import StreamIter, BUFF_SIZE
|
||||||
from pywb.rewrite.cookie_rewriter import ExactPathCookieRewriter
|
from pywb.rewrite.cookie_rewriter import ExactPathCookieRewriter
|
||||||
@ -78,11 +79,16 @@ class BaseContentRewriter(object):
|
|||||||
def create_rewriter(self, text_type, rule, rwinfo, cdx, head_insert_func=None):
|
def create_rewriter(self, text_type, rule, rwinfo, cdx, head_insert_func=None):
|
||||||
rw_type, rw_class = self.get_rw_class(rule, text_type, rwinfo)
|
rw_type, rw_class = self.get_rw_class(rule, text_type, rwinfo)
|
||||||
|
|
||||||
if rw_type in ('js', 'js_proxy'):
|
if rw_type in ('js', 'js-proxy'):
|
||||||
extra_rules = []
|
extra_rules = []
|
||||||
if 'js_regex_func' in rule:
|
if 'js_regex_func' in rule:
|
||||||
extra_rules = rule['js_regex_func'](rwinfo.url_rewriter)
|
extra_rules = rule['js_regex_func'](rwinfo.url_rewriter)
|
||||||
|
|
||||||
|
# if js-proxy and no rules, default to none
|
||||||
|
# js rewriting in proxy only if extra rules apply
|
||||||
|
if rw_type == 'js-proxy' and not extra_rules:
|
||||||
|
return None
|
||||||
|
|
||||||
return rw_class(rwinfo.url_rewriter, extra_rules)
|
return rw_class(rwinfo.url_rewriter, extra_rules)
|
||||||
|
|
||||||
elif rw_type != 'html':
|
elif rw_type != 'html':
|
||||||
@ -94,6 +100,10 @@ class BaseContentRewriter(object):
|
|||||||
js_rewriter = self.create_rewriter('js', rule, rwinfo, cdx)
|
js_rewriter = self.create_rewriter('js', rule, rwinfo, cdx)
|
||||||
css_rewriter = self.create_rewriter('css', rule, rwinfo, cdx)
|
css_rewriter = self.create_rewriter('css', rule, rwinfo, cdx)
|
||||||
|
|
||||||
|
# if no js rewriter, then do banner insert only
|
||||||
|
if not js_rewriter:
|
||||||
|
rw_class = self.all_rewriters.get('html-banner-only')
|
||||||
|
|
||||||
rw = rw_class(rwinfo.url_rewriter,
|
rw = rw_class(rwinfo.url_rewriter,
|
||||||
js_rewriter=js_rewriter,
|
js_rewriter=js_rewriter,
|
||||||
css_rewriter=css_rewriter,
|
css_rewriter=css_rewriter,
|
||||||
@ -140,33 +150,28 @@ class BaseContentRewriter(object):
|
|||||||
return charset
|
return charset
|
||||||
|
|
||||||
def rewrite_headers(self, rwinfo):
|
def rewrite_headers(self, rwinfo):
|
||||||
if rwinfo.is_url_rw():
|
header_rw_class = self.all_rewriters.get('header')
|
||||||
header_rw_name = 'header'
|
return header_rw_class(rwinfo)()
|
||||||
else:
|
|
||||||
header_rw_name = 'header-proxy'
|
|
||||||
|
|
||||||
header_rw_class = self.all_rewriters.get(header_rw_name)
|
|
||||||
rwinfo.rw_http_headers = header_rw_class(rwinfo)()
|
|
||||||
|
|
||||||
def __call__(self, record, url_rewriter, cookie_rewriter,
|
def __call__(self, record, url_rewriter, cookie_rewriter,
|
||||||
head_insert_func=None,
|
head_insert_func=None,
|
||||||
cdx=None):
|
cdx=None):
|
||||||
|
|
||||||
rwinfo = RewriteInfo(record, self.get_rewrite_types(), url_rewriter, cookie_rewriter)
|
rwinfo = RewriteInfo(record, self.get_rewrite_types(), url_rewriter, cookie_rewriter)
|
||||||
|
|
||||||
self.rewrite_headers(rwinfo)
|
|
||||||
|
|
||||||
content_rewriter = None
|
content_rewriter = None
|
||||||
if rwinfo.is_content_rw():
|
|
||||||
|
if rwinfo.should_rw_content():
|
||||||
rule = self.get_rule(cdx)
|
rule = self.get_rule(cdx)
|
||||||
content_rewriter = self.create_rewriter(rwinfo.text_type, rule, rwinfo, cdx, head_insert_func)
|
content_rewriter = self.create_rewriter(rwinfo.text_type, rule, rwinfo, cdx, head_insert_func)
|
||||||
|
|
||||||
if content_rewriter:
|
if content_rewriter:
|
||||||
gen = content_rewriter(rwinfo)
|
gen = content_rewriter(rwinfo)
|
||||||
else:
|
else:
|
||||||
gen = StreamIter(rwinfo.content_stream)
|
gen = StreamIter(rwinfo.record.raw_stream)
|
||||||
|
|
||||||
return rwinfo.rw_http_headers, gen, (content_rewriter != None)
|
rw_http_headers = self.rewrite_headers(rwinfo)
|
||||||
|
|
||||||
|
return rw_http_headers, gen, (content_rewriter != None)
|
||||||
|
|
||||||
def init_js_regexs(self, regexs):
|
def init_js_regexs(self, regexs):
|
||||||
raise NotImplemented()
|
raise NotImplemented()
|
||||||
@ -175,10 +180,34 @@ class BaseContentRewriter(object):
|
|||||||
raise NotImplemented()
|
raise NotImplemented()
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class BufferedRewriter(object):
    """Base class for rewriters that need the complete content before rewriting.

    The whole content stream is first copied into a spooled temporary file,
    which is then handed to ``rewrite_stream()``. Subclasses implement
    ``rewrite_stream()`` only.
    """

    def __init__(self, url_rewriter=None):
        """Store the (optional) url rewriter for use by subclasses."""
        self.url_rewriter = url_rewriter

    def __call__(self, rwinfo):
        """Buffer ``rwinfo.content_stream`` fully and rewrite it.

        :param rwinfo: object exposing a readable, closeable ``content_stream``
        :return: a ``StreamIter`` over the stream returned by ``rewrite_stream()``
        """
        # Kept in memory up to BUFF_SIZE * 4 bytes, spilled to disk beyond that.
        stream_buffer = tempfile.SpooledTemporaryFile(BUFF_SIZE * 4)

        # The source stream is closed as soon as it has been fully copied.
        with closing(rwinfo.content_stream) as fh:
            while True:
                buff = fh.read()
                if not buff:
                    break

                stream_buffer.write(buff)

        # Rewind so the subclass reads the buffered content from the start.
        stream_buffer.seek(0)
        return StreamIter(self.rewrite_stream(stream_buffer))

    def rewrite_stream(self, stream):
        """Rewrite the fully buffered ``stream``; must be overridden.

        :param stream: seekable file-like object positioned at offset 0
        :return: a readable stream with the rewritten content
        :raises NotImplementedError: always, in this base class
        """
        # NotImplemented is a comparison sentinel and is not callable;
        # NotImplementedError is the exception for abstract methods.
        raise NotImplementedError('implement in subclass')
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class StreamingRewriter(object):
|
class StreamingRewriter(object):
|
||||||
def __init__(self):
|
def __init__(self, url_rewriter, align_to_line=True):
|
||||||
self.align_to_line = True
|
self.url_rewriter = url_rewriter
|
||||||
|
self.align_to_line = align_to_line
|
||||||
|
|
||||||
def __call__(self, rwinfo):
|
def __call__(self, rwinfo):
|
||||||
gen = self.rewrite_text_stream_to_gen(rwinfo.content_stream,
|
gen = self.rewrite_text_stream_to_gen(rwinfo.content_stream,
|
||||||
@ -233,8 +262,8 @@ class RewriteInfo(object):
|
|||||||
def __init__(self, record, rewrite_types, url_rewriter, cookie_rewriter):
|
def __init__(self, record, rewrite_types, url_rewriter, cookie_rewriter):
|
||||||
self.record = record
|
self.record = record
|
||||||
|
|
||||||
self.rw_http_headers = record.http_headers
|
self._content_stream = None
|
||||||
self.content_stream = record.content_stream()
|
self.is_content_rw = False
|
||||||
|
|
||||||
self.rewrite_types = rewrite_types
|
self.rewrite_types = rewrite_types
|
||||||
|
|
||||||
@ -287,15 +316,20 @@ class RewriteInfo(object):
|
|||||||
if self.TAG_REGEX.match(buff):
|
if self.TAG_REGEX.match(buff):
|
||||||
self.text_type = 'html'
|
self.text_type = 'html'
|
||||||
|
|
||||||
|
@property
def content_stream(self):
    """Return the record's content stream, opening it on first access.

    On first access this calls ``self.record.content_stream()``, caches the
    result in ``self._content_stream`` and sets ``self.is_content_rw`` to
    True. Later accesses return the cached stream.
    """
    stream = self._content_stream
    if not stream:
        stream = self.record.content_stream()
        self._content_stream = stream
        self.is_content_rw = True

    return stream
|
||||||
|
|
||||||
def read_and_keep(self, size):
|
def read_and_keep(self, size):
|
||||||
buff = self.content_stream.read(size)
|
buff = self.content_stream.read(size)
|
||||||
self.content_stream = BufferedReader(self.content_stream, starting_data=buff)
|
self._content_stream = BufferedReader(self._content_stream, starting_data=buff)
|
||||||
return buff
|
return buff
|
||||||
|
|
||||||
def is_content_rw(self):
|
def should_rw_content(self):
|
||||||
if not self.url_rewriter.prefix:
|
|
||||||
return False
|
|
||||||
|
|
||||||
if self.url_rewriter.wburl.mod == 'id_':
|
if self.url_rewriter.wburl.mod == 'id_':
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -310,15 +344,15 @@ class RewriteInfo(object):
|
|||||||
elif not self.text_type:
|
elif not self.text_type:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
elif self.text_type == 'css' or self.text_type == 'xml':
|
||||||
|
if self.url_rewriter.wburl.mod == 'bn_':
|
||||||
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def is_url_rw(self):
|
def is_url_rw(self):
|
||||||
if not self.url_rewriter:
|
if self.url_rewriter.wburl.mod in ('id_', 'bn_'):
|
||||||
return False
|
|
||||||
|
|
||||||
if self.url_rewriter.wburl.mod == 'id_':
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
34
pywb/rewrite/html_insert_rewriter.py
Normal file
34
pywb/rewrite/html_insert_rewriter.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import re
|
||||||
|
from pywb.rewrite.content_rewriter import StreamingRewriter
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class HTMLInsertOnlyRewriter(StreamingRewriter):
    """ Insert custom string into HTML <head> tag
    no other rewriting performed
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence and
    # triggers a warning on modern Python. The compiled pattern is unchanged.
    HEAD_REGEX = re.compile(r'<\s*head\b[^>]*[>]+', re.I)

    def __init__(self, url_rewriter, **kwargs):
        """
        :param url_rewriter: passed through to StreamingRewriter
        :param kwargs: must contain 'head_insert', the string to insert
            right after the opening <head> tag; other keys are ignored
        :raises KeyError: if 'head_insert' is not supplied
        """
        # False is passed as StreamingRewriter's align_to_line argument
        super(HTMLInsertOnlyRewriter, self).__init__(url_rewriter, False)
        self.head_insert = kwargs['head_insert']

        self.done = False

    def rewrite(self, string):
        """Return ``string`` with the head insert added after the first <head> tag.

        Only the first buffer passed in is searched; every later call
        returns its input unchanged.
        """
        if self.done:
            return string

        # only try to find <head> in first buffer
        self.done = True

        # Nothing to insert (None or empty): pass through instead of failing
        # on string concatenation with None.
        if not self.head_insert:
            return string

        m = self.HEAD_REGEX.search(string)
        if not m:
            return string

        inx = m.end()
        return string[:inx] + self.head_insert + string[inx:]
|
||||||
|
|
||||||
|
|
@ -19,7 +19,7 @@ from six import text_type
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class HTMLRewriterMixin(object):
|
class HTMLRewriterMixin(StreamingRewriter):
|
||||||
"""
|
"""
|
||||||
HTML-Parsing Rewriter for custom rewriting, also delegates
|
HTML-Parsing Rewriter for custom rewriting, also delegates
|
||||||
to rewriters for script and css
|
to rewriters for script and css
|
||||||
@ -98,7 +98,7 @@ class HTMLRewriterMixin(object):
|
|||||||
defmod='',
|
defmod='',
|
||||||
parse_comments=False):
|
parse_comments=False):
|
||||||
|
|
||||||
self.url_rewriter = url_rewriter
|
super(HTMLRewriterMixin, self).__init__(url_rewriter, False)
|
||||||
self._wb_parse_context = None
|
self._wb_parse_context = None
|
||||||
|
|
||||||
if js_rewriter:
|
if js_rewriter:
|
||||||
@ -443,7 +443,7 @@ class HTMLRewriterMixin(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class HTMLRewriter(HTMLRewriterMixin, StreamingRewriter, HTMLParser):
|
class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
||||||
PARSETAG = re.compile('[<]')
|
PARSETAG = re.compile('[<]')
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -453,8 +453,6 @@ class HTMLRewriter(HTMLRewriterMixin, StreamingRewriter, HTMLParser):
|
|||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
|
|
||||||
super(HTMLRewriter, self).__init__(*args, **kwargs)
|
super(HTMLRewriter, self).__init__(*args, **kwargs)
|
||||||
# for StreamingRewriter
|
|
||||||
self.align_to_line = False
|
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
HTMLParser.reset(self)
|
HTMLParser.reset(self)
|
||||||
|
@ -7,10 +7,6 @@ class JSONPRewriter(StreamingRewriter):
|
|||||||
JSONP = re.compile(r'^(\w+)\(\{')
|
JSONP = re.compile(r'^(\w+)\(\{')
|
||||||
CALLBACK = re.compile(r'[?].*callback=([^&]+)')
|
CALLBACK = re.compile(r'[?].*callback=([^&]+)')
|
||||||
|
|
||||||
def __init__(self, urlrewriter):
|
|
||||||
super(JSONPRewriter, self).__init__()
|
|
||||||
self.urlrewriter = urlrewriter
|
|
||||||
|
|
||||||
def rewrite(self, string):
|
def rewrite(self, string):
|
||||||
# see if json is jsonp, starts with callback func
|
# see if json is jsonp, starts with callback func
|
||||||
m_json = self.JSONP.search(string)
|
m_json = self.JSONP.search(string)
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
|
||||||
from pywb.rewrite.content_rewriter import StreamingRewriter
|
from pywb.rewrite.content_rewriter import StreamingRewriter
|
||||||
|
|
||||||
|
|
||||||
@ -44,7 +43,7 @@ class RegexRewriter(StreamingRewriter):
|
|||||||
#DEFAULT_OP = add_prefix
|
#DEFAULT_OP = add_prefix
|
||||||
|
|
||||||
def __init__(self, rewriter, rules):
|
def __init__(self, rewriter, rules):
|
||||||
super(RegexRewriter, self).__init__()
|
super(RegexRewriter, self).__init__(rewriter)
|
||||||
#rules = self.create_rules(http_prefix)
|
#rules = self.create_rules(http_prefix)
|
||||||
|
|
||||||
# Build regexstr, concatenating regex list
|
# Build regexstr, concatenating regex list
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from six.moves import zip
|
from six.moves import zip
|
||||||
|
|
||||||
|
from pywb.rewrite.content_rewriter import BufferedRewriter
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Experimental: not fully tested
|
# Experimental: not fully tested
|
||||||
class RewriteAMF(object): #pragma: no cover
|
class RewriteAMF(BufferedRewriter): #pragma: no cover
|
||||||
def __call__(self, rwinfo):
|
def rewrite_stream(self, stream):
|
||||||
try:
|
try:
|
||||||
from pyamf import remoting
|
from pyamf import remoting
|
||||||
|
|
||||||
@ -20,7 +22,7 @@ class RewriteAMF(object): #pragma: no cover
|
|||||||
res = remoting.decode(iobuff)
|
res = remoting.decode(iobuff)
|
||||||
|
|
||||||
# TODO: revisit this
|
# TODO: revisit this
|
||||||
inputdata = rwinfo.url_rewriter.rewrite_opts.get('pywb.inputdata')
|
inputdata = url_rewriter.rewrite_opts.get('pywb.inputdata')
|
||||||
|
|
||||||
if inputdata:
|
if inputdata:
|
||||||
new_list = []
|
new_list = []
|
||||||
@ -42,3 +44,5 @@ class RewriteAMF(object): #pragma: no cover
|
|||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
print(e)
|
print(e)
|
||||||
return stream
|
return stream
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,24 +4,14 @@ import json
|
|||||||
|
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
from pywb.webagg.utils import StreamIter
|
from pywb.rewrite.content_rewriter import BufferedRewriter
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class RewriteDASH(object):
|
class RewriteDASH(BufferedRewriter):
|
||||||
def __call__(self, rwinfo):
|
def rewrite_stream(self, stream):
|
||||||
buff_io = BytesIO()
|
res_buff, best_ids = self.rewrite_dash(stream)
|
||||||
with closing(rwinfo.content_stream) as fh:
|
return res_buff
|
||||||
while True:
|
|
||||||
buff = fh.read()
|
|
||||||
if not buff:
|
|
||||||
break
|
|
||||||
|
|
||||||
buff_io.write(buff)
|
|
||||||
|
|
||||||
buff_io.seek(0)
|
|
||||||
res_buff, best_ids = self.rewrite_dash(buff_io)
|
|
||||||
return StreamIter(res_buff)
|
|
||||||
|
|
||||||
def rewrite_dash(self, stream):
|
def rewrite_dash(self, stream):
|
||||||
ET.register_namespace('', 'urn:mpeg:dash:schema:mpd:2011')
|
ET.register_namespace('', 'urn:mpeg:dash:schema:mpd:2011')
|
||||||
@ -70,7 +60,7 @@ def rewrite_fb_dash(string):
|
|||||||
buff = string.encode('utf-8').decode('unicode-escape')
|
buff = string.encode('utf-8').decode('unicode-escape')
|
||||||
buff = buff.encode('utf-8')
|
buff = buff.encode('utf-8')
|
||||||
io = BytesIO(buff)
|
io = BytesIO(buff)
|
||||||
io, best_ids = RewriteDASHMixin().rewrite_dash(io)
|
io, best_ids = RewriteDASH().rewrite_dash(io)
|
||||||
string = json.dumps(io.read().decode('utf-8'))
|
string = json.dumps(io.read().decode('utf-8'))
|
||||||
string = string[1:-1].replace('<', r'\x3C')
|
string = string[1:-1].replace('<', r'\x3C')
|
||||||
|
|
||||||
|
@ -1,16 +1,14 @@
|
|||||||
import re
|
import re
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pywb.webagg.utils import StreamIter
|
|
||||||
|
from pywb.rewrite.content_rewriter import BufferedRewriter
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class RewriteHLS(object):
|
class RewriteHLS(BufferedRewriter):
|
||||||
EXT_INF = re.compile('#EXT-X-STREAM-INF:(?:.*[,])?BANDWIDTH=([\d]+)')
|
EXT_INF = re.compile('#EXT-X-STREAM-INF:(?:.*[,])?BANDWIDTH=([\d]+)')
|
||||||
|
|
||||||
def __call__(self, rwinfo):
|
def rewrite_stream(self, stream):
|
||||||
return StreamIter(self.rewrite_m3u8(rwinfo.content_stream))
|
|
||||||
|
|
||||||
def rewrite_m3u8(self, stream):
|
|
||||||
buff = stream.read()
|
buff = stream.read()
|
||||||
|
|
||||||
lines = buff.decode('utf-8').split('\n')
|
lines = buff.decode('utf-8').split('\n')
|
||||||
|
@ -17,6 +17,9 @@ class PrefixHeaderRewriter(object):
|
|||||||
'content-location': 'url-rewrite',
|
'content-location': 'url-rewrite',
|
||||||
'content-base': 'url-rewrite',
|
'content-base': 'url-rewrite',
|
||||||
|
|
||||||
|
'transfer-encoding': 'prefix',
|
||||||
|
'connection': 'prefix',
|
||||||
|
|
||||||
'content-encoding': 'keep-if-no-content-rewrite',
|
'content-encoding': 'keep-if-no-content-rewrite',
|
||||||
'content-length': 'content-length',
|
'content-length': 'content-length',
|
||||||
|
|
||||||
@ -24,13 +27,16 @@ class PrefixHeaderRewriter(object):
|
|||||||
'cookie': 'cookie',
|
'cookie': 'cookie',
|
||||||
}
|
}
|
||||||
|
|
||||||
default_rule = 'prefix'
|
|
||||||
|
|
||||||
def __init__(self, rwinfo, header_prefix='X-Archive-Orig-'):
|
def __init__(self, rwinfo, header_prefix='X-Archive-Orig-'):
|
||||||
self.header_prefix = header_prefix
|
self.header_prefix = header_prefix
|
||||||
self.rwinfo = rwinfo
|
self.rwinfo = rwinfo
|
||||||
self.http_headers = rwinfo.record.http_headers
|
self.http_headers = rwinfo.record.http_headers
|
||||||
|
|
||||||
|
if rwinfo.is_url_rw():
|
||||||
|
self.default_rule = 'prefix'
|
||||||
|
else:
|
||||||
|
self.default_rule = 'keep'
|
||||||
|
|
||||||
def __call__(self):
|
def __call__(self):
|
||||||
new_headers_list = []
|
new_headers_list = []
|
||||||
for name, value in self.http_headers.headers:
|
for name, value in self.http_headers.headers:
|
||||||
@ -54,14 +60,14 @@ class PrefixHeaderRewriter(object):
|
|||||||
return (name, self.rwinfo.url_rewriter.rewrite(value))
|
return (name, self.rwinfo.url_rewriter.rewrite(value))
|
||||||
|
|
||||||
elif rule == 'keep-if-no-content-rewrite':
|
elif rule == 'keep-if-no-content-rewrite':
|
||||||
if not self.rwinfo.is_content_rw():
|
if not self.rwinfo.is_content_rw:
|
||||||
return (name, value)
|
return (name, value)
|
||||||
|
|
||||||
elif rule == 'content-length':
|
elif rule == 'content-length':
|
||||||
if value == '0':
|
if value == '0':
|
||||||
return (name, value)
|
return (name, value)
|
||||||
|
|
||||||
if not self.rwinfo.is_content_rw():
|
if not self.rwinfo.is_content_rw:
|
||||||
try:
|
try:
|
||||||
if int(value) >= 0:
|
if int(value) >= 0:
|
||||||
return (name, value)
|
return (name, value)
|
||||||
@ -92,11 +98,3 @@ class PrefixHeaderRewriter(object):
|
|||||||
new_headers.append(('Expires', datetime_to_http_date(dt)))
|
new_headers.append(('Expires', datetime_to_http_date(dt)))
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
class ProxyHeaderRewriter(PrefixHeaderRewriter):
|
|
||||||
header_rules = {
|
|
||||||
'transfer-encoding': 'prefix',
|
|
||||||
'connection': 'prefix',
|
|
||||||
}
|
|
||||||
|
|
||||||
default_rule = 'keep'
|
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
from pywb.rewrite.content_rewriter import BaseContentRewriter
|
from pywb.rewrite.content_rewriter import BaseContentRewriter
|
||||||
|
|
||||||
from pywb.rewrite.html_rewriter import HTMLRewriter
|
from pywb.rewrite.html_rewriter import HTMLRewriter
|
||||||
|
from pywb.rewrite.html_insert_rewriter import HTMLInsertOnlyRewriter
|
||||||
|
|
||||||
from pywb.rewrite.regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
from pywb.rewrite.regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
||||||
from pywb.rewrite.regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
from pywb.rewrite.regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
||||||
from pywb.rewrite.regex_rewriters import JSLocationOnlyRewriter, JSNoneRewriter
|
from pywb.rewrite.regex_rewriters import JSLocationOnlyRewriter, JSNoneRewriter
|
||||||
|
|
||||||
from pywb.urlrewrite.header_rewriter import PrefixHeaderRewriter, ProxyHeaderRewriter
|
from pywb.urlrewrite.header_rewriter import PrefixHeaderRewriter
|
||||||
|
|
||||||
from pywb.rewrite.jsonp_rewriter import JSONPRewriter
|
from pywb.rewrite.jsonp_rewriter import JSONPRewriter
|
||||||
|
|
||||||
@ -19,9 +20,9 @@ from pywb.rewrite.rewrite_amf import RewriteAMF
|
|||||||
class DefaultRewriter(BaseContentRewriter):
|
class DefaultRewriter(BaseContentRewriter):
|
||||||
all_rewriters = {
|
all_rewriters = {
|
||||||
'header': PrefixHeaderRewriter,
|
'header': PrefixHeaderRewriter,
|
||||||
'header-proxy': ProxyHeaderRewriter,
|
|
||||||
|
|
||||||
'html': HTMLRewriter,
|
'html': HTMLRewriter,
|
||||||
|
'html-banner-only': HTMLInsertOnlyRewriter,
|
||||||
|
|
||||||
'css': CSSRewriter,
|
'css': CSSRewriter,
|
||||||
|
|
||||||
|
@ -152,7 +152,7 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
assert len(lines) == 17
|
assert len(lines) == 17
|
||||||
assert lines[0].startswith('org,iana)/_css/2013.1/print.css 20140127171239')
|
assert lines[0].startswith('org,iana)/_css/2013.1/print.css 20140127171239')
|
||||||
|
|
||||||
def _test_replay_banner_only(self):
|
def test_replay_banner_only(self):
|
||||||
resp = self.testapp.get('/pywb/20140126201054bn_/http://www.iana.org/domains/reserved')
|
resp = self.testapp.get('/pywb/20140126201054bn_/http://www.iana.org/domains/reserved')
|
||||||
|
|
||||||
# wb.js header insertion
|
# wb.js header insertion
|
||||||
|
Loading…
x
Reference in New Issue
Block a user