1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

rewrite dash: support for using custom rewriting function (for FB)

rewrite_fb_dash() added for rewriting dash xml, embedded in js, embedded in html
TODO: refactor to support custom rewriting functions in a more general way
regex_rewriter: again exclude 'location'/'top' from rewriting when followed by ':'
This commit is contained in:
Ilya Kreymer 2017-03-21 11:18:53 -07:00
parent a20480b9ab
commit 15ad56c024
3 changed files with 36 additions and 1 deletions

View File

@ -3,6 +3,15 @@ import re
from pywb.rewrite.url_rewriter import UrlRewriter
#=================================================================
def load_function(string):
    """Resolve a ``'package.module:attribute'`` spec to the object it names.

    The part before the first ``:`` is imported as a module and the part
    after it is looked up as an attribute of that module.

    :param string: spec of the form ``'module.path:name'``
    :return: the attribute ``name`` of the imported module
    :raises ValueError: if ``string`` contains no ``:`` separator
    :raises ImportError: if the module cannot be imported
    :raises AttributeError: if the module has no such attribute
    """
    import importlib

    # Split on the first ':' only, so the attribute part is taken verbatim.
    module_name, sep, attr_name = string.partition(':')
    if not sep:
        # Without this check a malformed spec fails with an opaque IndexError.
        raise ValueError(
            "function spec must be of the form 'module:name', "
            "got {0!r}".format(string))

    mod = importlib.import_module(module_name)
    return getattr(mod, attr_name)
#=================================================================
class RegexRewriter(object):
#@staticmethod
@ -93,6 +102,8 @@ class RegexRewriter(object):
match = obj.get('match')
if 'rewrite' in obj:
replace = RegexRewriter.archival_rewrite(rewriter)
elif 'function' in obj:
replace = load_function(obj['function'])
else:
replace = RegexRewriter.format(obj.get('replace', '{0}'))
group = obj.get('group', 0)
@ -132,7 +143,7 @@ class JSLocationRewriterMixin(object):
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
rules = rules + [
# (r'(?<![/$])\blocation\b(?!\":)', RegexRewriter.add_prefix(prefix), 0),
(r'(?<![$\'"])\b(?:location|top)\b(?![$\'"])', RegexRewriter.add_prefix(prefix), 0),
(r'(?<![$\'"])\b(?:location|top)\b(?![$\'":])', RegexRewriter.add_prefix(prefix), 0),
(r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(window).'), 0),

View File

@ -105,3 +105,23 @@ class RewriteDASHMixin(object):
buff_io.seek(0)
return buff_io
def rewrite_fb_dash(string):
    """Rewrite a DASH manifest that is embedded as an escaped JS string.

    ``string`` is searched for the ``DASH_SPLIT`` marker. Everything before
    the marker is treated as the escaped manifest; everything from the
    marker onwards (including the original id list) is discarded and
    regenerated.

    :param string: text containing the escaped manifest followed by the
        ``DASH_SPLIT`` marker
    :return: the re-escaped rewritten manifest, followed by ``DASH_SPLIT``
        and the JSON-encoded ids returned by
        ``RewriteDASHMixin.rewrite_dash``; ``string`` unchanged if the
        marker is not present
    """
    DASH_SPLIT = r'\n",dash_prefetched_representation_ids:'

    inx = string.find(DASH_SPLIT)
    if inx < 0:
        # Marker not found: nothing to rewrite.
        return string

    escaped_manifest = string[:inx]

    # The 'unicode-escape' codec decodes from Latin-1, so the input must be
    # encoded as Latin-1 (not UTF-8), otherwise every non-ASCII character is
    # corrupted. Characters outside Latin-1 become backslash escapes, which
    # the decoder then turns back into the original characters.
    manifest = (escaped_manifest
                .encode('latin-1', 'backslashreplace')
                .decode('unicode-escape'))

    stream = BytesIO(manifest.encode('utf-8'))
    stream, best_ids = RewriteDASHMixin.rewrite_dash(stream)

    # json.dumps re-escapes the text; [1:-1] drops the surrounding quotes
    # because the result is spliced back into an existing string literal.
    rewritten = json.dumps(stream.read().decode('utf-8'))
    # Presumably '<' is escaped so the manifest markup is not interpreted as
    # HTML when the script is embedded in a page -- confirm with callers.
    rewritten = rewritten[1:-1].replace('<', r'\x3C')

    return rewritten + DASH_SPLIT + json.dumps(best_ids)

View File

@ -58,6 +58,10 @@ rules:
- match: 'Bootloader\.configurePage.*?;'
replace: '/* {0} */'
- match: 'dash_manifest:"(.*",dash_prefetched_representation_ids:.*?])'
group: 1
function: 'pywb.rewrite.rewrite_dash:rewrite_fb_dash'
parse_comments: true
- url_prefix: 'com,facebook'