mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite dash: support for using custom rewriting function (for FB)
rewrite_fb_dash() added for rewriting DASH XML that is embedded in JS, which is itself embedded in HTML. TODO: refactor to provide more general support for custom rewriting functions. regex_rewriter: add ':' back to the lookahead exclusion so that 'location:' and 'top:' are not rewritten.
This commit is contained in:
parent
a20480b9ab
commit
15ad56c024
@ -3,6 +3,15 @@ import re
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
|
||||
|
||||
#=================================================================
|
||||
def load_function(string):
    """Resolve a ``'package.module:attribute'`` spec to the named object.

    The text before the first ``:`` is imported as a module with
    ``importlib.import_module``; the text after it is looked up on that
    module with ``getattr``.

    :param string: spec of the form ``'module.path:attribute_name'``.
    :returns: the attribute found on the imported module.
    :raises ValueError: if ``string`` contains no ``:`` separator.
    :raises ImportError: if the module cannot be imported.
    :raises AttributeError: if the module has no such attribute.
    """
    import importlib

    # Only the first ':' separates module from attribute, so anything after
    # it (including further ':' characters) is passed to getattr unchanged.
    module_name, sep, attr_name = string.partition(':')
    if not sep:
        # Without this check a malformed spec would surface as a bare
        # "list index out of range", which says nothing about the config.
        raise ValueError(
            "invalid function spec {0!r}: expected 'module:attribute'".format(string)
        )

    mod = importlib.import_module(module_name)
    return getattr(mod, attr_name)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class RegexRewriter(object):
|
||||
#@staticmethod
|
||||
@ -93,6 +102,8 @@ class RegexRewriter(object):
|
||||
match = obj.get('match')
|
||||
if 'rewrite' in obj:
|
||||
replace = RegexRewriter.archival_rewrite(rewriter)
|
||||
elif 'function' in obj:
|
||||
replace = load_function(obj['function'])
|
||||
else:
|
||||
replace = RegexRewriter.format(obj.get('replace', '{0}'))
|
||||
group = obj.get('group', 0)
|
||||
@ -132,7 +143,7 @@ class JSLocationRewriterMixin(object):
|
||||
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
|
||||
rules = rules + [
|
||||
# (r'(?<![/$])\blocation\b(?!\":)', RegexRewriter.add_prefix(prefix), 0),
|
||||
(r'(?<![$\'"])\b(?:location|top)\b(?![$\'"])', RegexRewriter.add_prefix(prefix), 0),
|
||||
(r'(?<![$\'"])\b(?:location|top)\b(?![$\'":])', RegexRewriter.add_prefix(prefix), 0),
|
||||
|
||||
(r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(window).'), 0),
|
||||
|
||||
|
@ -105,3 +105,23 @@ class RewriteDASHMixin(object):
|
||||
buff_io.seek(0)
|
||||
return buff_io
|
||||
|
||||
|
||||
def rewrite_fb_dash(string):
    """Rewrite a DASH manifest that is embedded as an escaped JS string.

    ``string`` is expected to hold the escaped manifest text, followed by
    the marker ``\\n",dash_prefetched_representation_ids:`` and the original
    id list. The manifest is unescaped, passed through
    ``RewriteDASHMixin.rewrite_dash``, re-escaped, and joined back to the
    marker with the ids returned by the rewrite (the original id list is
    dropped).

    :param string: the matched text containing the escaped manifest.
    :returns: the rewritten text, or ``string`` unchanged when the marker
        is not present.
    """
    DASH_SPLIT = r'\n",dash_prefetched_representation_ids:'

    inx = string.find(DASH_SPLIT)
    if inx < 0:
        # No marker: not the structure this function understands.
        return string

    # Everything after the marker (the old id list) is regenerated below.
    escaped_manifest = string[:inx]

    # Undo the backslash escapes. Encoding with 'backslashreplace' first turns
    # any literal non-ASCII character into an escape that 'unicode-escape'
    # decodes back to the same character; encoding as UTF-8 here would have
    # the multi-byte sequences decoded as Latin-1 and corrupt the text.
    manifest = escaped_manifest.encode('ascii', 'backslashreplace')
    manifest = manifest.decode('unicode-escape')

    stream = BytesIO(manifest.encode('utf-8'))
    # rewrite_dash is unpacked as (rewritten stream, selected ids).
    stream, best_ids = RewriteDASHMixin.rewrite_dash(stream)

    # json.dumps re-escapes the manifest; the surrounding quotes are stripped
    # because the caller's text already supplies them.
    rewritten = json.dumps(stream.read().decode('utf-8'))
    # NOTE(review): '<' is emitted as \x3C, presumably so the markup cannot
    # terminate the enclosing HTML script block -- confirm.
    rewritten = rewritten[1:-1].replace('<', r'\x3C')

    return rewritten + DASH_SPLIT + json.dumps(best_ids)
|
||||
|
||||
|
@ -58,6 +58,10 @@ rules:
|
||||
- match: 'Bootloader\.configurePage.*?;'
|
||||
replace: '/* {0} */'
|
||||
|
||||
- match: 'dash_manifest:"(.*",dash_prefetched_representation_ids:.*?])'
|
||||
group: 1
|
||||
function: 'pywb.rewrite.rewrite_dash:rewrite_fb_dash'
|
||||
|
||||
parse_comments: true
|
||||
|
||||
- url_prefix: 'com,facebook'
|
||||
|
Loading…
x
Reference in New Issue
Block a user