mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite dash: support for using custom rewriting function (for FB)
rewrite_fb_dash() added for rewriting DASH XML that is embedded in JS, which is itself embedded in HTML. TODO: refactor to provide more general support for custom rewriting functions. regex_rewriter: add ':' to the characters excluded from rewrite again.
This commit is contained in:
parent
a20480b9ab
commit
15ad56c024
@ -3,6 +3,15 @@ import re
|
|||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def load_function(string):
    """Resolve a ``'module.path:attribute'`` spec to the object it names.

    The part before the first ``:`` is imported as a module; the part after
    it is looked up on that module.  A dotted attribute path such as
    ``'pkg.mod:Class.method'`` is followed one attribute at a time.

    :param string: spec of the form ``'module.path:attribute'``
    :returns: the object found at that location
    :raises ValueError: if ``string`` contains no ``:`` separator
    :raises ImportError: if the module cannot be imported
    :raises AttributeError: if the attribute path does not resolve
    """
    import importlib

    module_name, sep, attr_path = string.partition(':')
    if not sep:
        # Without this check a malformed spec only surfaced as an opaque
        # IndexError with no hint about the expected format.
        raise ValueError(
            "expected 'module:attribute', got {0!r}".format(string))

    obj = importlib.import_module(module_name)
    for name in attr_path.split('.'):
        obj = getattr(obj, name)
    return obj
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class RegexRewriter(object):
|
class RegexRewriter(object):
|
||||||
#@staticmethod
|
#@staticmethod
|
||||||
@ -93,6 +102,8 @@ class RegexRewriter(object):
|
|||||||
match = obj.get('match')
|
match = obj.get('match')
|
||||||
if 'rewrite' in obj:
|
if 'rewrite' in obj:
|
||||||
replace = RegexRewriter.archival_rewrite(rewriter)
|
replace = RegexRewriter.archival_rewrite(rewriter)
|
||||||
|
elif 'function' in obj:
|
||||||
|
replace = load_function(obj['function'])
|
||||||
else:
|
else:
|
||||||
replace = RegexRewriter.format(obj.get('replace', '{0}'))
|
replace = RegexRewriter.format(obj.get('replace', '{0}'))
|
||||||
group = obj.get('group', 0)
|
group = obj.get('group', 0)
|
||||||
@ -132,7 +143,7 @@ class JSLocationRewriterMixin(object):
|
|||||||
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
|
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
|
||||||
rules = rules + [
|
rules = rules + [
|
||||||
# (r'(?<![/$])\blocation\b(?!\":)', RegexRewriter.add_prefix(prefix), 0),
|
# (r'(?<![/$])\blocation\b(?!\":)', RegexRewriter.add_prefix(prefix), 0),
|
||||||
(r'(?<![$\'"])\b(?:location|top)\b(?![$\'"])', RegexRewriter.add_prefix(prefix), 0),
|
(r'(?<![$\'"])\b(?:location|top)\b(?![$\'":])', RegexRewriter.add_prefix(prefix), 0),
|
||||||
|
|
||||||
(r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(window).'), 0),
|
(r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(window).'), 0),
|
||||||
|
|
||||||
|
@ -105,3 +105,23 @@ class RewriteDASHMixin(object):
|
|||||||
buff_io.seek(0)
|
buff_io.seek(0)
|
||||||
return buff_io
|
return buff_io
|
||||||
|
|
||||||
|
|
||||||
|
def rewrite_fb_dash(string):
    """Rewrite a DASH manifest carried as escaped JS string content.

    ``string`` is searched for the marker that separates the escaped
    manifest from the ``dash_prefetched_representation_ids`` value.  If the
    marker is absent the input is returned unchanged.  Otherwise the text
    before the marker is unescaped, passed through
    ``RewriteDASHMixin.rewrite_dash``, re-escaped, and returned followed by
    the marker and the JSON-encoded ids that ``rewrite_dash`` returned.  The
    text that followed the marker in the input is discarded.

    :param string: matched text containing the escaped manifest
    :returns: the rewritten text, or ``string`` itself if no marker is found
    """
    DASH_SPLIT = r'\n",dash_prefetched_representation_ids:'

    inx = string.find(DASH_SPLIT)
    if inx < 0:
        return string

    manifest = string[:inx]

    # 'unicode-escape' interprets its input bytes as Latin-1, so encoding
    # with UTF-8 first turns every non-ASCII character into mojibake.
    # Encoding as Latin-1 keeps those characters intact, and
    # 'backslashreplace' turns anything outside Latin-1 into an escape that
    # the decoder restores.
    unescaped = manifest.encode('latin-1', 'backslashreplace')
    unescaped = unescaped.decode('unicode-escape')

    stream = BytesIO(unescaped.encode('utf-8'))
    stream, best_ids = RewriteDASHMixin.rewrite_dash(stream)

    escaped = json.dumps(stream.read().decode('utf-8'))
    # Drop the surrounding quotes added by json.dumps: the result is spliced
    # back inside an existing string literal.  '<' is escaped as well,
    # presumably so the markup cannot be parsed as HTML where the JS is
    # embedded in a page -- TODO confirm.
    escaped = escaped[1:-1].replace('<', r'\x3C')

    return escaped + DASH_SPLIT + json.dumps(best_ids)
|
||||||
|
|
||||||
|
@ -58,6 +58,10 @@ rules:
|
|||||||
- match: 'Bootloader\.configurePage.*?;'
|
- match: 'Bootloader\.configurePage.*?;'
|
||||||
replace: '/* {0} */'
|
replace: '/* {0} */'
|
||||||
|
|
||||||
|
- match: 'dash_manifest:"(.*",dash_prefetched_representation_ids:.*?])'
|
||||||
|
group: 1
|
||||||
|
function: 'pywb.rewrite.rewrite_dash:rewrite_fb_dash'
|
||||||
|
|
||||||
parse_comments: true
|
parse_comments: true
|
||||||
|
|
||||||
- url_prefix: 'com,facebook'
|
- url_prefix: 'com,facebook'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user