mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite: module rewriting: fix module detection by peeking the buffer in streaming rewrite
bump wombat to latest
This commit is contained in:
parent
aad10d45fb
commit
f48a92af3e
@ -134,6 +134,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
super(HTMLRewriterMixin, self).__init__(url_rewriter, False)
|
||||
self.charset = charset
|
||||
self._wb_parse_context = None
|
||||
self._wb_parse_module = False
|
||||
|
||||
if js_rewriter:
|
||||
self.js_rewriter = js_rewriter
|
||||
@ -308,7 +309,8 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
return ''
|
||||
|
||||
content = self.js_rewriter.rewrite_complete(script_content,
|
||||
inline_attr=inline_attr)
|
||||
inline_attr=inline_attr,
|
||||
is_module=self._wb_parse_module)
|
||||
if inline_attr:
|
||||
content = self.ADD_WINDOW.sub('window.\\1', content)
|
||||
|
||||
@ -433,7 +435,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
# URL not skipped, likely src='js/....', forcing abs to make sure, cause PHP MIME(JS) === HTML
|
||||
attr_value = self._rewrite_url(attr_value, rw_mod, True)
|
||||
self._write_attr('__wb_orig_src', ov, empty_attr=None)
|
||||
|
||||
|
||||
elif attr_name == 'target':
|
||||
target = attr_value
|
||||
if target in ('_blank', '_parent', '_top'):
|
||||
@ -484,24 +486,30 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
self._wb_parse_context = 'style'
|
||||
|
||||
elif tag == 'script':
|
||||
if self._allow_js_type(tag_attrs):
|
||||
result = self._allow_js_type(tag_attrs)
|
||||
if result:
|
||||
self._wb_parse_context = 'script'
|
||||
self._wb_parse_module = (result == 'script-module')
|
||||
|
||||
|
||||
def _allow_js_type(self, tag_attrs):
    """Classify a <script> tag by its ``type`` attribute.

    :param tag_attrs: attributes of the script tag, in whatever form
        ``self.get_attr`` accepts
    :return: ``'script'`` when the tag has no (or an empty) ``type`` or
        the type mentions javascript/ecmascript, ``'script-module'`` for
        ``type="module"``, and ``None`` for any other type. The result is
        truthy exactly when the script content should be treated as JS.
    """
    type_value = self.get_attr(tag_attrs, 'type')

    # A missing or empty type attribute means a classic script.
    if not type_value:
        return 'script'

    # The HTML Standard strips leading/trailing ASCII whitespace from the
    # type value and compares case-insensitively, so normalise both here.
    type_value = type_value.strip().lower()

    if 'javascript' in type_value or 'ecmascript' in type_value:
        return 'script'

    if type_value == 'module':
        return 'script-module'

    # Any other type (JSON, templates, ...) is not rewritten as JS.
    return None
|
||||
|
||||
def _rewrite_head(self, start_end):
|
||||
# special case: head tag
|
||||
|
@ -1,6 +1,7 @@
|
||||
import re
|
||||
from pywb.rewrite.content_rewriter import StreamingRewriter
|
||||
from pywb.utils.loaders import load_py_name
|
||||
from pywb.utils.io import BUFF_SIZE
|
||||
from six.moves.urllib.parse import unquote
|
||||
|
||||
|
||||
@ -283,32 +284,53 @@ class JSWombatProxyRewriter(RegexRewriter):
|
||||
self.last_buff = self.rules_factory.last_buff
|
||||
self.local_objs = self.rules_factory.local_objs
|
||||
|
||||
self._is_module_check = None
|
||||
|
||||
def set_as_module(self):
    """Switch this rewriter into ES-module mode.

    Replaces ``first_buff`` with an ``import { ... }`` statement that
    pulls every name in ``self.local_objs`` from
    ``/static/__wb_module_decl.js``, empties ``last_buff``, and records
    the decision in ``_is_module_check`` so ``__call__`` does not run the
    module detection again.
    """
    # str.join accepts the iterable directly; no generator wrapper needed.
    self.first_buff = "\nimport {{ {0} }} from '/static/__wb_module_decl.js';\n".format(
        ", ".join(self.local_objs)
    )
    # NOTE(review): presumably the non-module suffix closes a wrapper that
    # the import statement above does not open -- confirm against
    # rules_factory.last_buff.
    self.last_buff = ""
    self._is_module_check = True
|
||||
|
||||
def __call__(self, rwinfo):
    """Run the streaming rewrite, deciding first whether the JS is a module.

    ``_is_module_check`` is tri-state: ``None`` means no decision has been
    made yet, ``True`` means module mode was enabled (see
    ``set_as_module``), ``False`` means the content was inspected and is
    not a module. Detection runs only while the state is ``None``.

    :param rwinfo: rewrite info object; must provide ``read_and_keep``
    :return: whatever the parent rewriter's ``__call__`` returns
    """
    # Identity test: None is the "undecided" sentinel, distinct from False.
    if self._is_module_check is None:
        # NOTE(review): read_and_keep is assumed to return the leading
        # bytes while leaving them available to the rewrite below
        # ("peeking the buffer" per the commit message) -- confirm.
        buf = rwinfo.read_and_keep(BUFF_SIZE * 4)

        if self.is_module(buf):
            self.set_as_module()
        else:
            self._is_module_check = False

    return super(JSWombatProxyRewriter, self).__call__(rwinfo)
|
||||
|
||||
@staticmethod
def is_module(string):
    """Return boolean indicating whether import or export statement is found.

    :param string: JS source as ``bytes`` or ``str`` (text is encoded to
        UTF-8 first so a single set of bytes patterns is used); may be
        empty or ``None``
    :return: ``True`` if an ES-module ``import``/``export`` statement is
        detected, ``False`` otherwise
    """
    # No re.M here: an import is only recognised at the very start of the
    # buffer (after optional whitespace).
    IMPORT_REGEX = re.compile(br"^\s*?import\s*?[{\"']")
    # re.M: an export statement may begin on any line.
    EXPORT_REGEX = re.compile(br"^\s*?export\s*?({([\s\w,$\n]+?)}[\s;]*|default|class)\s+", re.M)

    if not string:
        return False

    if isinstance(string, str):
        string = string.encode("utf-8")

    # Cheap substring test first so the regex only runs when it can match.
    if b"import" in string and IMPORT_REGEX.search(string):
        return True

    if b"export" in string and EXPORT_REGEX.search(string):
        return True

    return False
|
||||
|
||||
def rewrite_complete(self, string, **kwargs):
|
||||
if not kwargs.get('inline_attr'):
|
||||
if self.is_module(string):
|
||||
first_buff = "\nimport {} from '/static/__wb_module_decl.js';\n".format(
|
||||
", ".join(obj for obj in self.local_objs)
|
||||
)
|
||||
super(JSWombatProxyRewriter, self).__init__(self.rewriter, extra_rules=self.extra_rules, first_buff=first_buff)
|
||||
return super(JSWombatProxyRewriter, self).rewrite_complete(string)
|
||||
if kwargs.get('is_module'):
|
||||
self.set_as_module()
|
||||
|
||||
return super(JSWombatProxyRewriter, self).rewrite_complete(string)
|
||||
|
||||
# check if any of the wrapped objects are used in the script
|
||||
# if not, don't rewrite
|
||||
if not any(obj in string for obj in self.local_objs):
|
||||
|
Loading…
x
Reference in New Issue
Block a user