diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py
index f10d861a..aa3af667 100644
--- a/pywb/rewrite/html_rewriter.py
+++ b/pywb/rewrite/html_rewriter.py
@@ -134,6 +134,7 @@ class HTMLRewriterMixin(StreamingRewriter):
super(HTMLRewriterMixin, self).__init__(url_rewriter, False)
self.charset = charset
self._wb_parse_context = None
+ self._wb_parse_module = False
if js_rewriter:
self.js_rewriter = js_rewriter
@@ -308,7 +309,8 @@ class HTMLRewriterMixin(StreamingRewriter):
return ''
content = self.js_rewriter.rewrite_complete(script_content,
- inline_attr=inline_attr)
+ inline_attr=inline_attr,
+ is_module=self._wb_parse_module)
if inline_attr:
content = self.ADD_WINDOW.sub('window.\\1', content)
@@ -433,7 +435,7 @@ class HTMLRewriterMixin(StreamingRewriter):
# URL not skipped, likely src='js/....', forcing abs to make sure, cause PHP MIME(JS) === HTML
attr_value = self._rewrite_url(attr_value, rw_mod, True)
self._write_attr('__wb_orig_src', ov, empty_attr=None)
-
+
elif attr_name == 'target':
target = attr_value
if target in ('_blank', '_parent', '_top'):
@@ -484,24 +486,30 @@ class HTMLRewriterMixin(StreamingRewriter):
self._wb_parse_context = 'style'
elif tag == 'script':
- if self._allow_js_type(tag_attrs):
+ result = self._allow_js_type(tag_attrs)
+ if result:
self._wb_parse_context = 'script'
+ self._wb_parse_module = (result == 'script-module')
+
def _allow_js_type(self, tag_attrs):
type_value = self.get_attr(tag_attrs, 'type')
if not type_value:
- return True
+ return 'script'
type_value = type_value.lower()
if 'javascript' in type_value:
- return True
+ return 'script'
if 'ecmascript' in type_value:
- return True
+ return 'script'
- return False
+ if type_value == 'module':
+ return 'script-module'
+
+ return None
def _rewrite_head(self, start_end):
# special case: head tag
diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py
index 4e331370..de158584 100644
--- a/pywb/rewrite/regex_rewriters.py
+++ b/pywb/rewrite/regex_rewriters.py
@@ -1,6 +1,7 @@
import re
from pywb.rewrite.content_rewriter import StreamingRewriter
from pywb.utils.loaders import load_py_name
+from pywb.utils.io import BUFF_SIZE
from six.moves.urllib.parse import unquote
@@ -283,32 +284,53 @@ class JSWombatProxyRewriter(RegexRewriter):
self.last_buff = self.rules_factory.last_buff
self.local_objs = self.rules_factory.local_objs
+ self._is_module_check = None
+
+    def set_as_module(self):
+        """Switch this rewriter to ES-module output.
+
+        Replaces ``first_buff`` with an ``import { ... }`` statement that
+        pulls every name in ``self.local_objs`` from
+        ``/static/__wb_module_decl.js``, clears ``last_buff``, and records
+        that module detection has been resolved (``_is_module_check`` is
+        set to ``True``) so ``__call__`` does not sniff the stream again.
+
+        NOTE(review): nothing visible here restores the previous
+        ``first_buff``/``last_buff``; if one instance is reused for a later
+        non-module script it would keep the module prologue -- confirm that
+        instances are not shared across scripts.
+        """
+        # Doubled braces are literal `{` / `}` for str.format, so the
+        # result reads: import { a, b, ... } from '...';
+        self.first_buff = "\nimport {{ {0} }} from '/static/__wb_module_decl.js';\n".format(
+            ", ".join(self.local_objs)
+        )
+        self.last_buff = ""
+        self._is_module_check = True
+
+    def __call__(self, rwinfo):
+        """Rewrite ``rwinfo``, first deciding whether it is an ES module.
+
+        ``_is_module_check`` is tri-state: ``None`` means "not decided
+        yet", ``True`` means module mode was enabled (by ``set_as_module``),
+        and ``False`` means the content was inspected and is not a module.
+        Detection runs only while the state is ``None``.
+
+        :param rwinfo: rewrite-info object passed through to the parent
+            rewriter; must provide ``read_and_keep(size)``
+            (presumably returns a prefix of the content while leaving it
+            available for the actual rewrite -- TODO confirm).
+        :return: whatever the parent class ``__call__`` returns.
+        """
+        # Identity test: `None` is a singleton and `== None` can be
+        # overridden by a custom __eq__.
+        if self._is_module_check is None:
+            buf = rwinfo.read_and_keep(BUFF_SIZE * 4)
+
+            if self.is_module(buf):
+                self.set_as_module()
+            else:
+                self._is_module_check = False
+
+        return super(JSWombatProxyRewriter, self).__call__(rwinfo)
+
@staticmethod
def is_module(string):
"""Return boolean indicating whether import or export statement is found."""
- IMPORT_REGEX = r"^\s*?import\s*?[{\"']"
- EXPORT_REGEX = r"^\s*?export\s*?({([\s\w,$\n]+?)}[\s;]*|default|class)\s+"
+ IMPORT_REGEX = re.compile(br"^\s*?import\s*?[{\"']")
+ EXPORT_REGEX = re.compile(br"^\s*?export\s*?({([\s\w,$\n]+?)}[\s;]*|default|class)\s+", re.M)
if not string:
return False
- if "import" in string and re.search(IMPORT_REGEX, string):
+ if isinstance(string, str):
+ string = string.encode("utf-8")
+
+ if b"import" in string and re.search(IMPORT_REGEX, string):
return True
- if "export" in string and re.search(EXPORT_REGEX, string):
+ if b"export" in string and re.search(EXPORT_REGEX, string):
return True
return False
def rewrite_complete(self, string, **kwargs):
if not kwargs.get('inline_attr'):
- if self.is_module(string):
- first_buff = "\nimport {} from '/static/__wb_module_decl.js';\n".format(
- ", ".join(obj for obj in self.local_objs)
- )
- super(JSWombatProxyRewriter, self).__init__(self.rewriter, extra_rules=self.extra_rules, first_buff=first_buff)
- return super(JSWombatProxyRewriter, self).rewrite_complete(string)
+ if kwargs.get('is_module'):
+ self.set_as_module()
+
return super(JSWombatProxyRewriter, self).rewrite_complete(string)
+
# check if any of the wrapped objects are used in the script
# if not, don't rewrite
if not any(obj in string for obj in self.local_objs):