From dbf52c2579133391e2f163e3f49fb314f1560574 Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Thu, 29 Sep 2022 17:00:39 -0400
Subject: [PATCH] WIP: Add JS module handling code

---
 pywb/rewrite/regex_rewriters.py           | 23 ++++++++++++++++++++++-
 pywb/rewrite/test/test_regex_rewriters.py | 19 +++++++++++++++++++
 pywb/static/__wb_module_decl.js           | 12 ++++++++++++
 3 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 pywb/static/__wb_module_decl.js

diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py
index e862eabe..d8f06663 100644
--- a/pywb/rewrite/regex_rewriters.py
+++ b/pywb/rewrite/regex_rewriters.py
@@ -280,10 +280,31 @@ class JSWombatProxyRewriter(RegexRewriter):
         self.last_buff = self.rules_factory.last_buff
         self.local_objs = self.rules_factory.local_objs
 
+    @staticmethod
+    def is_module(string):
+        """Return boolean indicating whether import or export statement is found."""
+        IMPORT_REGEX = r"^\s*?import\s*?[{\"']"
+        EXPORT_REGEX = r"^\s*?export\s*?({([\s\w,$\n]+?)}[\s;]*|default|class)\s+"
+
+        if not string:
+            return False
+
+        if "import" in string and re.search(IMPORT_REGEX, string):
+            return True
+
+        if "export" in string and re.search(EXPORT_REGEX, string):
+            return True
+
+        return False
+
     def rewrite_complete(self, string, **kwargs):
         if not kwargs.get('inline_attr'):
+            if self.is_module(string):
+                first_buff = "\nimport {} from '/static/__wb_module_decl.js';\n".format(
+                    ", ".join(obj for obj in self.local_objs)
+                )
+                return super(JSWombatProxyRewriter, self).rewrite_complete(string, first_buff=first_buff)
             return super(JSWombatProxyRewriter, self).rewrite_complete(string)
-
         # check if any of the wrapped objects are used in the script
         # if not, don't rewrite
         if not any(obj in string for obj in self.local_objs):
diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py
index 48c49a06..26a11cdd 100644
--- a/pywb/rewrite/test/test_regex_rewriters.py
+++ b/pywb/rewrite/test/test_regex_rewriters.py
@@ -348,6 +348,7 @@
 from pywb.rewrite.url_rewriter import UrlRewriter
 from pywb.rewrite.regex_rewriters import RegexRewriter, JSRewriter, CSSRewriter, XMLRewriter, RxRules
 from pywb.rewrite.regex_rewriters import JSWombatProxyRewriter
+import pytest
 
 urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/', 'https://localhost/web/')
 
@@ -367,6 +368,24 @@ def _test_xml(string):
 def _test_css(string):
     return CSSRewriter(urlrewriter).rewrite(string)
 
+@pytest.mark.parametrize(
+    "string, expected_return",
+    [
+        # imports
+        ("import './a-module.js'\n", True),
+        # exports
+        ("export { name1 };\n", True),
+        ("export default function functionName() { /* … */ }", True),
+        ("export class ClassName { /* … */ };", True),
+        # not a module
+        ("let counter = 0;\nconsole.log(counter);", False),
+        ("", False),
+        (None, False)
+    ]
+)
+def test_is_module(string, expected_return):
+    assert JSWombatProxyRewriter.is_module(string) == expected_return
+
 if __name__ == "__main__":
     import doctest
     doctest.testmod()
diff --git a/pywb/static/__wb_module_decl.js b/pywb/static/__wb_module_decl.js
new file mode 100644
index 00000000..1058c189
--- /dev/null
+++ b/pywb/static/__wb_module_decl.js
@@ -0,0 +1,12 @@
+var wrapObj = function(name) {return (self._wb_wombat && self._wb_wombat.local_init && self._wb_wombat.local_init(name)) || self[name]; };
+if (!self.__WB_pmw) { self.__WB_pmw = function(obj) { this.__WB_source = obj; return this; } }
+const window = wrapObj("window");
+const document = wrapObj("document");
+const location = wrapObj("location");
+const top = wrapObj("top");
+const parent = wrapObj("parent");
+const frames = wrapObj("frames");
+const opener = wrapObj("opener");
+const __self = wrapObj("self");
+const __globalThis = wrapObj("globalThis");
+export { window, document, location, top, parent, frames, opener, __self as self, __globalThis as globalThis };