From 1156032e0eee182d5868cc8c8eaeab3e3a7534ee Mon Sep 17 00:00:00 2001 From: John Berlin Date: Mon, 6 Aug 2018 13:12:16 -0400 Subject: [PATCH] wombat.js: (#351) - improved worker rewriting: updated worker rewriting handles non-blob urls, added SharedWorker override ww_rw.js: - updated to be a much more complete rewriting system: overrides for importScripts, and fetch content_rewriter.py: - added wkr_ mod for handling Worker/SharedWorker, follows convention of service worker test_content_rewriter.py - added test for content rewriting of Worker/SharedWorker --- pywb/rewrite/content_rewriter.py | 4 +- pywb/rewrite/test/test_content_rewriter.py | 9 ++ pywb/static/wombat.js | 108 ++++++++++++++------- pywb/static/ww_rw.js | 41 +++++++- 4 files changed, 121 insertions(+), 41 deletions(-) diff --git a/pywb/rewrite/content_rewriter.py b/pywb/rewrite/content_rewriter.py index b255fcdd..b0bbdda4 100644 --- a/pywb/rewrite/content_rewriter.py +++ b/pywb/rewrite/content_rewriter.py @@ -381,7 +381,7 @@ class RewriteInfo(object): def _resolve_text_type(self, text_type): mod = self.url_rewriter.wburl.mod - if mod == 'sw_': + if mod == 'sw_' or mod == 'wkr_': return None if text_type == 'css' and mod == 'js_': @@ -449,7 +449,7 @@ class RewriteInfo(object): return True def is_url_rw(self): - if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'sw_'): + if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'sw_', 'wkr_'): return False return True diff --git a/pywb/rewrite/test/test_content_rewriter.py b/pywb/rewrite/test/test_content_rewriter.py index 9a732087..6e09dfb1 100644 --- a/pywb/rewrite/test/test_content_rewriter.py +++ b/pywb/rewrite/test/test_content_rewriter.py @@ -176,6 +176,15 @@ class TestContentRewriter(object): exp = 'function() { location.href = "http://example.com/"; }' assert b''.join(gen).decode('utf-8') == exp + def test_rewrite_worker(self): + headers = {'Content-Type': 'application/x-javascript'} + content = 'importScripts("http://example.com/js.js")' + + rwheaders, gen, is_rw = self.rewrite_record(headers, content, ts='201701wkr_') + + exp = 'importScripts("http://example.com/js.js")' + assert b''.join(gen).decode('utf-8') == exp + def test_banner_only_no_cookie_rewrite(self): headers = {'Set-Cookie': 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/', 'Content-Type': 'text/javascript'} diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js index 2802311c..89780bd5 100644 --- a/pywb/static/wombat.js +++ b/pywb/static/wombat.js @@ -1326,58 +1326,92 @@ var _WBWombat = function($wbwindow, wbinfo) { } //============================================ + function rewriteWorker(workerUrl) { + var fetch = true; + var makeBlob = false; + var rwURL; + if (!starts_with(workerUrl, 'blob:')) { + if (starts_with(workerUrl, 'javascript:')) { + // JS url, just strip javascript: + fetch = false; + rwURL = workerUrl.replace('javascript:', ''); + } else if (!starts_with(workerUrl, VALID_PREFIXES.concat('/')) && + !starts_with(workerUrl, BAD_PREFIXES)) { + // super relative url assets/js/xyz.js + var rurl = resolve_rel_url(workerUrl, $wbwindow.document); + rwURL = rewrite_url(rurl, false, 'wkr_'); + } else { + // just rewrite it + rwURL = rewrite_url(workerUrl, false, 'wkr_'); + } + } else { + // blob + rwURL = workerUrl; + } + + var workerCode; + if (fetch) { + // fetching only skipped if it was JS url + var x = new XMLHttpRequest(); + // use sync ajax request to get the contents, remove postMessage() rewriting + x.open("GET", rwURL, false); + x.send(); + workerCode = x.responseText.replace(/__WB_pmw\(.*?\)\.(?=postMessage\()/g, ""); + } else { + // was JS url, simply make workerCode the JS string + workerCode = workerUrl; + } + + if (wbinfo.static_prefix || wbinfo.ww_rw_script) { + // if we are here we can must return blob so set makeBlob to true + var ww_rw = wbinfo.ww_rw_script || wbinfo.static_prefix + "ww_rw.js"; + var rw = "(function() { " + "self.importScripts('" + ww_rw + "');" + + "new WBWombat({'prefix': '" + wb_abs_prefix + 'wkr_' + "/'}); " + "})();"; + workerCode = rw + workerCode; + makeBlob = true; + } + + if (makeBlob) { + var blob = new Blob([workerCode], {"type": "text/javascript"}); + return URL.createObjectURL(blob); + } else { + return workerUrl; + } + } + function init_web_worker_override() { if (!$wbwindow.Worker) { return; } - // for now, disabling workers until override of worker content can be supported - // hopefully, pages depending on workers will have a fallback - //$wbwindow.Worker = undefined; - // Worker unrewrite postMessage var orig_worker = $wbwindow.Worker; - function rewrite_blob(url) { - // use sync ajax request to get the contents, remove postMessage() rewriting - var x = new XMLHttpRequest(); - x.open("GET", url, false); - x.send(); - - var resp = x.responseText.replace(/__WB_pmw\(.*?\)\.(?=postMessage\()/g, ""); - - if (wbinfo.static_prefix || wbinfo.ww_rw_script) { - var ww_rw = wbinfo.ww_rw_script || wbinfo.static_prefix + "ww_rw.js"; - var rw = "(function() { " + -"self.importScripts('" + ww_rw + "');" + - -"new WBWombat({'prefix': '" + wb_abs_prefix + wb_info.mod + "/'}); " + - -"})();"; - resp = rw + resp; - } - - if (resp != x.responseText) { - var blob = new Blob([resp], {"type": "text/javascript"}); - return URL.createObjectURL(blob); - } else { - return url; - } - } - $wbwindow.Worker = (function (Worker) { return function (url) { - if (starts_with(url, "blob:")) { - url = rewrite_blob(url); - } - return new Worker(url); + return new Worker(rewriteWorker(url)); } - })($wbwindow.Worker); + })(orig_worker); $wbwindow.Worker.prototype = orig_worker.prototype; } + function initSharedWorkerOverride() { + if (!$wbwindow.SharedWorker) { + return; + } + // per https://html.spec.whatwg.org/multipage/workers.html#sharedworker + var oSharedWorker = $wbwindow.SharedWorker; + + $wbwindow.SharedWorker = (function(SharedWorker) { + return function(url) { + return new SharedWorker(rewriteWorker(url)); + }; + })(oSharedWorker); + + $wbwindow.SharedWorker.prototype = oSharedWorker.prototype; + } //============================================ function init_service_worker_override() { @@ -3432,6 +3466,8 @@ var _WBWombat = function($wbwindow, wbinfo) { // Worker override (experimental) init_web_worker_override(); init_service_worker_override(); + initSharedWorkerOverride(); + // innerHTML can be overriden on prototype! override_html_assign($wbwindow.HTMLElement, "innerHTML", true); diff --git a/pywb/static/ww_rw.js b/pywb/static/ww_rw.js index dfeb1fc3..e9e36d08 100644 --- a/pywb/static/ww_rw.js +++ b/pywb/static/ww_rw.js @@ -21,9 +21,9 @@ function WBWombat(info) { async = true; } - result = orig.call(this, method, url, async, user, password); + var result = orig.call(this, method, url, async, user, password); - if (url.indexOf("data:") != 0) { + if (url.indexOf('data:') !== 0) { this.setRequestHeader('X-Pywb-Requested-With', 'XMLHttpRequest'); } } @@ -32,6 +32,41 @@ function WBWombat(info) { } init_ajax_rewrite(); + + function rewriteArgs(argsObj) { + // recreate the original arguments object just with URLs rewritten + var newArgObj = {length: argsObj.length}; + for (var i = 0; i < argsObj.length; i++) { + var arg = argsObj[i]; + newArgObj[i] = rewrite_url(arg); + } + return newArgObj; + } + + var origImportScripts = self.importScripts; + self.importScripts = function importScripts() { + // rewrite the arguments object and call original function via fn.apply + var rwArgs = rewriteArgs(arguments); + return origImportScripts.apply(this, rwArgs); + }; + + if (self.fetch != null) { + // this fetch is Worker.fetch + var orig_fetch = self.fetch; + self.fetch = function(input, init_opts) { + var inputType = typeof(input); + if (inputType === 'string') { + input = rewrite_url(input); + } else if (inputType === 'object' && input.url) { + var new_url = rewrite_url(input.url); + if (new_url !== input.url) { + input = new Request(new_url, input); + } + } + init_opts = init_opts || {}; + init_opts['credentials'] = 'include'; + return orig_fetch.call(this, input, init_opts); + }; + } } -