mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
wombat.js: (#351)
- improved worker rewriting: worker rewriting now handles non-blob URLs; added a SharedWorker override. ww_rw.js: - updated to be a much more complete rewriting system, with overrides for importScripts and fetch. content_rewriter.py: - added the wkr_ mod for handling Worker/SharedWorker, following the convention used for service workers. test_content_rewriter.py: - added a test for content rewriting of Worker/SharedWorker.
This commit is contained in:
parent
ac930c340a
commit
1156032e0e
@ -381,7 +381,7 @@ class RewriteInfo(object):
|
||||
def _resolve_text_type(self, text_type):
|
||||
mod = self.url_rewriter.wburl.mod
|
||||
|
||||
if mod == 'sw_':
|
||||
if mod == 'sw_' or mod == 'wkr_':
|
||||
return None
|
||||
|
||||
if text_type == 'css' and mod == 'js_':
|
||||
@ -449,7 +449,7 @@ class RewriteInfo(object):
|
||||
return True
|
||||
|
||||
def is_url_rw(self):
    """Return False when the request's wburl modifier is one of the
    no-URL-rewrite modifiers ('id_', 'bn_', 'sw_', 'wkr_'), True otherwise.
    """
    # 'wkr_' (Worker/SharedWorker content) is listed next to 'sw_' so that
    # worker scripts are treated the same way as service worker scripts here.
    return self.url_rewriter.wburl.mod not in ('id_', 'bn_', 'sw_', 'wkr_')
|
||||
|
@ -176,6 +176,15 @@ class TestContentRewriter(object):
|
||||
exp = 'function() { location.href = "http://example.com/"; }'
|
||||
assert b''.join(gen).decode('utf-8') == exp
|
||||
|
||||
def test_rewrite_worker(self):
|
||||
headers = {'Content-Type': 'application/x-javascript'}
|
||||
content = 'importScripts("http://example.com/js.js")'
|
||||
|
||||
rwheaders, gen, is_rw = self.rewrite_record(headers, content, ts='201701wkr_')
|
||||
|
||||
exp = 'importScripts("http://example.com/js.js")'
|
||||
assert b''.join(gen).decode('utf-8') == exp
|
||||
|
||||
def test_banner_only_no_cookie_rewrite(self):
|
||||
headers = {'Set-Cookie': 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/',
|
||||
'Content-Type': 'text/javascript'}
|
||||
|
@ -1326,58 +1326,92 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
|
||||
//============================================
|
||||
/**
 * Turn the script URL a page passes to Worker/SharedWorker into the URL that
 * should be handed to the native constructor.
 *
 * URL handling:
 *  - "blob:" URLs are left as they are.
 *  - "javascript:" URLs are treated as inline worker source.
 *  - anything else goes through rewrite_url(..., false, 'wkr_'); URLs that
 *    start with neither a VALID_PREFIXES entry, "/", nor a BAD_PREFIXES entry
 *    are first resolved with resolve_rel_url against $wbwindow.document.
 *
 * When wbinfo.ww_rw_script or wbinfo.static_prefix is set, the worker source
 * is loaded, "__WB_pmw(...)." prefixes in front of postMessage( are removed,
 * a bootstrap that imports the worker rewriting script and instantiates
 * WBWombat is prepended, and a blob URL for the combined source is returned.
 *
 * When neither is set no blob can be built: blob:/javascript: URLs are
 * returned unchanged and every other URL is returned in its rewritten form.
 *
 * @param {string} workerUrl URL given to the Worker/SharedWorker constructor
 * @returns {string} URL to pass to the native constructor
 */
function rewriteWorker(workerUrl) {
    var isBlob = starts_with(workerUrl, 'blob:');
    var isJsUrl = !isBlob && starts_with(workerUrl, 'javascript:');
    var rwURL;

    if (isBlob || isJsUrl) {
        // nothing to rewrite: blobs are fetched as-is, JS urls carry their source inline
        rwURL = workerUrl;
    } else if (!starts_with(workerUrl, VALID_PREFIXES.concat('/')) &&
               !starts_with(workerUrl, BAD_PREFIXES)) {
        // super relative url assets/js/xyz.js
        rwURL = rewrite_url(resolve_rel_url(workerUrl, $wbwindow.document), false, 'wkr_');
    } else {
        // just rewrite it
        rwURL = rewrite_url(workerUrl, false, 'wkr_');
    }

    if (!(wbinfo.static_prefix || wbinfo.ww_rw_script)) {
        // No bootstrap script to inject, so there is no reason to fetch the
        // source; hand back the rewritten URL rather than the original one.
        return rwURL;
    }

    var workerCode;
    if (isJsUrl) {
        // JS url: the worker source is whatever follows the scheme
        workerCode = workerUrl.replace('javascript:', '');
    } else {
        // use sync ajax request to get the contents, remove postMessage() rewriting
        var x = new XMLHttpRequest();
        x.open("GET", rwURL, false);
        x.send();
        workerCode = x.responseText.replace(/__WB_pmw\(.*?\)\.(?=postMessage\()/g, "");
    }

    var ww_rw = wbinfo.ww_rw_script || wbinfo.static_prefix + "ww_rw.js";
    var rw = "(function() { " + "self.importScripts('" + ww_rw + "');" +
        "new WBWombat({'prefix': '" + wb_abs_prefix + 'wkr_' + "/'}); " + "})();";

    var blob = new Blob([rw + workerCode], {"type": "text/javascript"});
    return URL.createObjectURL(blob);
}
|
||||
|
||||
/**
 * Replace $wbwindow.Worker with a wrapper that passes the script URL through
 * rewriteWorker() before constructing the native Worker.
 *
 * Does nothing when $wbwindow.Worker is not available. The second constructor
 * argument (worker options) is forwarded untouched. The wrapper shares the
 * native constructor's prototype, and because it returns the native instance,
 * `new $wbwindow.Worker(...)` yields a real Worker object.
 */
function init_web_worker_override() {
    if (!$wbwindow.Worker) {
        return;
    }

    var orig_worker = $wbwindow.Worker;

    // blob: and non-blob urls are both handled by rewriteWorker, so no
    // separate blob-only code path is needed here
    $wbwindow.Worker = (function (Worker) {
        return function (url, options) {
            return new Worker(rewriteWorker(url), options);
        };
    })(orig_worker);

    $wbwindow.Worker.prototype = orig_worker.prototype;
}
|
||||
|
||||
/**
 * Replace $wbwindow.SharedWorker with a wrapper that passes the script URL
 * through rewriteWorker() before constructing the native SharedWorker.
 *
 * Does nothing when $wbwindow.SharedWorker is not available. The second
 * constructor argument (a name string or an options object, per
 * https://html.spec.whatwg.org/multipage/workers.html#sharedworker) is
 * forwarded untouched. The wrapper shares the native constructor's prototype.
 */
function initSharedWorkerOverride() {
    if (!$wbwindow.SharedWorker) {
        return;
    }

    var oSharedWorker = $wbwindow.SharedWorker;

    $wbwindow.SharedWorker = (function(SharedWorker) {
        return function(url, options) {
            return new SharedWorker(rewriteWorker(url), options);
        };
    })(oSharedWorker);

    $wbwindow.SharedWorker.prototype = oSharedWorker.prototype;
}
|
||||
|
||||
//============================================
|
||||
function init_service_worker_override() {
|
||||
@ -3432,6 +3466,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
// Worker override (experimental)
|
||||
init_web_worker_override();
|
||||
init_service_worker_override();
|
||||
initSharedWorkerOverride();
|
||||
|
||||
|
||||
// innerHTML can be overriden on prototype!
|
||||
override_html_assign($wbwindow.HTMLElement, "innerHTML", true);
|
||||
|
@ -21,9 +21,9 @@ function WBWombat(info) {
|
||||
async = true;
|
||||
}
|
||||
|
||||
result = orig.call(this, method, url, async, user, password);
|
||||
var result = orig.call(this, method, url, async, user, password);
|
||||
|
||||
if (url.indexOf("data:") != 0) {
|
||||
if (url.indexOf('data:') !== 0) {
|
||||
this.setRequestHeader('X-Pywb-Requested-With', 'XMLHttpRequest');
|
||||
}
|
||||
}
|
||||
@ -32,6 +32,41 @@ function WBWombat(info) {
|
||||
}
|
||||
|
||||
init_ajax_rewrite();
|
||||
|
||||
/**
 * Build an array-like copy of an arguments object in which every entry has
 * been passed through rewrite_url().
 *
 * @param {Object} argsObj array-like (indexed entries plus `length`)
 * @returns {Object} plain object with the same `length` and rewritten entries
 */
function rewriteArgs(argsObj) {
    var count = argsObj.length;
    var rewritten = {length: count};
    var idx = 0;
    while (idx < count) {
        rewritten[idx] = rewrite_url(argsObj[idx]);
        idx += 1;
    }
    return rewritten;
}
|
||||
|
||||
// Keep a handle on the native importScripts, then install a wrapper that
// rewrites every script URL (via rewriteArgs) before delegating to it.
var origImportScripts = self.importScripts;
self.importScripts = function importScripts() {
    // fn.apply accepts the array-like produced by rewriteArgs
    return origImportScripts.apply(this, rewriteArgs(arguments));
};
|
||||
|
||||
if (self.fetch != null) {
    // this fetch is Worker.fetch
    var orig_fetch = self.fetch;

    // Wrapper around the worker's fetch:
    //  - a string input is passed through rewrite_url()
    //  - an object input with a `url` (e.g. a Request) is re-created with the
    //    rewritten url when rewriting changed it
    //  - the request is always sent with credentials: 'include'
    self.fetch = function(input, init_opts) {
        var inputType = typeof(input);
        if (inputType === 'string') {
            input = rewrite_url(input);
        } else if (inputType === 'object' && input !== null && input.url) {
            // typeof null is 'object', hence the explicit null check
            var new_url = rewrite_url(input.url);
            if (new_url !== input.url) {
                input = new Request(new_url, input);
            }
        }

        // Work on a shallow copy so the caller's init object is not modified
        // when credentials is forced below. for-in also picks up inherited
        // enumerable properties, which a plain property read would see too.
        var rw_opts = {};
        if (init_opts) {
            for (var key in init_opts) {
                rw_opts[key] = init_opts[key];
            }
        }
        rw_opts['credentials'] = 'include';

        return orig_fetch.call(this, input, rw_opts);
    };
}
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user