1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

wombat.js: (#351)

- improved worker rewriting: updated worker rewriting handles non-blob urls, added SharedWorker override
ww_rw.js:
- updated to be a much more complete rewriting system: overrides for importScripts, and fetch
content_rewriter.py:
- added wkr_ mod for handling Worker/SharedWorker, follows convention of service worker
test_content_rewriter.py
- added test for content rewriting of Worker/SharedWorker
This commit is contained in:
John Berlin 2018-08-06 13:12:16 -04:00 committed by Ilya Kreymer
parent ac930c340a
commit 1156032e0e
4 changed files with 121 additions and 41 deletions

View File

@ -381,7 +381,7 @@ class RewriteInfo(object):
def _resolve_text_type(self, text_type):
mod = self.url_rewriter.wburl.mod
if mod == 'sw_':
if mod == 'sw_' or mod == 'wkr_':
return None
if text_type == 'css' and mod == 'js_':
@ -449,7 +449,7 @@ class RewriteInfo(object):
return True
def is_url_rw(self):
# Reports whether URL rewriting applies: False when the rewriter's wburl
# modifier is one of the listed values, True otherwise.
# NOTE(review): the two consecutive `if` lines look like the removed and the
# added version of the same diff line (the second one adds 'wkr_') -- confirm
# against the repository.
if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'sw_'):
if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'sw_', 'wkr_'):
return False
return True

View File

@ -176,6 +176,15 @@ class TestContentRewriter(object):
exp = 'function() { location.href = "http://example.com/"; }'
assert b''.join(gen).decode('utf-8') == exp
def test_rewrite_worker(self):
    # A JavaScript record requested with the wkr_ modifier is expected to
    # come back with its content unchanged.
    worker_source = 'importScripts("http://example.com/js.js")'
    response_headers = {'Content-Type': 'application/x-javascript'}

    _, chunks, _ = self.rewrite_record(response_headers, worker_source, ts='201701wkr_')

    rewritten = b''.join(chunks).decode('utf-8')
    assert rewritten == 'importScripts("http://example.com/js.js")'
def test_banner_only_no_cookie_rewrite(self):
headers = {'Set-Cookie': 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/',
'Content-Type': 'text/javascript'}

View File

@ -1326,58 +1326,92 @@ var _WBWombat = function($wbwindow, wbinfo) {
}
//============================================
/**
 * Produces the URL a Worker/SharedWorker should be constructed with.
 *
 * - blob: URLs are fetched as-is.
 * - javascript: URLs are not fetched; the text after the scheme is used as
 *   the worker source.
 * - Any other URL is rewritten with the 'wkr_' modifier (URLs that match
 *   neither VALID_PREFIXES, '/', nor BAD_PREFIXES are first resolved against
 *   $wbwindow.document) and fetched with a synchronous XHR.
 *
 * Fetched source has the `__WB_pmw(...).` prefix removed from postMessage()
 * calls. When wbinfo.static_prefix or wbinfo.ww_rw_script is set, the source
 * is prefixed with a bootstrap that imports ww_rw.js and instantiates
 * WBWombat, and a blob URL for the combined source is returned. Otherwise
 * the original workerUrl is returned unchanged.
 *
 * @param {string} workerUrl URL handed to the Worker/SharedWorker constructor
 * @returns {string} blob URL of the wrapped source, or workerUrl
 */
function rewriteWorker(workerUrl) {
    // named shouldFetch so the flag does not shadow the global fetch()
    var shouldFetch = true;
    var rwURL;

    if (starts_with(workerUrl, 'blob:')) {
        // blob
        rwURL = workerUrl;
    } else if (starts_with(workerUrl, 'javascript:')) {
        // JS url, just strip javascript:
        shouldFetch = false;
        rwURL = workerUrl.replace('javascript:', '');
    } else if (!starts_with(workerUrl, VALID_PREFIXES.concat('/')) &&
               !starts_with(workerUrl, BAD_PREFIXES)) {
        // super relative url assets/js/xyz.js
        var rurl = resolve_rel_url(workerUrl, $wbwindow.document);
        rwURL = rewrite_url(rurl, false, 'wkr_');
    } else {
        // just rewrite it
        rwURL = rewrite_url(workerUrl, false, 'wkr_');
    }

    var workerCode;
    if (shouldFetch) {
        // fetching is only skipped for JS urls
        // use sync ajax request to get the contents, remove postMessage() rewriting
        var xhr = new XMLHttpRequest();
        xhr.open("GET", rwURL, false);
        xhr.send();
        workerCode = xhr.responseText.replace(/__WB_pmw\(.*?\)\.(?=postMessage\()/g, "");
    } else {
        // was JS url: the worker source is the URL with the scheme stripped
        // (previously the unstripped workerUrl was used here, which put the
        // literal "javascript:" prefix into the worker script)
        workerCode = rwURL;
    }

    if (!(wbinfo.static_prefix || wbinfo.ww_rw_script)) {
        // no worker rewriting script configured: hand back the caller's URL
        return workerUrl;
    }

    // a rewriting script is available, so a blob wrapping the source must be returned
    var ww_rw = wbinfo.ww_rw_script || wbinfo.static_prefix + "ww_rw.js";
    var bootstrap = "(function() { " + "self.importScripts('" + ww_rw + "');" +
        "new WBWombat({'prefix': '" + wb_abs_prefix + 'wkr_' + "/'}); " + "})();";
    var blob = new Blob([bootstrap + workerCode], {"type": "text/javascript"});
    return URL.createObjectURL(blob);
}
// Replaces $wbwindow.Worker with a wrapper constructor and points the
// wrapper's prototype at the original Worker prototype. Does nothing when the
// window has no Worker.
// NOTE(review): this span appears to show removed and added diff lines
// together without +/- markers (rewrite_blob, the two `return new Worker(...)`
// lines, the two IIFE closers), so it is not valid JavaScript as printed --
// confirm against the repository before editing.
function init_web_worker_override() {
if (!$wbwindow.Worker) {
return;
}
// for now, disabling workers until override of worker content can be supported
// hopefully, pages depending on workers will have a fallback
//$wbwindow.Worker = undefined;
// Worker unrewrite postMessage
var orig_worker = $wbwindow.Worker;
// Fetches `url` synchronously, strips the __WB_pmw(...) prefix from
// postMessage() calls, optionally prepends the ww_rw.js bootstrap, and returns
// a new blob URL if the text changed (otherwise the original url).
function rewrite_blob(url) {
// use sync ajax request to get the contents, remove postMessage() rewriting
var x = new XMLHttpRequest();
x.open("GET", url, false);
x.send();
var resp = x.responseText.replace(/__WB_pmw\(.*?\)\.(?=postMessage\()/g, "");
if (wbinfo.static_prefix || wbinfo.ww_rw_script) {
var ww_rw = wbinfo.ww_rw_script || wbinfo.static_prefix + "ww_rw.js";
var rw = "(function() { " +
"self.importScripts('" + ww_rw + "');" +
// NOTE(review): `wb_info` here vs `wbinfo` on the surrounding lines -- confirm which name is intended
"new WBWombat({'prefix': '" + wb_abs_prefix + wb_info.mod + "/'}); " +
"})();";
resp = rw + resp;
}
// only create a blob when the source was actually modified
if (resp != x.responseText) {
var blob = new Blob([resp], {"type": "text/javascript"});
return URL.createObjectURL(blob);
} else {
return url;
}
}
$wbwindow.Worker = (function (Worker) {
// NOTE(review): the wrapper forwards only `url`; any further constructor
// arguments are not passed on -- confirm whether that is intended
return function (url) {
if (starts_with(url, "blob:")) {
url = rewrite_blob(url);
}
return new Worker(url);
return new Worker(rewriteWorker(url));
}
})($wbwindow.Worker);
})(orig_worker);
// assign the original prototype to the wrapper's prototype property
$wbwindow.Worker.prototype = orig_worker.prototype;
}
/**
 * Replaces $wbwindow.SharedWorker with a wrapper constructor that passes the
 * script URL through rewriteWorker() before invoking the original
 * constructor. Does nothing when the window has no SharedWorker.
 *
 * The wrapper forwards the optional second constructor argument (a name
 * string or an options object per the HTML spec) so that callers relying on
 * named shared workers keep working.
 */
function initSharedWorkerOverride() {
    if (!$wbwindow.SharedWorker) {
        return;
    }
    // per https://html.spec.whatwg.org/multipage/workers.html#sharedworker
    var oSharedWorker = $wbwindow.SharedWorker;
    $wbwindow.SharedWorker = (function(SharedWorker) {
        return function(url, options) {
            // returning an object from a constructor makes `new` yield that
            // object, so callers receive a genuine SharedWorker instance
            return new SharedWorker(rewriteWorker(url), options);
        };
    })(oSharedWorker);
    // give the wrapper the original prototype object
    $wbwindow.SharedWorker.prototype = oSharedWorker.prototype;
}
//============================================
function init_service_worker_override() {
@ -3432,6 +3466,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
// Worker override (experimental)
init_web_worker_override();
init_service_worker_override();
initSharedWorkerOverride();
// innerHTML can be overridden on prototype!
override_html_assign($wbwindow.HTMLElement, "innerHTML", true);

View File

@ -21,9 +21,9 @@ function WBWombat(info) {
async = true;
}
result = orig.call(this, method, url, async, user, password);
var result = orig.call(this, method, url, async, user, password);
if (url.indexOf("data:") != 0) {
if (url.indexOf('data:') !== 0) {
this.setRequestHeader('X-Pywb-Requested-With', 'XMLHttpRequest');
}
}
@ -32,6 +32,41 @@ function WBWombat(info) {
}
init_ajax_rewrite();
/**
 * Builds an array-like copy of an arguments object in which every entry has
 * been passed through rewrite_url().
 *
 * @param {Object} argsObj array-like (indexed entries plus `length`)
 * @returns {Object} plain object with the same `length` and rewritten entries
 */
function rewriteArgs(argsObj) {
    var count = argsObj.length;
    var rewritten = {length: count};
    var idx = 0;
    while (idx < count) {
        rewritten[idx] = rewrite_url(argsObj[idx]);
        idx += 1;
    }
    return rewritten;
}
// Keep a handle on the importScripts currently installed on self, then
// replace it with a wrapper that runs every argument through rewriteArgs()
// and delegates to the saved function with the same `this`.
var origImportScripts = self.importScripts;
self.importScripts = function importScripts() {
    return origImportScripts.apply(this, rewriteArgs(arguments));
};
// Wrap the worker's fetch (when present) so that request URLs are passed
// through rewrite_url() and every request is sent with credentials 'include'.
if (self.fetch != null) {
    // this fetch is Worker.fetch
    var orig_fetch = self.fetch;
    self.fetch = function(input, init_opts) {
        var inputType = typeof(input);
        if (inputType === 'string') {
            input = rewrite_url(input);
        } else if (inputType === 'object' && input !== null && input.url) {
            // Request-like input (typeof null is 'object', hence the null guard):
            // rebuild it only when rewriting actually changed the URL
            var new_url = rewrite_url(input.url);
            if (new_url !== input.url) {
                input = new Request(new_url, input);
            }
        }
        // Copy the caller's options rather than writing into their object, so
        // an init object reused across calls is not silently modified.
        var opts = {};
        if (init_opts) {
            for (var key in init_opts) {
                opts[key] = init_opts[key];
            }
        }
        opts['credentials'] = 'include';
        return orig_fetch.call(this, input, opts);
    };
}
}