mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
Exposed AutoFetchWorker on window in proxy-mode (#389)
Added methods to AutoFetchWorker in proxy mode that allow external JS to initiate checks. Updated the actual proxy mode worker implementation to match the functionality added.
This commit is contained in:
parent
2c8d607b18
commit
9597a632c8
@ -8,6 +8,7 @@ var FullImgQDrainLen = 10;
|
|||||||
var DefaultNumAvFetches = 5;
|
var DefaultNumAvFetches = 5;
|
||||||
var FullAVQDrainLen = 5;
|
var FullAVQDrainLen = 5;
|
||||||
var DataURLPrefix = 'data:';
|
var DataURLPrefix = 'data:';
|
||||||
|
var FetchDelay = 1000;
|
||||||
// the autofetcher instance for this worker
|
// the autofetcher instance for this worker
|
||||||
var autofetcher = null;
|
var autofetcher = null;
|
||||||
|
|
||||||
@ -49,6 +50,9 @@ self.onmessage = function (event) {
|
|||||||
case 'values':
|
case 'values':
|
||||||
autofetcher.autofetchMediaSrcset(data);
|
autofetcher.autofetchMediaSrcset(data);
|
||||||
break;
|
break;
|
||||||
|
case 'fetch-all':
|
||||||
|
autofetcher.justFetch(data);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -74,9 +78,8 @@ function AutoFetcher() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
AutoFetcher.prototype.delay = function () {
|
AutoFetcher.prototype.delay = function () {
|
||||||
// 2 second delay seem reasonable
|
|
||||||
return new Promise(function (resolve, reject) {
|
return new Promise(function (resolve, reject) {
|
||||||
setTimeout(resolve, 2000);
|
setTimeout(resolve, FetchDelay);
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -270,4 +273,13 @@ AutoFetcher.prototype.autofetchMediaSrcset = function (data) {
|
|||||||
this.fetchAV();
|
this.fetchAV();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
AutoFetcher.prototype.justFetch = function (data) {
|
||||||
|
// we got a message containing only urls to be fetched
|
||||||
|
if (data == null || data.values == null) return;
|
||||||
|
for (var i = 0; i < data.values.length; ++i) {
|
||||||
|
this.queueNonAVURL(data.values[i]);
|
||||||
|
}
|
||||||
|
this.fetchImgs();
|
||||||
|
};
|
||||||
|
|
||||||
autofetcher = new AutoFetcher();
|
autofetcher = new AutoFetcher();
|
||||||
|
@ -174,8 +174,8 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
if (!(this instanceof AutoFetchWorkerProxyMode)) {
|
if (!(this instanceof AutoFetchWorkerProxyMode)) {
|
||||||
return new AutoFetchWorkerProxyMode();
|
return new AutoFetchWorkerProxyMode();
|
||||||
}
|
}
|
||||||
this.checkIntervalTime = 15000;
|
|
||||||
this.checkIntervalCB = this.checkIntervalCB.bind(this);
|
this.checkIntervalCB = this.checkIntervalCB.bind(this);
|
||||||
|
this.checkIntervalTime = 15000;
|
||||||
this.elemSelector = ['img', 'source', 'video', 'audio'].map(function (which) {
|
this.elemSelector = ['img', 'source', 'video', 'audio'].map(function (which) {
|
||||||
if (which === 'source') {
|
if (which === 'source') {
|
||||||
return ['picture > ', 'video > ', 'audio >'].map(function (parent) {
|
return ['picture > ', 'video > ', 'audio >'].map(function (parent) {
|
||||||
@ -185,21 +185,20 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
return which + '[srcset], ' + which + '[data-srcset], ' + which + '[data-src]';
|
return which + '[srcset], ' + which + '[data-srcset], ' + which + '[data-src]';
|
||||||
}
|
}
|
||||||
}).join(', ');
|
}).join(', ');
|
||||||
|
// use our origins reference to the document in order for us to parse stylesheets :/
|
||||||
|
this.styleTag = document.createElement('style');
|
||||||
|
this.styleTag.id = '$wrStyleParser$';
|
||||||
|
document.documentElement.appendChild(this.styleTag);
|
||||||
if (isTop) {
|
if (isTop) {
|
||||||
// Cannot directly load our worker from the proxy origin into the current origin
|
// Cannot directly load our worker from the proxy origin into the current origin
|
||||||
// however we fetch it from proxy origin and can blob it into the current origin :)
|
// however we fetch it from proxy origin and can blob it into the current origin :)
|
||||||
var self = this;
|
var afwpm = this;
|
||||||
fetch(wbAutoFetchWorkerPrefix)
|
fetch(wbAutoFetchWorkerPrefix)
|
||||||
.then(function (res) {
|
.then(function (res) {
|
||||||
return res.text().then(function (text) {
|
return res.text().then(function (text) {
|
||||||
var blob = new Blob([text], {"type": "text/javascript"});
|
var blob = new Blob([text], { "type": "text/javascript" });
|
||||||
self.worker = new $wbwindow.Worker(URL.createObjectURL(blob));
|
afwpm.worker = new $wbwindow.Worker(URL.createObjectURL(blob));
|
||||||
// use our origins reference to the document in order for us to parse stylesheets :/
|
afwpm.startCheckingInterval();
|
||||||
self.styleTag = document.createElement('style');
|
|
||||||
self.styleTag.id = '$wrStyleParser$';
|
|
||||||
document.documentElement.appendChild(self.styleTag);
|
|
||||||
self.startCheckingInterval();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
@ -211,26 +210,40 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
$wbwindow.top.postMessage(msg, '*');
|
$wbwindow.top.postMessage(msg, '*');
|
||||||
},
|
},
|
||||||
"terminate": function () {
|
"terminate": function () {}
|
||||||
}
|
|
||||||
};
|
};
|
||||||
this.startCheckingInterval();
|
this.startCheckingInterval();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AutoFetchWorkerProxyMode.prototype.resumeCheckInterval = function () {
|
||||||
|
// if the checkInterval is null (it is not active) restart the check interval
|
||||||
|
if (this.checkInterval == null) {
|
||||||
|
this.checkInterval = setInterval(this.checkIntervalCB, this.checkIntervalTime);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
AutoFetchWorkerProxyMode.prototype.pauseCheckInterval = function () {
|
||||||
|
// if the checkInterval is not null (it is active) clear the check interval
|
||||||
|
if (this.checkInterval != null) {
|
||||||
|
clearInterval(this.checkInterval);
|
||||||
|
this.checkInterval = null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
AutoFetchWorkerProxyMode.prototype.startCheckingInterval = function () {
|
AutoFetchWorkerProxyMode.prototype.startCheckingInterval = function () {
|
||||||
// if document ready state is complete do first extraction and start check polling
|
// if document ready state is complete do first extraction and start check polling
|
||||||
// otherwise wait for document ready state to complete to extract and start check polling
|
// otherwise wait for document ready state to complete to extract and start check polling
|
||||||
var self = this;
|
var afwpm = this;
|
||||||
if ($wbwindow.document.readyState === "complete") {
|
if ($wbwindow.document.readyState === "complete") {
|
||||||
this.extractFromLocalDoc();
|
this.extractFromLocalDoc();
|
||||||
setInterval(this.checkIntervalCB, this.checkIntervalTime);
|
this.checkInterval = setInterval(this.checkIntervalCB, this.checkIntervalTime);
|
||||||
} else {
|
} else {
|
||||||
var i = setInterval(function () {
|
var i = setInterval(function () {
|
||||||
if ($wbwindow.document.readyState === "complete") {
|
if ($wbwindow.document.readyState === "complete") {
|
||||||
self.extractFromLocalDoc();
|
afwpm.extractFromLocalDoc();
|
||||||
clearInterval(i);
|
clearInterval(i);
|
||||||
setInterval(self.checkIntervalCB, self.checkIntervalTime);
|
afwpm.checkInterval = setInterval(afwpm.checkIntervalCB, afwpm.checkIntervalTime);
|
||||||
}
|
}
|
||||||
}, 1000);
|
}, 1000);
|
||||||
}
|
}
|
||||||
@ -245,147 +258,161 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
this.worker.terminate();
|
this.worker.terminate();
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorkerProxyMode.prototype.postMessage = function (msg, deferred) {
|
AutoFetchWorkerProxyMode.prototype.justFetch = function (urls) {
|
||||||
if (deferred) {
|
this.worker.postMessage({ 'type': 'fetch-all', 'values': urls });
|
||||||
var self = this;
|
};
|
||||||
return Promise.resolve().then(function () {
|
|
||||||
self.worker.postMessage(msg);
|
AutoFetchWorkerProxyMode.prototype.postMessage = function (msg) {
|
||||||
});
|
|
||||||
}
|
|
||||||
this.worker.postMessage(msg);
|
this.worker.postMessage(msg);
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorkerProxyMode.prototype.extractMediaRules = function (rules, href) {
|
|
||||||
// We are in proxy mode and must include a URL to resolve relative URLs in media rules
|
|
||||||
if (!rules) return [];
|
|
||||||
var rvlen = rules.length;
|
|
||||||
var text = [];
|
|
||||||
var rule;
|
|
||||||
for (var i = 0; i < rvlen; ++i) {
|
|
||||||
rule = rules[i];
|
|
||||||
if (rule.type === CSSRule.MEDIA_RULE) {
|
|
||||||
text.push({"cssText": rule.cssText, "resolve": href});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return text;
|
|
||||||
};
|
|
||||||
|
|
||||||
AutoFetchWorkerProxyMode.prototype.corsCSSFetch = function (href) {
|
|
||||||
// because this JS in proxy mode operates as it would on the live web
|
|
||||||
// the rules of CORS apply and we cannot rely on URLs being rewritten correctly
|
|
||||||
// fetch the cross origin css file and then parse it using a style tag to get the rules
|
|
||||||
var url = location.protocol + '//' + wb_info.proxy_magic + '/proxy-fetch/' + href;
|
|
||||||
var aaw = this;
|
|
||||||
return fetch(url).then(function (res) {
|
|
||||||
return res.text().then(function (text) {
|
|
||||||
aaw.styleTag.textContent = text;
|
|
||||||
var sheet = aaw.styleTag.sheet || {};
|
|
||||||
return aaw.extractMediaRules(sheet.cssRules || sheet.rules, href);
|
|
||||||
});
|
|
||||||
}).catch(function (error) {
|
|
||||||
return [];
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
AutoFetchWorkerProxyMode.prototype.shouldSkipSheet = function (sheet) {
|
AutoFetchWorkerProxyMode.prototype.shouldSkipSheet = function (sheet) {
|
||||||
// we skip extracting rules from sheets if they are from our parsing style or come from pywb
|
// we skip extracting rules from sheets if they are from our parsing style or come from pywb
|
||||||
if (sheet.id === '$wrStyleParser$') return true;
|
if (sheet.id === '$wrStyleParser$') return true;
|
||||||
return !!(sheet.href && sheet.href.indexOf(wb_info.proxy_magic) !== -1);
|
return !!(sheet.href && sheet.href.indexOf(wb_info.proxy_magic) !== -1);
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorkerProxyMode.prototype.getImgAVElems = function () {
|
AutoFetchWorkerProxyMode.prototype.validateSrcV = function (srcV) {
|
||||||
var elem, srcv, mod;
|
// returns null if the supplied value is not usable for resolving rel URLs
|
||||||
var results = { 'srcset': [], 'src': []} ;
|
// otherwise returns the supplied value
|
||||||
var baseURI = $wbwindow.document.baseURI;
|
if (!srcV || srcV.indexOf('data:') === 0 || srcV.indexOf('blob:') === 0) return null;
|
||||||
var elems = $wbwindow.document.querySelectorAll(this.elemSelector);
|
return srcV;
|
||||||
for (var i = 0; i < elems.length; i++) {
|
};
|
||||||
elem = elems[i];
|
|
||||||
// we want the original src value in order to resolve URLs in the worker when needed
|
AutoFetchWorkerProxyMode.prototype.fetchCSSAndExtract = function (cssURL) {
|
||||||
srcv = elem.src ? elem.src : null;
|
// because this JS in proxy mode operates as it would on the live web
|
||||||
// get the correct mod in order to inform the backing worker where the URL(s) are from
|
// the rules of CORS apply and we cannot rely on URLs being rewritten correctly
|
||||||
mod = elem.tagName === "SOURCE" ?
|
// fetch the cross origin css file and then parse it using a style tag to get the rules
|
||||||
elem.parentElement.tagName === "PICTURE" ? 'im_' : 'oe_'
|
var url = location.protocol + '//' + wb_info.proxy_magic + '/proxy-fetch/' + cssURL;
|
||||||
: elem.tagName === "IMG" ? 'im_' : 'oe_';
|
var afwpm = this;
|
||||||
if (elem.srcset) {
|
return fetch(url).then(function (res) {
|
||||||
results.srcset.push({ 'srcset': elem.srcset, 'resolve': srcv || baseURI, 'mod': mod });
|
return res.text().then(function (text) {
|
||||||
}
|
afwpm.styleTag.textContent = text;
|
||||||
if (elem.dataset.srcset) {
|
return afwpm.extractMediaRules(afwpm.styleTag.sheet, cssURL);
|
||||||
results.srcset.push({ 'srcset': elem.dataset.srcset, 'resolve': srcv || baseURI, 'mod': mod });
|
});
|
||||||
}
|
}).catch(function (error) {
|
||||||
if (elem.dataset.src) {
|
return [];
|
||||||
results.src.push({'src': elem.dataset.src, 'resolve': srcv || baseURI, 'mod': mod});
|
});
|
||||||
}
|
};
|
||||||
if (elem.tagName === "SOURCE" && srcv) {
|
|
||||||
results.src.push({'src': srcv, 'resolve': baseURI, 'mod': mod});
|
AutoFetchWorkerProxyMode.prototype.extractMediaRules = function (sheet, baseURI) {
|
||||||
|
// We are in proxy mode and must include a URL to resolve relative URLs in media rules
|
||||||
|
var results = [];
|
||||||
|
if (!sheet) return results;
|
||||||
|
var rules = sheet.cssRules || sheet.rules;
|
||||||
|
if (!rules || rules.length === 0) return results;
|
||||||
|
var len = rules.length;
|
||||||
|
var resolve = sheet.href || baseURI;
|
||||||
|
for (var i = 0; i < len; ++i) {
|
||||||
|
var rule = rules[i];
|
||||||
|
if (rule.type === CSSRule.MEDIA_RULE) {
|
||||||
|
results.push({ "cssText": rule.cssText, "resolve": resolve });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return results;
|
return results;
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorkerProxyMode.prototype.extractFromLocalDoc = function () {
|
AutoFetchWorkerProxyMode.prototype.extractSrcSrcsetFrom = function (fromElem, baseURI) {
|
||||||
|
// retrieve the auto-fetched elements from the supplied dom node
|
||||||
|
var elems = fromElem.querySelectorAll(this.elemSelector);
|
||||||
|
var len = elems.length;
|
||||||
|
var msg = {'type': 'values', 'srcset': [], 'src': []};
|
||||||
|
for (var i = 0; i < len; i++) {
|
||||||
|
var elem = elems[i];
|
||||||
|
// we want the original src value in order to resolve URLs in the worker when needed
|
||||||
|
var srcv = this.validateSrcV(elem.src);
|
||||||
|
var resolve = srcv || baseURI;
|
||||||
|
// get the correct mod in order to inform the backing worker where the URL(s) are from
|
||||||
|
var mod = elem.tagName === "SOURCE" ?
|
||||||
|
elem.parentElement.tagName === "PICTURE" ? 'im_' : 'oe_'
|
||||||
|
: elem.tagName === "IMG" ? 'im_' : 'oe_';
|
||||||
|
if (elem.srcset) {
|
||||||
|
msg.srcset.push({'srcset': elem.srcset, 'resolve': resolve, 'mod': mod});
|
||||||
|
}
|
||||||
|
if (elem.dataset.srcset) {
|
||||||
|
msg.srcset.push({'srcset': elem.dataset.srcset, 'resolve': resolve, 'mod': mod});
|
||||||
|
}
|
||||||
|
if (elem.dataset.src) {
|
||||||
|
msg.src.push({'src': elem.dataset.src, 'resolve': resolve, 'mod': mod});
|
||||||
|
}
|
||||||
|
if (elem.tagName === "SOURCE" && srcv) {
|
||||||
|
msg.src.push({'src': srcv, 'resolve': baseURI, 'mod': mod});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// send what we have extracted, if anything, to the worker for processing
|
||||||
|
if (msg.srcset.length || msg.src.length) {
|
||||||
|
this.postMessage(msg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
AutoFetchWorkerProxyMode.prototype.checkStyleSheets = function (doc) {
|
||||||
var media = [];
|
var media = [];
|
||||||
var deferredMediaURLS = [];
|
var deferredMediaExtraction = [];
|
||||||
var sheet;
|
var styleSheets = doc.styleSheets;
|
||||||
var resolve;
|
var sheetLen = styleSheets.length;
|
||||||
// We must use the window reference passed to us to access this origins stylesheets
|
|
||||||
var styleSheets = $wbwindow.document.styleSheets;
|
for (var i = 0; i < sheetLen; i++) {
|
||||||
for (var i = 0; i < styleSheets.length; i++) {
|
var sheet = styleSheets[i];
|
||||||
sheet = styleSheets[i];
|
|
||||||
// if the sheet belongs to our parser node we must skip it
|
// if the sheet belongs to our parser node we must skip it
|
||||||
if (!this.shouldSkipSheet(sheet)) {
|
if (!this.shouldSkipSheet(sheet)) {
|
||||||
try {
|
try {
|
||||||
// if no error is thrown due to cross origin sheet the urls then just add
|
// if no error is thrown due to cross origin sheet the urls then just add
|
||||||
// the resolved URLS if any to the media urls array
|
// the resolved URLS if any to the media urls array
|
||||||
if (sheet.cssRules != null) {
|
if (sheet.cssRules || sheet.rules) {
|
||||||
resolve = sheet.href || $wbwindow.document.baseURI;
|
var extracted = this.extractMediaRules(sheet, doc.baseURI);
|
||||||
media = media.concat(this.extractMediaRules(sheet.cssRules, resolve));
|
if (extracted.length) {
|
||||||
|
media = media.concat(extracted);
|
||||||
|
}
|
||||||
} else if (sheet.href != null) {
|
} else if (sheet.href != null) {
|
||||||
// depending on the browser cross origin stylesheets will have their
|
// depending on the browser cross origin stylesheets will have their
|
||||||
// cssRules property null but href non-null
|
// cssRules property null but href non-null
|
||||||
deferredMediaURLS.push(this.corsCSSFetch(sheet.href));
|
deferredMediaExtraction.push(this.fetchCSSAndExtract(sheet.href));
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// the stylesheet is cross origin and we must re-fetch via PYWB to get the contents for checking
|
// the stylesheet is cross origin and we must re-fetch via PYWB to get the contents for checking
|
||||||
deferredMediaURLS.push(this.corsCSSFetch(sheet.href));
|
if (sheet.href != null) {
|
||||||
|
deferredMediaExtraction.push(this.fetchCSSAndExtract(sheet.href));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// We must use the window reference passed to us to access this origins elements with srcset attr
|
|
||||||
// like cssRule handling we must include a URL to resolve relative URLs by
|
if (media.length) {
|
||||||
var results = this.getImgAVElems();
|
// send
|
||||||
var msg = { 'type': 'values' };
|
this.postMessage({'type': 'values', 'media': media});
|
||||||
// send what we have extracted, if anything, to the worker for processing
|
|
||||||
if (media.length > 0) {
|
|
||||||
msg.media = media;
|
|
||||||
}
|
|
||||||
if (results.srcset) {
|
|
||||||
msg.srcset = results.srcset;
|
|
||||||
}
|
|
||||||
if (results.src) {
|
|
||||||
msg.src = results.src;
|
|
||||||
}
|
|
||||||
if (msg.media || msg.srcset || msg.src) {
|
|
||||||
this.postMessage(msg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (deferredMediaURLS.length > 0) {
|
if (deferredMediaExtraction.length) {
|
||||||
// wait for all our deferred fetching and extraction of cross origin
|
// wait for all our deferred fetching and extraction of cross origin
|
||||||
// stylesheets to complete and then send those values, if any, to the worker
|
// stylesheets to complete and then send those values, if any, to the worker
|
||||||
var aaw = this;
|
var afwpm = this;
|
||||||
Promise.all(deferredMediaURLS).then(function (values) {
|
Promise.all(deferredMediaExtraction).then(function (results) {
|
||||||
var results = [];
|
if (results.length === 0) return;
|
||||||
while (values.length > 0) {
|
var len = results.length;
|
||||||
results = results.concat(values.shift());
|
var media = [];
|
||||||
}
|
for (var i = 0; i < len; ++i) {
|
||||||
if (results.length > 0) {
|
media = media.concat(results[i]);
|
||||||
aaw.postMessage({'type': 'values', 'media': results});
|
|
||||||
}
|
}
|
||||||
|
afwpm.postMessage({'type': 'values', 'media': media });
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
AutoFetchWorkerProxyMode.prototype.extractFromLocalDoc = function () {
|
||||||
|
// check for data-[src,srcset] and auto-fetched elems with srcset first
|
||||||
|
this.extractSrcSrcsetFrom($wbwindow.document, $wbwindow.document.baseURI);
|
||||||
|
// we must use the window reference passed to us to access this origins stylesheets
|
||||||
|
this.checkStyleSheets($wbwindow.document);
|
||||||
|
};
|
||||||
|
|
||||||
WBAutoFetchWorker = new AutoFetchWorkerProxyMode();
|
WBAutoFetchWorker = new AutoFetchWorkerProxyMode();
|
||||||
|
|
||||||
|
// expose AutoFetchWorkerProxyMode
|
||||||
|
Object.defineProperty(window, '$WBAutoFetchWorker$', {
|
||||||
|
'enumerable': false,
|
||||||
|
'value': WBAutoFetchWorker
|
||||||
|
});
|
||||||
|
|
||||||
if (isTop) {
|
if (isTop) {
|
||||||
$wbwindow.addEventListener("message", function (event) {
|
$wbwindow.addEventListener("message", function (event) {
|
||||||
if (event.data && event.data.wb_type === 'aaworker') {
|
if (event.data && event.data.wb_type === 'aaworker') {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user