mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
enabled auto-fetching of video, audio resources in wombat in non-proxy mode and proxy mode (#427)
This commit is contained in:
parent
3235c382a5
commit
323edcf47c
@ -3,6 +3,12 @@
|
||||
var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
|
||||
var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
|
||||
var srcsetSplit = /\s*(\S*\s+[\d.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/;
|
||||
var DefaultNumImFetches = 30;
|
||||
var FullImgQDrainLen = 10;
|
||||
var DefaultNumAvFetches = 5;
|
||||
var FullAVQDrainLen = 5;
|
||||
var DataURLPrefix = 'data:';
|
||||
|
||||
// the autofetcher instance for this worker
|
||||
var autofetcher = null;
|
||||
|
||||
@ -41,7 +47,7 @@ self.onmessage = function (event) {
|
||||
var data = event.data;
|
||||
switch (data.type) {
|
||||
case 'values':
|
||||
autofetcher.autofetchMediaSrcset(data);
|
||||
autofetcher.autoFetch(data);
|
||||
break;
|
||||
}
|
||||
};
|
||||
@ -53,53 +59,24 @@ function AutoFetcher(init) {
|
||||
this.prefix = init.prefix;
|
||||
this.mod = init.mod;
|
||||
this.prefixMod = init.prefix + init.mod;
|
||||
this.rwRe = new RegExp(init.rwRe);
|
||||
// relative url, WorkerLocation is set by owning document
|
||||
this.relative = init.prefix.split(location.origin)[1];
|
||||
// schemeless url
|
||||
this.schemeless = '/' + this.relative;
|
||||
// local cache of URLs fetched, to reduce server load
|
||||
this.seen = {};
|
||||
// array of URL to be fetched
|
||||
// array of URLs to be fetched
|
||||
this.queue = [];
|
||||
this.avQueue = [];
|
||||
// should we queue a URL or not
|
||||
this.queuing = false;
|
||||
this.queuingAV = false;
|
||||
this.urlExtractor = this.urlExtractor.bind(this);
|
||||
this.fetchDone = this.fetchDone.bind(this);
|
||||
this.imgFetchDone = this.imgFetchDone.bind(this);
|
||||
this.avFetchDone = this.avFetchDone.bind(this);
|
||||
}
|
||||
|
||||
AutoFetcher.prototype.fixupURL = function (url) {
|
||||
// attempt to fix up the url and do our best to ensure we can get dat 200 OK!
|
||||
if (url.indexOf(this.prefixMod) === 0) {
|
||||
return url;
|
||||
}
|
||||
if (url.indexOf(this.relative) === 0) {
|
||||
return url.replace(this.relative, this.prefix);
|
||||
}
|
||||
if (url.indexOf(this.schemeless) === 0) {
|
||||
return url.replace(this.schemeless, this.prefix);
|
||||
}
|
||||
if (url.indexOf(this.prefix) !== 0) {
|
||||
return this.prefix + url;
|
||||
}
|
||||
return url;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.queueURL = function (url) {
|
||||
// ensure we do not request data urls
|
||||
if (url.indexOf('data:') === 0) return;
|
||||
// check to see if we have seen this url before in order
|
||||
// to lessen the load against the server content is fetched from
|
||||
if (this.seen[url] != null) return;
|
||||
this.seen[url] = true;
|
||||
this.queue.push(url);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) {
|
||||
// Same function as style_replacer in wombat.rewrite_style, n2 is our URL
|
||||
this.queueURL(this.fixupURL(n2));
|
||||
return n1 + n2 + n3;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.delay = function () {
|
||||
// 2 second delay seem reasonable
|
||||
return new Promise(function (resolve, reject) {
|
||||
@ -107,47 +84,105 @@ AutoFetcher.prototype.delay = function () {
|
||||
});
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchDone = function () {
|
||||
this.queuing = false;
|
||||
AutoFetcher.prototype.imgFetchDone = function () {
|
||||
if (this.queue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
var autofetcher = this;
|
||||
this.delay().then(function () {
|
||||
autofetcher.fetchAll();
|
||||
autofetcher.queuing = false;
|
||||
autofetcher.fetchImgs();
|
||||
});
|
||||
} else {
|
||||
this.queuing = false;
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchAll = function () {
|
||||
AutoFetcher.prototype.avFetchDone = function () {
|
||||
if (this.avQueue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
var autofetcher = this;
|
||||
this.delay().then(function () {
|
||||
autofetcher.queuingAV = false;
|
||||
autofetcher.fetchAV();
|
||||
});
|
||||
} else {
|
||||
this.queuingAV = false;
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchAV = function () {
|
||||
if (this.queuingAV || this.avQueue.length === 0) {
|
||||
return;
|
||||
}
|
||||
// the number of fetches is limited to a maximum of DefaultNumAvFetches + FullAVQDrainLen outstanding fetches
|
||||
// the baseline maximum number of fetches is DefaultNumAvFetches but if the size(avQueue) <= FullAVQDrainLen
|
||||
// we add them to the current batch. Because audio video resources might be big
|
||||
// we limit how many we fetch at a time drastically
|
||||
this.queuingAV = true;
|
||||
var runningFetchers = [];
|
||||
while (this.avQueue.length > 0 && runningFetchers.length <= DefaultNumAvFetches) {
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop))
|
||||
}
|
||||
if (this.avQueue.length <= FullAVQDrainLen) {
|
||||
while (this.avQueue.length > 0) {
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop))
|
||||
}
|
||||
}
|
||||
Promise.all(runningFetchers)
|
||||
.then(this.avFetchDone)
|
||||
.catch(this.avFetchDone);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchImgs = function () {
|
||||
if (this.queuing || this.queue.length === 0) {
|
||||
return;
|
||||
}
|
||||
// the number of fetches is limited to a maximum of 60 outstanding fetches
|
||||
// the baseline maximum number of fetches is 50 but if the size(queue) <= 10
|
||||
// the number of fetches is limited to a maximum of DefaultNumImFetches + FullImgQDrainLen outstanding fetches
|
||||
// the baseline maximum number of fetches is DefaultNumImFetches but if the size(queue) <= FullImgQDrainLen
|
||||
// we add them to the current batch
|
||||
this.queuing = true;
|
||||
var runningFetchers = [];
|
||||
while (this.queue.length > 0 && runningFetchers.length <= 50) {
|
||||
while (this.queue.length > 0 && runningFetchers.length <= DefaultNumImFetches) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||
}
|
||||
if (this.queue.length <= 10) {
|
||||
if (this.queue.length <= FullImgQDrainLen) {
|
||||
while (this.queue.length > 0) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||
}
|
||||
}
|
||||
Promise.all(runningFetchers)
|
||||
.then(this.fetchDone)
|
||||
.catch(this.fetchDone);
|
||||
.then(this.imgFetchDone)
|
||||
.catch(this.imgFetchDone);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.extractMedia = function (mediaRules) {
|
||||
// this is a broken down rewrite_style
|
||||
if (mediaRules == null || mediaRules.values === null) return;
|
||||
var rules = mediaRules.values;
|
||||
for (var i = 0; i < rules.length; i++) {
|
||||
var rule = rules[i];
|
||||
rule.replace(STYLE_REGEX, this.urlExtractor)
|
||||
.replace(IMPORT_REGEX, this.urlExtractor);
|
||||
AutoFetcher.prototype.queueNonAVURL = function (url) {
|
||||
// ensure we do not request data urls
|
||||
if (url.indexOf(DataURLPrefix) === 0) return;
|
||||
// check to see if we have seen this url before in order
|
||||
// to lessen the load against the server content is fetched from
|
||||
if (this.seen[url] != null) return;
|
||||
this.seen[url] = true;
|
||||
this.queue.push(url);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.queueAVURL = function (url) {
|
||||
// ensure we do not request data urls
|
||||
if (url.indexOf(DataURLPrefix) === 0) return;
|
||||
// check to see if we have seen this url before in order
|
||||
// to lessen the load against the server content is fetched from
|
||||
if (this.seen[url] != null) return;
|
||||
this.seen[url] = true;
|
||||
this.avQueue.push(url);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.maybeResolveURL = function (url, base) {
|
||||
// given a url and base url returns a resolved full URL or
|
||||
// null if resolution was unsuccessful
|
||||
try {
|
||||
var _url = new URL(url, base);
|
||||
return _url.href;
|
||||
} catch (e) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
@ -163,85 +198,149 @@ AutoFetcher.prototype.maybeFixUpRelSchemelessPrefix = function (url) {
|
||||
return null;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.maybeResolveURL = function (url, base) {
|
||||
// given a url and base url returns a resolved full URL or
|
||||
// null if resolution was unsuccessful
|
||||
try {
|
||||
var _url = new URL(url, base);
|
||||
return _url.href;
|
||||
} catch (e) {
|
||||
return null;
|
||||
AutoFetcher.prototype.maybeFixUpURL = function (url, resolveOpts) {
|
||||
// attempt to fix up the url and do our best to ensure we can get dat 200 OK!
|
||||
if (this.rwRe.test(url)) {
|
||||
return url;
|
||||
}
|
||||
var mod = resolveOpts.mod || 'mp_';
|
||||
// first check for / (relative) or // (schemeless) rewritten urls
|
||||
var maybeFixed = this.maybeFixUpRelSchemelessPrefix(url);
|
||||
if (maybeFixed != null) {
|
||||
return maybeFixed;
|
||||
}
|
||||
// resolve URL against tag src
|
||||
if (resolveOpts.tagSrc != null) {
|
||||
maybeFixed = this.maybeResolveURL(url, resolveOpts.tagSrc);
|
||||
if (maybeFixed != null) {
|
||||
return this.prefix + mod + '/' + maybeFixed;
|
||||
}
|
||||
}
|
||||
// finally last attempt resolve the originating documents base URI
|
||||
if (resolveOpts.docBaseURI) {
|
||||
maybeFixed = this.maybeResolveURL(url, resolveOpts.docBaseURI);
|
||||
if (maybeFixed != null) {
|
||||
return this.prefix + mod + '/' + maybeFixed;
|
||||
}
|
||||
}
|
||||
// not much to do now.....
|
||||
return this.prefixMod + '/' + url;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) {
|
||||
// Same function as style_replacer in wombat.rewrite_style, n2 is our URL
|
||||
this.queueNonAVURL(n2);
|
||||
return n1 + n2 + n3;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.handleMedia = function (mediaRules) {
|
||||
// this is a broken down rewrite_style
|
||||
if (mediaRules == null || mediaRules.length === 0) return;
|
||||
// var rules = mediaRules.values;
|
||||
for (var i = 0; i < mediaRules.length; i++) {
|
||||
mediaRules[i]
|
||||
.replace(STYLE_REGEX, this.urlExtractor)
|
||||
.replace(IMPORT_REGEX, this.urlExtractor);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
AutoFetcher.prototype.fixupURLSrcSet = function (url, tagSrc, context) {
|
||||
// attempt to fix up the url and do our best to ensure we can get dat 200 OK!
|
||||
if (url.indexOf(this.prefix) !== 0) {
|
||||
// first check for / (relative) or // (schemeless) rewritten urls
|
||||
var maybeFixed = this.maybeFixUpRelSchemelessPrefix(url);
|
||||
if (maybeFixed != null) {
|
||||
return maybeFixed;
|
||||
AutoFetcher.prototype.handleSrc = function (srcValues, context) {
|
||||
var resolveOpts = { 'docBaseURI': context.docBaseURI };
|
||||
if (srcValues.value) {
|
||||
resolveOpts.mod = srcValues.mod;
|
||||
if (resolveOpts.mod === 1) {
|
||||
return this.queueNonAVURL(this.maybeFixUpURL(srcValues.value.trim(), resolveOpts));
|
||||
}
|
||||
// resolve URL against tag src
|
||||
if (tagSrc != null) {
|
||||
maybeFixed = this.maybeResolveURL(url, tagSrc);
|
||||
if (maybeFixed != null) {
|
||||
return this.prefix + 'im_/' + maybeFixed;
|
||||
return this.queueAVURL(this.maybeFixUpURL(srcValues.value.trim(), resolveOpts));
|
||||
}
|
||||
var len = srcValues.values.length;
|
||||
for (var i = 0; i < len; i++) {
|
||||
var value = srcValues.values[i];
|
||||
resolveOpts.mod = value.mod;
|
||||
if (resolveOpts.mod === 'im_') {
|
||||
this.queueNonAVURL(this.maybeFixUpURL(value.src, resolveOpts));
|
||||
} else {
|
||||
this.queueAVURL(this.maybeFixUpURL(value.src, resolveOpts));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.extractSrcSetNotPreSplit = function (ssV, resolveOpts) {
|
||||
// was from extract from local doc so we need to duplicate work
|
||||
var srcsetValues = ssV.split(srcsetSplit);
|
||||
for (var i = 0; i < srcsetValues.length; i++) {
|
||||
// grab the URL not width/height key
|
||||
if (srcsetValues[i]) {
|
||||
var value = srcsetValues[i].trim().split(' ')[0];
|
||||
var maybeResolvedURL = this.maybeFixUpURL(value.trim(), resolveOpts);
|
||||
if (resolveOpts.mod === 'im_') {
|
||||
this.queueNonAVURL(maybeResolvedURL);
|
||||
} else {
|
||||
this.queueAVURL(maybeResolvedURL);
|
||||
}
|
||||
}
|
||||
// finally last attempt resolve the originating documents base URI
|
||||
maybeFixed = this.maybeResolveURL(url, context.docBaseURI);
|
||||
if (maybeFixed != null) {
|
||||
return this.prefix + 'im_/' + maybeFixed;
|
||||
}
|
||||
// not much to do now.....
|
||||
return this.prefixMod + '/' + url;
|
||||
}
|
||||
return url;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.extractSrcset = function (srcsets, context) {
|
||||
if (srcsets == null || srcsets.values == null) return;
|
||||
var srcsetValues = srcsets.values;
|
||||
if (!srcsets.presplit) {
|
||||
// was from extract from local doc so we need to duplicate work
|
||||
return this.srcsetNotPreSplit(srcsetValues, context);
|
||||
}
|
||||
// was rewrite_srcset so just ensure we just
|
||||
for (var i = 0; i < srcsetValues.length; i++) {
|
||||
// was rewrite_srcset and only need to q
|
||||
for (var i = 0; i < srcsets.length; i++) {
|
||||
// grab the URL not width/height key
|
||||
this.queueURL(srcsetValues[i].split(' ')[0]);
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.srcsetNotPreSplit = function (values, context) {
|
||||
// was from extract from local doc so we need to duplicate work
|
||||
var j;
|
||||
for (var i = 0; i < values.length; i++) {
|
||||
var srcsetValues = values[i].srcset.split(srcsetSplit);
|
||||
var tagSrc = values[i].tagSrc;
|
||||
for (j = 0; j < srcsetValues.length; j++) {
|
||||
// grab the URL not width/height key
|
||||
if (Boolean(srcsetValues[j])) {
|
||||
var value = srcsetValues[j].trim().split(' ')[0];
|
||||
this.queueURL(this.fixupURLSrcSet(value, tagSrc, context));
|
||||
}
|
||||
var url = srcsets[i].split(' ')[0];
|
||||
if (context.mod === 'im_') {
|
||||
this.queueNonAVURL(url);
|
||||
} else {
|
||||
this.queueAVURL(url);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.autofetchMediaSrcset = function (data) {
|
||||
AutoFetcher.prototype.handleSrcset = function (srcset, context) {
|
||||
var resolveOpts = { 'docBaseURI': context.docBaseURI };
|
||||
if (srcset.value) {
|
||||
// we have a single value, this srcset came from either
|
||||
// preserveDataSrcset (not presplit) preserveSrcset (presplit)
|
||||
resolveOpts.mod = srcset.mod;
|
||||
if (!srcset.presplit) {
|
||||
// extract URLs from the srcset string
|
||||
return this.extractSrcSetNotPreSplit(srcset.value, resolveOpts);
|
||||
}
|
||||
// we have an array of srcset URL strings
|
||||
return this.extractSrcset(srcset.value, resolveOpts);
|
||||
}
|
||||
// we have an array of values, these srcsets came from extractFromLocalDoc
|
||||
var len = srcset.values.length;
|
||||
for (var i = 0; i < len; i++) {
|
||||
var ssv = srcset.values[i];
|
||||
resolveOpts.mod = ssv.mod;
|
||||
resolveOpts.tagSrc = ssv.tagSrc;
|
||||
this.extractSrcSetNotPreSplit(ssv.srcset, resolveOpts);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
AutoFetcher.prototype.autoFetch = function (data) {
|
||||
// we got a message and now we autofetch!
|
||||
// these calls turn into no ops if they have no work
|
||||
this.extractMedia(data.media);
|
||||
this.extractSrcset(data.srcset, data.context);
|
||||
this.fetchAll();
|
||||
if (data.media) {
|
||||
this.handleMedia(data.media);
|
||||
}
|
||||
|
||||
if (data.src) {
|
||||
this.handleSrc(data.src, data.context || {});
|
||||
}
|
||||
|
||||
if (data.srcset) {
|
||||
this.handleSrcset(data.srcset, data.context || {});
|
||||
}
|
||||
|
||||
this.fetchImgs();
|
||||
this.fetchAV();
|
||||
};
|
||||
|
||||
// initialize ourselves from the query params :)
|
||||
try {
|
||||
var loc = new self.URL(location);
|
||||
var loc = new self.URL(location.href);
|
||||
autofetcher = new AutoFetcher(JSON.parse(loc.searchParams.get('init')));
|
||||
} catch (e) {
|
||||
// likely we are in an older version of safari
|
||||
|
@ -3,6 +3,11 @@
|
||||
var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
|
||||
var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
|
||||
var srcsetSplit = /\s*(\S*\s+[\d.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/;
|
||||
var DefaultNumImFetches = 30;
|
||||
var FullImgQDrainLen = 10;
|
||||
var DefaultNumAvFetches = 5;
|
||||
var FullAVQDrainLen = 5;
|
||||
var DataURLPrefix = 'data:';
|
||||
// the autofetcher instance for this worker
|
||||
var autofetcher = null;
|
||||
|
||||
@ -53,26 +58,119 @@ function AutoFetcher() {
|
||||
}
|
||||
// local cache of URLs fetched, to reduce server load
|
||||
this.seen = {};
|
||||
// array of URL to be fetched
|
||||
// array of URLs to be fetched
|
||||
this.queue = [];
|
||||
this.avQueue = [];
|
||||
// should we queue a URL or not
|
||||
this.queuing = false;
|
||||
// a URL to resolve relative URLs found in the cssText of CSSMedia rules.
|
||||
this.currentResolver = null;
|
||||
// should we queue a URL or not
|
||||
this.queuing = false;
|
||||
this.queuingAV = false;
|
||||
this.urlExtractor = this.urlExtractor.bind(this);
|
||||
this.fetchDone = this.fetchDone.bind(this);
|
||||
this.imgFetchDone = this.imgFetchDone.bind(this);
|
||||
this.avFetchDone = this.avFetchDone.bind(this);
|
||||
}
|
||||
|
||||
AutoFetcher.prototype.queueURL = function (url) {
|
||||
AutoFetcher.prototype.delay = function () {
|
||||
// 2 second delay seem reasonable
|
||||
return new Promise(function (resolve, reject) {
|
||||
setTimeout(resolve, 2000);
|
||||
});
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.imgFetchDone = function () {
|
||||
if (this.queue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
var autofetcher = this;
|
||||
this.delay().then(function () {
|
||||
autofetcher.queuing = false;
|
||||
autofetcher.fetchImgs();
|
||||
});
|
||||
} else {
|
||||
this.queuing = false;
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.avFetchDone = function () {
|
||||
if (this.avQueue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
var autofetcher = this;
|
||||
this.delay().then(function () {
|
||||
autofetcher.queuingAV = false;
|
||||
autofetcher.fetchAV();
|
||||
});
|
||||
} else {
|
||||
this.queuingAV = false;
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchAV = function () {
|
||||
if (this.queuingAV || this.avQueue.length === 0) {
|
||||
return;
|
||||
}
|
||||
// the number of fetches is limited to a maximum of DefaultNumAvFetches + FullAVQDrainLen outstanding fetches
|
||||
// the baseline maximum number of fetches is DefaultNumAvFetches but if the size(avQueue) <= FullAVQDrainLen
|
||||
// we add them to the current batch. Because audio video resources might be big
|
||||
// we limit how many we fetch at a time drastically
|
||||
this.queuingAV = true;
|
||||
var runningFetchers = [];
|
||||
while (this.avQueue.length > 0 && runningFetchers.length <= DefaultNumAvFetches) {
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop))
|
||||
}
|
||||
if (this.avQueue.length <= FullAVQDrainLen) {
|
||||
while (this.avQueue.length > 0) {
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop))
|
||||
}
|
||||
}
|
||||
Promise.all(runningFetchers)
|
||||
.then(this.avFetchDone)
|
||||
.catch(this.avFetchDone);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchImgs = function () {
|
||||
if (this.queuing || this.queue.length === 0) {
|
||||
return;
|
||||
}
|
||||
// the number of fetches is limited to a maximum of DefaultNumImFetches + FullImgQDrainLen outstanding fetches
|
||||
// the baseline maximum number of fetches is DefaultNumImFetches but if the size(queue) <= FullImgQDrainLen
|
||||
// we add them to the current batch
|
||||
this.queuing = true;
|
||||
var runningFetchers = [];
|
||||
while (this.queue.length > 0 && runningFetchers.length <= DefaultNumImFetches) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||
}
|
||||
if (this.queue.length <= FullImgQDrainLen) {
|
||||
while (this.queue.length > 0) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||
}
|
||||
}
|
||||
Promise.all(runningFetchers)
|
||||
.then(this.imgFetchDone)
|
||||
.catch(this.imgFetchDone);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.queueNonAVURL = function (url) {
|
||||
// ensure we do not request data urls
|
||||
if (url.indexOf('data:') === 0) return;
|
||||
if (url.indexOf(DataURLPrefix) === 0) return;
|
||||
// check to see if we have seen this url before in order
|
||||
// to lessen the load against the server content is autofetchd from
|
||||
// to lessen the load against the server content is fetched from
|
||||
if (this.seen[url] != null) return;
|
||||
this.seen[url] = true;
|
||||
this.queue.push(url);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.queueAVURL = function (url) {
|
||||
// ensure we do not request data urls
|
||||
if (url.indexOf(DataURLPrefix) === 0) return;
|
||||
// check to see if we have seen this url before in order
|
||||
// to lessen the load against the server content is fetched from
|
||||
if (this.seen[url] != null) return;
|
||||
this.seen[url] = true;
|
||||
this.avQueue.push(url);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.safeResolve = function (url, resolver) {
|
||||
// Guard against the exception thrown by the URL constructor if the URL or resolver is bad
|
||||
// if resolver is undefined/null then this function passes url through
|
||||
@ -95,52 +193,11 @@ AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string
|
||||
// (resolvedURL will be undefined if an error occurred)
|
||||
var resolvedURL = this.safeResolve(n2, this.currentResolver);
|
||||
if (resolvedURL) {
|
||||
this.queueURL(resolvedURL);
|
||||
this.queueNonAVURL(resolvedURL);
|
||||
}
|
||||
return n1 + n2 + n3;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.delay = function () {
|
||||
// 2 second delay seem reasonable
|
||||
return new Promise(function (resolve, reject) {
|
||||
setTimeout(resolve, 2000);
|
||||
});
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchDone = function () {
|
||||
this.queuing = false;
|
||||
if (this.queue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
var autofetcher = this;
|
||||
// wait 2 seconds before doing another batch
|
||||
this.delay().then(function () {
|
||||
autofetcher.fetchAll();
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchAll = function () {
|
||||
if (this.queuing || this.queue.length === 0) {
|
||||
return;
|
||||
}
|
||||
// the number of fetches is limited to a maximum of 60 outstanding fetches
|
||||
// the baseline maximum number of fetches is 50 but if the size(queue) <= 10
|
||||
// we add them to the current batch this.queuing = true;
|
||||
this.queuing = true;
|
||||
var runningFetchers = [];
|
||||
while (this.queue.length > 0 && runningFetchers.length <= 50) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||
}
|
||||
if (this.queue.length <= 10) {
|
||||
while (this.queue.length > 0) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||
}
|
||||
}
|
||||
Promise.all(runningFetchers)
|
||||
.then(this.fetchDone)
|
||||
.catch(this.fetchDone);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.extractMedia = function (mediaRules) {
|
||||
// this is a broken down rewrite_style
|
||||
if (mediaRules == null) return;
|
||||
@ -165,13 +222,17 @@ AutoFetcher.prototype.extractSrcset = function (srcsets) {
|
||||
extractedSrcSet = srcsets[i];
|
||||
ssSplit = extractedSrcSet.srcset.split(srcsetSplit);
|
||||
for (j = 0; j < ssSplit.length; j++) {
|
||||
if (Boolean(ssSplit[j])) {
|
||||
if (ssSplit[j]) {
|
||||
srcsetValue = ssSplit[j].trim();
|
||||
if (srcsetValue.length > 0) {
|
||||
// resolve the URL in an exceptionless manner (resolvedURL will be undefined if an error occurred)
|
||||
var resolvedURL = this.safeResolve(srcsetValue.split(' ')[0], extractedSrcSet.resolve);
|
||||
if (resolvedURL) {
|
||||
this.queueURL(resolvedURL);
|
||||
if (extractedSrcSet.mod === 'im_') {
|
||||
this.queueNonAVURL(resolvedURL);
|
||||
} else {
|
||||
this.queueAVURL(resolvedURL);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -179,12 +240,34 @@ AutoFetcher.prototype.extractSrcset = function (srcsets) {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.extractSrc = function (srcVals) {
|
||||
// preservation worker in proxy mode sends us the value of the srcset attribute of an element
|
||||
// and a URL to correctly resolve relative URLS. Thus we must recreate rewrite_srcset logic here
|
||||
if (srcVals == null || srcVals.length === 0) return;
|
||||
var length = srcVals.length;
|
||||
var srcVal;
|
||||
for (var i = 0; i < length; i++) {
|
||||
srcVal = srcVals[i];
|
||||
var resolvedURL = this.safeResolve(srcVal.src, srcVal.resolve);
|
||||
if (resolvedURL) {
|
||||
if (srcVal.mod === 'im_') {
|
||||
this.queueNonAVURL(resolvedURL);
|
||||
} else {
|
||||
this.queueAVURL(resolvedURL);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
AutoFetcher.prototype.autofetchMediaSrcset = function (data) {
|
||||
// we got a message and now we autofetch!
|
||||
// these calls turn into no ops if they have no work
|
||||
this.extractMedia(data.media);
|
||||
this.extractSrcset(data.srcset);
|
||||
this.fetchAll();
|
||||
this.extractSrc(data.src);
|
||||
this.fetchImgs();
|
||||
this.fetchAV();
|
||||
};
|
||||
|
||||
autofetcher = new AutoFetcher();
|
||||
|
@ -186,13 +186,30 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
}
|
||||
|
||||
function isImageSrcset(elem) {
|
||||
if (elem.tagName === 'IMG') return true;
|
||||
return elem.tagName === 'SOURCE' && elem.parentElement && elem.parentElement.tagName === 'PICTURE';
|
||||
function isSavedSrcSrcset(elem) {
|
||||
// returns true or false to indicate if the supplied element may have attributes that are auto-fetched
|
||||
switch (elem.tagName) {
|
||||
case 'IMG':
|
||||
case 'VIDEO':
|
||||
case 'AUDIO':
|
||||
return true;
|
||||
case 'SOURCE':
|
||||
if (!elem.parentElement) return false;
|
||||
switch (elem.parentElement.tagName) {
|
||||
case 'PICTURE':
|
||||
case 'VIDEO':
|
||||
case 'AUDIO':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function isImageDataSrcset(elem) {
|
||||
if (isImageSrcset(elem)) return elem.dataset.srcset != null;
|
||||
function isSavedDataSrcSrcset(elem) {
|
||||
if (elem.dataset.srcset != null) return isSavedSrcSrcset(elem);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1162,7 +1179,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
} else if (lowername == "style") {
|
||||
value = rewrite_style(value);
|
||||
} else if (lowername == "srcset") {
|
||||
value = rewrite_srcset(value, isImageSrcset(this));
|
||||
value = rewrite_srcset(value, this);
|
||||
}
|
||||
}
|
||||
orig_setAttribute.call(this, name, value);
|
||||
@ -1347,25 +1364,35 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
|
||||
//============================================
|
||||
function initAutoFetchWorker() {
|
||||
function initAutoFetchWorker(rwRe) {
|
||||
if (!wbUseAFWorker) {
|
||||
return;
|
||||
}
|
||||
|
||||
var isTop = $wbwindow === $wbwindow.__WB_replay_top;
|
||||
|
||||
function AutoFetchWorker(prefix, mod) {
|
||||
function AutoFetchWorker(opts) {
|
||||
if (!(this instanceof AutoFetchWorker)) {
|
||||
return new AutoFetchWorker(prefix, mod);
|
||||
return new AutoFetchWorker(opts);
|
||||
}
|
||||
this.checkIntervalCB = this.checkIntervalCB.bind(this);
|
||||
// specifically target the elements we desire
|
||||
this.elemSelector = ['img', 'source', 'video', 'audio'].map(function (which) {
|
||||
if (which === 'source') {
|
||||
return ['picture > ', 'video > ', 'audio >'].map(function (parent) {
|
||||
return parent + which + '[srcset], ' + parent + which + '[data-srcset], ' + parent + which + '[data-src]'
|
||||
}).join(', ');
|
||||
} else {
|
||||
return which + '[srcset], ' + which + '[data-srcset], ' + which + '[data-src]';
|
||||
}
|
||||
}).join(', ');
|
||||
|
||||
if (isTop) {
|
||||
// we are top and can will own this worker
|
||||
// setup URL for the kewl case
|
||||
// Normal replay and preservation mode pworker setup, its all one origin so YAY!
|
||||
var workerURL = wbinfo.static_prefix +
|
||||
var workerURL = (wbinfo.auto_fetch_worker_prefix || wbinfo.static_prefix) +
|
||||
'autoFetchWorker.js?init='+
|
||||
encodeURIComponent(JSON.stringify({ 'mod': mod, 'prefix': prefix }));
|
||||
encodeURIComponent(JSON.stringify(opts));
|
||||
this.worker = new $wbwindow.Worker(workerURL);
|
||||
} else {
|
||||
// add only the portions of the worker interface we use since we are not top and if in proxy mode start check polling
|
||||
@ -1381,20 +1408,17 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
}
|
||||
|
||||
AutoFetchWorker.prototype.checkIntervalCB = function () {
|
||||
this.extractFromLocalDoc();
|
||||
};
|
||||
|
||||
AutoFetchWorker.prototype.deferredSheetExtraction = function (sheet) {
|
||||
var rules = sheet.cssRules || sheet.rules;
|
||||
// if no rules this a no op
|
||||
if (!rules || rules.length === 0) return;
|
||||
var self = this;
|
||||
function extract() {
|
||||
var afw = this;
|
||||
// defer things until next time the Promise.resolve Qs are cleared
|
||||
$wbwindow.Promise.resolve().then(function () {
|
||||
// loop through each rule of the stylesheet
|
||||
var media = [];
|
||||
for (var j = 0; j < rules.length; ++j) {
|
||||
var rule = rules[j];
|
||||
for (var i = 0; i < rules.length; ++i) {
|
||||
var rule = rules[i];
|
||||
if (rule.type === CSSRule.MEDIA_RULE) {
|
||||
// we are a media rule so get its text
|
||||
media.push(rule.cssText);
|
||||
@ -1402,11 +1426,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
if (media.length > 0) {
|
||||
// we have some media rules to preserve
|
||||
self.preserveMedia(media);
|
||||
afw.preserveMedia(media);
|
||||
}
|
||||
}
|
||||
// defer things until next time the Promise.resolve Qs are cleared
|
||||
$wbwindow.Promise.resolve().then(extract);
|
||||
});
|
||||
};
|
||||
|
||||
AutoFetchWorker.prototype.terminate = function () {
|
||||
@ -1416,29 +1438,29 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
AutoFetchWorker.prototype.postMessage = function (msg, deferred) {
|
||||
if (deferred) {
|
||||
var self = this;
|
||||
var afw = this;
|
||||
return Promise.resolve().then(function () {
|
||||
self.worker.postMessage(msg);
|
||||
afw.worker.postMessage(msg);
|
||||
});
|
||||
}
|
||||
this.worker.postMessage(msg);
|
||||
};
|
||||
|
||||
AutoFetchWorker.prototype.preserveSrcset = function (srcset) {
|
||||
AutoFetchWorker.prototype.preserveSrcset = function (srcset, mod) {
|
||||
// send values from rewrite_srcset to the worker deferred
|
||||
// to ensure the page viewer sees the images first
|
||||
this.postMessage({
|
||||
'type': 'values',
|
||||
'srcset': {'values': srcset, 'presplit': true},
|
||||
'srcset': { 'value': srcset, 'mod': mod, 'presplit': true },
|
||||
}, true);
|
||||
};
|
||||
|
||||
AutoFetchWorker.prototype.preserveDataSrcset = function (srcset) {
|
||||
AutoFetchWorker.prototype.preserveDataSrcset = function (elem) {
|
||||
// send values from rewrite_attr srcset to the worker deferred
|
||||
// to ensure the page viewer sees the images first
|
||||
this.postMessage({
|
||||
'type': 'values',
|
||||
'srcset': {'values': srcset, 'presplit': false},
|
||||
'srcset': {'value': elem.dataset.srcset, 'mod': this.rwMod(elem), 'presplit': false},
|
||||
}, true);
|
||||
};
|
||||
|
||||
@ -1447,91 +1469,86 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
this.postMessage({'type': 'values', 'media': media}, true);
|
||||
};
|
||||
|
||||
AutoFetchWorker.prototype.extractSrcset = function (elem) {
|
||||
AutoFetchWorker.prototype.getSrcset = function (elem) {
|
||||
if (wb_getAttribute) {
|
||||
return wb_getAttribute.call(elem, 'srcset');
|
||||
}
|
||||
return elem.getAttribute('srcset');
|
||||
};
|
||||
|
||||
AutoFetchWorker.prototype.checkForPictureSourceDataSrcsets = function () {
|
||||
var dataSS = $wbwindow.document.querySelectorAll('img[data-srcset], source[data-srcset]');
|
||||
var elem;
|
||||
var srcset = [];
|
||||
for (var i = 0; i < dataSS.length; i++) {
|
||||
elem = dataSS[i];
|
||||
if (elem.tagName === 'SOURCE') {
|
||||
if (elem.parentElement && elem.parentElement.tagName === 'PICTURE' && elem.dataset.srcset) {
|
||||
srcset.push({srcset: elem.dataset.srcset});
|
||||
}
|
||||
} else if (elem.dataset.srcset) {
|
||||
srcset.push({srcset: elem.dataset.srcset});
|
||||
}
|
||||
}
|
||||
if (srcset.length) {
|
||||
this.postMessage({
|
||||
'type': 'values',
|
||||
'srcset': {'values': srcset, 'presplit': false},
|
||||
'context': {
|
||||
'docBaseURI': $wbwindow.document.baseURI
|
||||
}
|
||||
}, true);
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetchWorker.prototype.extractImgPictureSourceSrcsets = function () {
|
||||
var i;
|
||||
var elem = null;
|
||||
var srcset = [];
|
||||
var ssElements = $wbwindow.document.querySelectorAll('img[srcset], source[srcset]');
|
||||
for (i = 0; i < ssElements.length; i++) {
|
||||
elem = ssElements[i];
|
||||
if (elem.tagName === 'SOURCE') {
|
||||
if (elem.parentElement && elem.parentElement.tagName === 'PICTURE') {
|
||||
srcset.push({srcset: this.extractSrcset(elem)});
|
||||
}
|
||||
} else {
|
||||
srcset.push({tagSrc: elem.src, srcset: this.extractSrcset(elem)});
|
||||
}
|
||||
}
|
||||
return srcset;
|
||||
AutoFetchWorker.prototype.rwMod = function (elem) {
|
||||
return elem.tagName === "SOURCE" ?
|
||||
elem.parentElement.tagName === "PICTURE" ? 'im_' : 'oe_'
|
||||
: elem.tagName === "IMG" ? 'im_' : 'oe_';
|
||||
};
|
||||
|
||||
AutoFetchWorker.prototype.extractFromLocalDoc = function () {
|
||||
// get the values to be preserved from the documents stylesheets
|
||||
// and all elements with a srcset
|
||||
var media = [];
|
||||
var sheets = $wbwindow.document.styleSheets;
|
||||
var i = 0;
|
||||
for (; i < sheets.length; ++i) {
|
||||
var rules = sheets[i].cssRules;
|
||||
for (var j = 0; j < rules.length; ++j) {
|
||||
var rule = rules[j];
|
||||
if (rule.type === CSSRule.MEDIA_RULE) {
|
||||
media.push(rule.cssText);
|
||||
// get the values to be preserved from the documents stylesheets
|
||||
// and all img, video, audio elements with (data-)?srcset or data-src
|
||||
var afw = this;
|
||||
Promise.resolve().then(function () {
|
||||
var msg = { 'type': 'values', 'context': { 'docBaseURI': $wbwindow.document.baseURI } };
|
||||
var media = [];
|
||||
var i = 0;
|
||||
var sheets = $wbwindow.document.styleSheets;
|
||||
for (; i < sheets.length; ++i) {
|
||||
var rules = sheets[i].cssRules;
|
||||
for (var j = 0; j < rules.length; ++j) {
|
||||
var rule = rules[j];
|
||||
if (rule.type === CSSRule.MEDIA_RULE) {
|
||||
media.push(rule.cssText);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
var srcset = this.extractImgPictureSourceSrcsets();
|
||||
// send the extracted values to the worker deferred
|
||||
// to ensure the page viewer sees the images first
|
||||
this.postMessage({
|
||||
'type': 'values',
|
||||
'media': media,
|
||||
'srcset': {'values': srcset, 'presplit': false},
|
||||
'context': {
|
||||
'docBaseURI': $wbwindow.document.baseURI
|
||||
var elems = $wbwindow.document.querySelectorAll(afw.elemSelector);
|
||||
var srcset = { 'values': [], 'presplit': false };
|
||||
var src = { 'values': [] };
|
||||
var elem, srcv, mod;
|
||||
for (i = 0; i < elems.length; ++i) {
|
||||
elem = elems[i];
|
||||
// we want the original src value in order to resolve URLs in the worker when needed
|
||||
srcv = elem.src ? elem.src : null;
|
||||
// a from value of 1 indicates images and a 2 indicates audio/video
|
||||
mod = afw.rwMod(elem);
|
||||
if (elem.srcset) {
|
||||
srcset.values.push({
|
||||
'srcset': afw.getSrcset(elem),
|
||||
'mod': mod,
|
||||
'tagSrc': srcv
|
||||
});
|
||||
}
|
||||
if (elem.dataset.srcset) {
|
||||
srcset.values.push({
|
||||
'srcset': elem.dataset.srcset,
|
||||
'mod': mod,
|
||||
'tagSrc': srcv
|
||||
});
|
||||
}
|
||||
if (elem.dataset.src) {
|
||||
src.values.push({'src': elem.dataset.src, 'mod': mod});
|
||||
}
|
||||
if (elem.tagName === "SOURCE" && srcv) {
|
||||
src.values.push({'src': srcv, 'mod': mod});
|
||||
}
|
||||
}
|
||||
if (media.length) {
|
||||
msg.media = media;
|
||||
}
|
||||
if (srcset.values.length) {
|
||||
msg.srcset = srcset;
|
||||
}
|
||||
if (src.values.length) {
|
||||
msg.src = src;
|
||||
}
|
||||
if (msg.media || msg.srcset || msg.src) {
|
||||
afw.postMessage(msg);
|
||||
}
|
||||
}, true);
|
||||
// deffer the checking of img/source data-srcset
|
||||
// so that we do not clobber the UI thread
|
||||
var self = this;
|
||||
Promise.resolve().then(function () {
|
||||
self.checkForPictureSourceDataSrcsets();
|
||||
});
|
||||
};
|
||||
|
||||
WBAutoFetchWorker = new AutoFetchWorker(wb_abs_prefix, wbinfo.mod);
|
||||
WBAutoFetchWorker = new AutoFetchWorker({
|
||||
'prefix': wb_abs_prefix, 'mod': wbinfo.mod, 'rwRe': rwRe
|
||||
});
|
||||
|
||||
wbSheetMediaQChecker = function checkStyle() {
|
||||
// used only for link[rel='stylesheet'] so we remove our listener
|
||||
@ -1680,7 +1697,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
} else if (name == "style") {
|
||||
new_value = rewrite_style(value);
|
||||
} else if (name == "srcset") {
|
||||
new_value = rewrite_srcset(value, isImageSrcset(elem));
|
||||
new_value = rewrite_srcset(value, elem);
|
||||
} else {
|
||||
// Only rewrite if absolute url
|
||||
if (abs_url_only && !starts_with(value, VALID_PREFIXES)) {
|
||||
@ -1688,8 +1705,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
var mod = rwModForElement(elem, name);
|
||||
new_value = rewrite_url(value, false, mod, elem.ownerDocument);
|
||||
if (wbUseAFWorker && isImageDataSrcset(elem)) {
|
||||
WBAutoFetchWorker.preserveDataSrcset(elem.dataset.srcset);
|
||||
if (wbUseAFWorker && isSavedDataSrcSrcset(elem)) {
|
||||
WBAutoFetchWorker.preserveDataSrcset(elem);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1704,7 +1721,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
function style_replacer(match, n1, n2, n3, offset, string) {
|
||||
return n1 + rewrite_url(n2) + n3;
|
||||
}
|
||||
|
||||
|
||||
function rewrite_style(value)
|
||||
{
|
||||
if (!value) {
|
||||
@ -1725,7 +1742,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
|
||||
//============================================
|
||||
function rewrite_srcset(value, isImage)
|
||||
function rewrite_srcset(value, elem)
|
||||
{
|
||||
if (!value) {
|
||||
return "";
|
||||
@ -1738,9 +1755,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
values[i] = rewrite_url(values[i].trim());
|
||||
}
|
||||
|
||||
if (wbUseAFWorker && isImage) {
|
||||
if (wbUseAFWorker && isSavedSrcSrcset(elem)) {
|
||||
// send post split values to preservation worker
|
||||
WBAutoFetchWorker.preserveSrcset(values);
|
||||
WBAutoFetchWorker.preserveSrcset(values, WBAutoFetchWorker.rwMod(elem));
|
||||
}
|
||||
return values.join(", ");
|
||||
}
|
||||
@ -1869,6 +1886,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
changed = rewrite_attr(elem, 'src');
|
||||
changed = rewrite_attr(elem, 'srcset') || changed;
|
||||
changed = rewrite_attr(elem, 'style') || changed;
|
||||
if (wbUseAFWorker && elem.dataset.srcset) {
|
||||
WBAutoFetchWorker.preserveDataSrcset(elem);
|
||||
}
|
||||
break;
|
||||
case 'OBJECT':
|
||||
changed = rewrite_attr(elem, "data", true);
|
||||
@ -2097,7 +2117,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
if (mod == "cs_" && orig.indexOf("data:text/css") == 0) {
|
||||
val = rewrite_inline_style(orig);
|
||||
} else if (attr == "srcset") {
|
||||
val = rewrite_srcset(orig, isImageSrcset(this));
|
||||
val = rewrite_srcset(orig, this);
|
||||
} else if (this.tagName === 'LINK' && attr === 'href') {
|
||||
var relV = this.rel;
|
||||
if (relV === 'import' || relV === 'preload') {
|
||||
@ -2213,7 +2233,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
override_style_attr(style_proto, "borderImageSource", "border-image-source");
|
||||
|
||||
override_style_setProp(style_proto);
|
||||
|
||||
|
||||
if ($wbwindow.CSSStyleSheet && $wbwindow.CSSStyleSheet.prototype) {
|
||||
// https://developer.mozilla.org/en-US/docs/Web/API/CSSStyleSheet/insertRule
|
||||
// ruleText is a string of raw css....
|
||||
@ -2223,7 +2243,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//============================================
|
||||
function override_style_setProp(style_proto) {
|
||||
var orig_setProp = style_proto.setProperty;
|
||||
@ -2439,7 +2459,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
Object.defineProperty($wbwindow.FontFace.prototype, "constructor", {value: $wbwindow.FontFace});
|
||||
$wbwindow.FontFace.__wboverriden__ = true;
|
||||
}
|
||||
|
||||
|
||||
//============================================
|
||||
function overrideTextProtoGetSet(textProto, whichProp) {
|
||||
var orig_getter = get_orig_getter(textProto, whichProp);
|
||||
@ -2464,7 +2484,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
};
|
||||
def_prop(textProto, whichProp, setter, getter);
|
||||
}
|
||||
|
||||
|
||||
function overrideTextProtoFunction(textProto, whichFN) {
|
||||
var original = textProto[whichFN];
|
||||
textProto[whichFN] = function () {
|
||||
@ -2491,7 +2511,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
return original.apply(this, args);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
function initTextNodeOverrides($wbwindow) {
|
||||
if (!$wbwindow.Text || !$wbwindow.Text.prototype) return;
|
||||
// https://dom.spec.whatwg.org/#characterdata and https://dom.spec.whatwg.org/#interface-text
|
||||
@ -2507,7 +2527,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
overrideTextProtoGetSet(textProto, 'data');
|
||||
overrideTextProtoGetSet(textProto, 'wholeText');
|
||||
}
|
||||
|
||||
|
||||
//============================================
|
||||
function init_wombat_loc(win) {
|
||||
|
||||
@ -3847,14 +3867,14 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
initFontFaceOverride($wbwindow);
|
||||
|
||||
// Worker override (experimental)
|
||||
initAutoFetchWorker();
|
||||
initAutoFetchWorker(rx);
|
||||
init_web_worker_override();
|
||||
init_service_worker_override();
|
||||
initSharedWorkerOverride();
|
||||
|
||||
|
||||
// text node overrides for js frameworks doing funky things with CSS
|
||||
initTextNodeOverrides($wbwindow);
|
||||
|
||||
|
||||
// innerHTML can be overriden on prototype!
|
||||
override_html_assign($wbwindow.HTMLElement, "innerHTML", true);
|
||||
override_html_assign($wbwindow.HTMLElement, "outerHTML", true);
|
||||
|
@ -29,55 +29,56 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
wbinfo.wombat_opts = wbinfo.wombat_opts || {};
|
||||
var wbAutoFetchWorkerPrefix = (wb_info.auto_fetch_worker_prefix || wb_info.static_prefix) + 'autoFetchWorkerProxyMode.js';
|
||||
var WBAutoFetchWorker;
|
||||
|
||||
|
||||
function init_seeded_random(seed) {
|
||||
// Adapted from:
|
||||
// http://indiegamr.com/generate-repeatable-random-numbers-in-js/
|
||||
|
||||
|
||||
$wbwindow.Math.seed = parseInt(seed);
|
||||
|
||||
|
||||
function seeded_random() {
|
||||
$wbwindow.Math.seed = ($wbwindow.Math.seed * 9301 + 49297) % 233280;
|
||||
var rnd = $wbwindow.Math.seed / 233280;
|
||||
|
||||
|
||||
return rnd;
|
||||
}
|
||||
|
||||
|
||||
$wbwindow.Math.random = seeded_random;
|
||||
}
|
||||
|
||||
|
||||
function init_crypto_random() {
|
||||
if (!$wbwindow.crypto || !$wbwindow.Crypto) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
var orig_getrandom = $wbwindow.Crypto.prototype.getRandomValues;
|
||||
|
||||
|
||||
var new_getrandom = function (array) {
|
||||
for (var i = 0; i < array.length; i++) {
|
||||
array[i] = parseInt($wbwindow.Math.random() * 4294967296);
|
||||
}
|
||||
return array;
|
||||
};
|
||||
|
||||
|
||||
$wbwindow.Crypto.prototype.getRandomValues = new_getrandom;
|
||||
$wbwindow.crypto.getRandomValues = new_getrandom;
|
||||
}
|
||||
|
||||
|
||||
//============================================
|
||||
function init_fixed_ratio() {
|
||||
// otherwise, just set it
|
||||
$wbwindow.devicePixelRatio = 1;
|
||||
|
||||
|
||||
// prevent changing, if possible
|
||||
if (Object.defineProperty) {
|
||||
try {
|
||||
// fixed pix ratio
|
||||
Object.defineProperty($wbwindow, "devicePixelRatio", {value: 1, writable: false});
|
||||
} catch (e) {}
|
||||
} catch (e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//========================================
|
||||
function init_date_override(timestamp) {
|
||||
timestamp = parseInt(timestamp) * 1000;
|
||||
@ -86,19 +87,19 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
var timezone = 0;
|
||||
var start_now = $wbwindow.Date.now();
|
||||
var timediff = start_now - (timestamp - timezone);
|
||||
|
||||
|
||||
if ($wbwindow.__wb_Date_now) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
var orig_date = $wbwindow.Date;
|
||||
|
||||
|
||||
var orig_utc = $wbwindow.Date.UTC;
|
||||
var orig_parse = $wbwindow.Date.parse;
|
||||
var orig_now = $wbwindow.Date.now;
|
||||
|
||||
|
||||
$wbwindow.__wb_Date_now = orig_now;
|
||||
|
||||
|
||||
$wbwindow.Date = function (Date) {
|
||||
return function (A, B, C, D, E, F, G) {
|
||||
// Apply doesn't work for constructors and Date doesn't
|
||||
@ -123,21 +124,21 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
}
|
||||
}
|
||||
}($wbwindow.Date);
|
||||
|
||||
|
||||
$wbwindow.Date.prototype = orig_date.prototype;
|
||||
|
||||
|
||||
$wbwindow.Date.now = function () {
|
||||
return orig_now() - timediff;
|
||||
};
|
||||
|
||||
|
||||
$wbwindow.Date.UTC = orig_utc;
|
||||
$wbwindow.Date.parse = orig_parse;
|
||||
|
||||
|
||||
$wbwindow.Date.__WB_timediff = timediff;
|
||||
|
||||
|
||||
Object.defineProperty($wbwindow.Date.prototype, "constructor", {value: $wbwindow.Date});
|
||||
}
|
||||
|
||||
|
||||
//============================================
|
||||
function init_disable_notifications() {
|
||||
if (window.Notification) {
|
||||
@ -145,36 +146,46 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
if (callback) {
|
||||
callback("denied");
|
||||
}
|
||||
|
||||
|
||||
return Promise.resolve("denied");
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
if (window.geolocation) {
|
||||
var disabled = function (success, error, options) {
|
||||
if (error) {
|
||||
error({"code": 2, "message": "not available"});
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
window.geolocation.getCurrentPosition = disabled;
|
||||
window.geolocation.watchPosition = disabled;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function initAutoFetchWorker() {
|
||||
if (!$wbwindow.Worker) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
var isTop = $wbwindow.self === $wbwindow.top;
|
||||
|
||||
|
||||
function AutoFetchWorkerProxyMode() {
|
||||
if (!(this instanceof AutoFetchWorkerProxyMode)) {
|
||||
return new AutoFetchWorkerProxyMode();
|
||||
}
|
||||
this.checkIntervalTime = 15000;
|
||||
this.checkIntervalCB = this.checkIntervalCB.bind(this);
|
||||
this.elemSelector = ['img', 'source', 'video', 'audio'].map(function (which) {
|
||||
if (which === 'source') {
|
||||
return ['picture > ', 'video > ', 'audio >'].map(function (parent) {
|
||||
return parent + which + '[srcset], ' + parent + which + '[data-srcset], ' + parent + which + '[data-src]'
|
||||
}).join(', ');
|
||||
} else {
|
||||
return which + '[srcset], ' + which + '[data-srcset], ' + which + '[data-src]';
|
||||
}
|
||||
}).join(', ');
|
||||
|
||||
if (isTop) {
|
||||
// Cannot directly load our worker from the proxy origin into the current origin
|
||||
// however we fetch it from proxy origin and can blob it into the current origin :)
|
||||
@ -200,12 +211,13 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
}
|
||||
$wbwindow.top.postMessage(msg, '*');
|
||||
},
|
||||
"terminate": function () {}
|
||||
"terminate": function () {
|
||||
}
|
||||
};
|
||||
this.startCheckingInterval();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
AutoFetchWorkerProxyMode.prototype.startCheckingInterval = function () {
|
||||
// if document ready state is complete do first extraction and start check polling
|
||||
// otherwise wait for document ready state to complete to extract and start check polling
|
||||
@ -223,16 +235,16 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
}, 1000);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
AutoFetchWorkerProxyMode.prototype.checkIntervalCB = function () {
|
||||
this.extractFromLocalDoc();
|
||||
};
|
||||
|
||||
|
||||
AutoFetchWorkerProxyMode.prototype.terminate = function () {
|
||||
// terminate the worker, a no op when not replay top
|
||||
this.worker.terminate();
|
||||
};
|
||||
|
||||
|
||||
AutoFetchWorkerProxyMode.prototype.postMessage = function (msg, deferred) {
|
||||
if (deferred) {
|
||||
var self = this;
|
||||
@ -242,7 +254,7 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
}
|
||||
this.worker.postMessage(msg);
|
||||
};
|
||||
|
||||
|
||||
AutoFetchWorkerProxyMode.prototype.extractMediaRules = function (rules, href) {
|
||||
// We are in proxy mode and must include a URL to resolve relative URLs in media rules
|
||||
if (!rules) return [];
|
||||
@ -257,7 +269,7 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
}
|
||||
return text;
|
||||
};
|
||||
|
||||
|
||||
AutoFetchWorkerProxyMode.prototype.corsCSSFetch = function (href) {
|
||||
// because this JS in proxy mode operates as it would on the live web
|
||||
// the rules of CORS apply and we cannot rely on URLs being rewritten correctly
|
||||
@ -274,70 +286,50 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
return [];
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
AutoFetchWorkerProxyMode.prototype.shouldSkipSheet = function (sheet) {
|
||||
// we skip extracting rules from sheets if they are from our parsing style or come from pywb
|
||||
if (sheet.id === '$wrStyleParser$') return true;
|
||||
return !!(sheet.href && sheet.href.indexOf(wb_info.proxy_magic) !== -1);
|
||||
};
|
||||
|
||||
AutoFetchWorkerProxyMode.prototype.extractImgPictureSourceSrcsets = function () {
|
||||
var i;
|
||||
var elem;
|
||||
var srcset = [];
|
||||
AutoFetchWorkerProxyMode.prototype.getImgAVElems = function () {
|
||||
var elem, srcv, mod;
|
||||
var results = { 'srcset': [], 'src': []} ;
|
||||
var baseURI = $wbwindow.document.baseURI;
|
||||
var ssElements = $wbwindow.document.querySelectorAll('img[srcset], source[srcset]');
|
||||
for (i = 0; i < ssElements.length; i++) {
|
||||
elem = ssElements[i];
|
||||
if (elem.tagName === 'SOURCE') {
|
||||
if (elem.parentElement && elem.parentElement.tagName === 'PICTURE') {
|
||||
srcset.push({srcset: elem.srcset, resolve: baseURI});
|
||||
}
|
||||
} else {
|
||||
srcset.push({
|
||||
srcset: elem.srcset,
|
||||
resolve: elem.src != null && elem.src !== ' ' ? elem.src : baseURI
|
||||
});
|
||||
var elems = $wbwindow.document.querySelectorAll(this.elemSelector);
|
||||
for (var i = 0; i < elems.length; i++) {
|
||||
elem = elems[i];
|
||||
// we want the original src value in order to resolve URLs in the worker when needed
|
||||
srcv = elem.src ? elem.src : null;
|
||||
// get the correct mod in order to inform the backing worker where the URL(s) are from
|
||||
mod = elem.tagName === "SOURCE" ?
|
||||
elem.parentElement.tagName === "PICTURE" ? 'im_' : 'oe_'
|
||||
: elem.tagName === "IMG" ? 'im_' : 'oe_';
|
||||
if (elem.srcset) {
|
||||
results.srcset.push({ 'srcset': elem.srcset, 'resolve': srcv || baseURI, 'mod': mod });
|
||||
}
|
||||
if (elem.dataset.srcset) {
|
||||
results.srcset.push({ 'srcset': elem.dataset.srcset, 'resolve': srcv || baseURI, 'mod': mod });
|
||||
}
|
||||
if (elem.dataset.src) {
|
||||
results.src.push({'src': elem.dataset.src, 'resolve': srcv || baseURI, 'mod': mod});
|
||||
}
|
||||
if (elem.tagName === "SOURCE" && srcv) {
|
||||
results.src.push({'src': srcv, 'resolve': baseURI, 'mod': mod});
|
||||
}
|
||||
}
|
||||
return srcset;
|
||||
return results;
|
||||
};
|
||||
|
||||
AutoFetchWorkerProxyMode.prototype.checkForPictureSourceDataSrcsets = function () {
|
||||
var baseURI = $wbwindow.document.baseURI;
|
||||
var dataSS = $wbwindow.document.querySelectorAll('img[data-srcset], source[data-srcset]');
|
||||
var elem;
|
||||
var srcset = [];
|
||||
for (var i = 0; i < dataSS.length; i++) {
|
||||
elem = dataSS[i];
|
||||
if (elem.tagName === 'SOURCE') {
|
||||
if (elem.parentElement && elem.parentElement.tagName === 'PICTURE' && elem.dataset.srcset) {
|
||||
srcset.push({srcset: elem.dataset.srcset, resolve: baseURI});
|
||||
}
|
||||
} else if (elem.dataset.srcset) {
|
||||
srcset.push({srcset: elem.dataset.srcset, resolve: elem.src != null && elem.src !== ' ' ? elem.src : baseURI});
|
||||
}
|
||||
}
|
||||
if (srcset.length) {
|
||||
this.postMessage({
|
||||
'type': 'values',
|
||||
'srcset': {'values': srcset, 'presplit': false},
|
||||
'context': {
|
||||
'docBaseURI': $wbwindow.document.baseURI
|
||||
}
|
||||
}, true);
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetchWorkerProxyMode.prototype.extractFromLocalDoc = function () {
|
||||
var i = 0;
|
||||
var media = [];
|
||||
var deferredMediaURLS = [];
|
||||
var sheet;
|
||||
var resolve;
|
||||
// We must use the window reference passed to us to access this origins stylesheets
|
||||
var styleSheets = $wbwindow.document.styleSheets;
|
||||
for (; i < styleSheets.length; ++i) {
|
||||
for (var i = 0; i < styleSheets.length; i++) {
|
||||
sheet = styleSheets[i];
|
||||
// if the sheet belongs to our parser node we must skip it
|
||||
if (!this.shouldSkipSheet(sheet)) {
|
||||
@ -360,13 +352,22 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
}
|
||||
// We must use the window reference passed to us to access this origins elements with srcset attr
|
||||
// like cssRule handling we must include a URL to resolve relative URLs by
|
||||
var srcset = this.extractImgPictureSourceSrcsets();
|
||||
|
||||
var results = this.getImgAVElems();
|
||||
var msg = { 'type': 'values' };
|
||||
// send what we have extracted, if anything, to the worker for processing
|
||||
if (media.length > 0 || srcset.length > 0) {
|
||||
this.postMessage({'type': 'values', 'media': media, 'srcset': srcset}, true);
|
||||
if (media.length > 0) {
|
||||
msg.media = media;
|
||||
}
|
||||
|
||||
if (results.srcset) {
|
||||
msg.srcset = results.srcset;
|
||||
}
|
||||
if (results.src) {
|
||||
msg.src = results.src;
|
||||
}
|
||||
if (msg.media || msg.srcset || msg.src) {
|
||||
this.postMessage(msg);
|
||||
}
|
||||
|
||||
if (deferredMediaURLS.length > 0) {
|
||||
// wait for all our deferred fetching and extraction of cross origin
|
||||
// stylesheets to complete and then send those values, if any, to the worker
|
||||
@ -381,16 +382,10 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
}
|
||||
});
|
||||
}
|
||||
// deffer the checking of img/source data-srcset
|
||||
// so that we do not clobber the UI thread
|
||||
var self = this;
|
||||
Promise.resolve().then(function () {
|
||||
self.checkForPictureSourceDataSrcsets();
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
WBAutoFetchWorker = new AutoFetchWorkerProxyMode();
|
||||
|
||||
|
||||
if (isTop) {
|
||||
$wbwindow.addEventListener("message", function (event) {
|
||||
if (event.data && event.data.wb_type === 'aaworker') {
|
||||
@ -399,11 +394,11 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
}, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (wbinfo.enable_auto_fetch && wbinfo.is_live) {
|
||||
initAutoFetchWorker();
|
||||
}
|
||||
|
||||
|
||||
// proxy mode overrides
|
||||
// Random
|
||||
init_seeded_random(wbinfo.wombat_sec);
|
||||
@ -425,13 +420,13 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
|
||||
window._WBWombat = _WBWombat;
|
||||
|
||||
window._WBWombatInit = function(wbinfo) {
|
||||
if (!this._wb_wombat || !this._wb_wombat.actual) {
|
||||
this._wb_wombat = new _WBWombat(this, wbinfo);
|
||||
this._wb_wombat.actual = true;
|
||||
} else if (!this._wb_wombat) {
|
||||
console.warn("_wb_wombat missing!");
|
||||
}
|
||||
window._WBWombatInit = function (wbinfo) {
|
||||
if (!this._wb_wombat || !this._wb_wombat.actual) {
|
||||
this._wb_wombat = new _WBWombat(this, wbinfo);
|
||||
this._wb_wombat.actual = true;
|
||||
} else if (!this._wb_wombat) {
|
||||
console.warn("_wb_wombat missing!");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user