mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
- added to the auto-fetch worker of both wombat and wombatProxymode - added utility function isImageSrcset to wombat for determining whether the srcset values being rewritten are from either an image tag or a source tag within a picture tag - added utility function isImageDataSrcset to wombat to check for img/source data-srcset attributes - reworked the backing auto-fetch worker to now queue all URLs and perform fetch batching with a maximum batch size of 60; a delay of 2 seconds is applied after each batch. Ensured that the srcset values sent to the auto-fetch worker can be resolved in non-proxy mode; fixes #413. Renamed the auto-fetch class name used in proxy mode from AutoFetchWorker to AutoFetchWorkerProxyMode. Added checking of script tag types application/json and text/template to rewrite_script.
This commit is contained in:
parent
3e0bb49ae1
commit
f78bac9474
@ -59,8 +59,6 @@ function AutoFetcher(init) {
|
|||||||
this.schemeless = '/' + this.relative;
|
this.schemeless = '/' + this.relative;
|
||||||
// local cache of URLs fetched, to reduce server load
|
// local cache of URLs fetched, to reduce server load
|
||||||
this.seen = {};
|
this.seen = {};
|
||||||
// array of promises returned by fetch(URL)
|
|
||||||
this.fetches = [];
|
|
||||||
// array of URL to be fetched
|
// array of URL to be fetched
|
||||||
this.queue = [];
|
this.queue = [];
|
||||||
// should we queue a URL or not
|
// should we queue a URL or not
|
||||||
@ -86,61 +84,62 @@ AutoFetcher.prototype.fixupURL = function (url) {
|
|||||||
return url;
|
return url;
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetcher.prototype.safeFetch = function (url) {
|
AutoFetcher.prototype.queueURL = function (url) {
|
||||||
|
// ensure we do not request data urls
|
||||||
|
if (url.indexOf('data:') === 0) return;
|
||||||
// check to see if we have seen this url before in order
|
// check to see if we have seen this url before in order
|
||||||
// to lessen the load against the server content is fetched from
|
// to lessen the load against the server content is fetched from
|
||||||
if (this.seen[url] != null) return;
|
if (this.seen[url] != null) return;
|
||||||
this.seen[url] = true;
|
this.seen[url] = true;
|
||||||
if (this.queuing) {
|
this.queue.push(url);
|
||||||
// we are currently waiting for a batch of fetches to complete
|
|
||||||
return this.queue.push(url);
|
|
||||||
}
|
|
||||||
// fetch this url
|
|
||||||
this.fetches.push(fetch(url));
|
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) {
|
AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) {
|
||||||
// Same function as style_replacer in wombat.rewrite_style, n2 is our URL
|
// Same function as style_replacer in wombat.rewrite_style, n2 is our URL
|
||||||
this.safeFetch(this.fixupURL(n2));
|
this.queueURL(this.fixupURL(n2));
|
||||||
return n1 + n2 + n3;
|
return n1 + n2 + n3;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
AutoFetcher.prototype.delay = function () {
|
||||||
|
// 2 second delay seem reasonable
|
||||||
|
return new Promise(function (resolve, reject) {
|
||||||
|
setTimeout(resolve, 2000);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
AutoFetcher.prototype.fetchDone = function () {
|
AutoFetcher.prototype.fetchDone = function () {
|
||||||
// indicate we no longer need to Q
|
|
||||||
this.queuing = false;
|
this.queuing = false;
|
||||||
if (this.queue.length > 0) {
|
if (this.queue.length > 0) {
|
||||||
// we have a Q of some length drain it
|
// we have a Q of some length drain it
|
||||||
this.drainQ();
|
var autofetcher = this;
|
||||||
|
this.delay().then(function () {
|
||||||
|
autofetcher.fetchAll();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetcher.prototype.fetchAll = function () {
|
AutoFetcher.prototype.fetchAll = function () {
|
||||||
// if we are queuing or have no fetches this is a no op
|
if (this.queuing || this.queue.length === 0) {
|
||||||
if (this.queuing) return;
|
return;
|
||||||
if (this.fetches.length === 0) return;
|
}
|
||||||
// we are about to fetch queue anything that comes our way
|
// the number of fetches is limited to a maximum of 60 outstanding fetches
|
||||||
|
// the baseline maximum number of fetches is 50 but if the size(queue) <= 10
|
||||||
|
// we add them to the current batch
|
||||||
this.queuing = true;
|
this.queuing = true;
|
||||||
/// initiate fetches by turning the initial fetch promises
|
|
||||||
// into rejctionless promises and "await" all clearing
|
|
||||||
// our fetches array in place
|
|
||||||
var runningFetchers = [];
|
var runningFetchers = [];
|
||||||
while (this.fetches.length > 0) {
|
while (this.queue.length > 0 && runningFetchers.length <= 50) {
|
||||||
runningFetchers.push(this.fetches.shift().catch(noop))
|
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||||
|
}
|
||||||
|
if (this.queue.length <= 10) {
|
||||||
|
while (this.queue.length > 0) {
|
||||||
|
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Promise.all(runningFetchers)
|
Promise.all(runningFetchers)
|
||||||
.then(this.fetchDone)
|
.then(this.fetchDone)
|
||||||
.catch(this.fetchDone);
|
.catch(this.fetchDone);
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetcher.prototype.drainQ = function () {
|
|
||||||
// clear our Q in place and fill our fetches array
|
|
||||||
while (this.queue.length > 0) {
|
|
||||||
this.fetches.push(fetch(this.queue.shift()));
|
|
||||||
}
|
|
||||||
// fetch all the things
|
|
||||||
this.fetchAll();
|
|
||||||
};
|
|
||||||
|
|
||||||
AutoFetcher.prototype.extractMedia = function (mediaRules) {
|
AutoFetcher.prototype.extractMedia = function (mediaRules) {
|
||||||
// this is a broken down rewrite_style
|
// this is a broken down rewrite_style
|
||||||
if (mediaRules == null || mediaRules.values === null) return;
|
if (mediaRules == null || mediaRules.values === null) return;
|
||||||
@ -185,9 +184,11 @@ AutoFetcher.prototype.fixupURLSrcSet = function (url, tagSrc, context) {
|
|||||||
return maybeFixed;
|
return maybeFixed;
|
||||||
}
|
}
|
||||||
// resolve URL against tag src
|
// resolve URL against tag src
|
||||||
maybeFixed = this.maybeResolveURL(url, tagSrc);
|
if (tagSrc != null) {
|
||||||
if (maybeFixed != null) {
|
maybeFixed = this.maybeResolveURL(url, tagSrc);
|
||||||
return this.prefix + 'im_/' + maybeFixed;
|
if (maybeFixed != null) {
|
||||||
|
return this.prefix + 'im_/' + maybeFixed;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// finally last attempt resolve the originating documents base URI
|
// finally last attempt resolve the originating documents base URI
|
||||||
maybeFixed = this.maybeResolveURL(url, context.docBaseURI);
|
maybeFixed = this.maybeResolveURL(url, context.docBaseURI);
|
||||||
@ -210,7 +211,7 @@ AutoFetcher.prototype.extractSrcset = function (srcsets, context) {
|
|||||||
// was rewrite_srcset so just ensure we just
|
// was rewrite_srcset so just ensure we just
|
||||||
for (var i = 0; i < srcsetValues.length; i++) {
|
for (var i = 0; i < srcsetValues.length; i++) {
|
||||||
// grab the URL not width/height key
|
// grab the URL not width/height key
|
||||||
this.safeFetch(srcsetValues[i].split(' ')[0]);
|
this.queueURL(srcsetValues[i].split(' ')[0]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -224,7 +225,7 @@ AutoFetcher.prototype.srcsetNotPreSplit = function (values, context) {
|
|||||||
// grab the URL not width/height key
|
// grab the URL not width/height key
|
||||||
if (Boolean(srcsetValues[j])) {
|
if (Boolean(srcsetValues[j])) {
|
||||||
var value = srcsetValues[j].trim().split(' ')[0];
|
var value = srcsetValues[j].trim().split(' ')[0];
|
||||||
this.safeFetch(this.fixupURLSrcSet(value, tagSrc, context));
|
this.queueURL(this.fixupURLSrcSet(value, tagSrc, context));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,8 +53,6 @@ function AutoFetcher() {
|
|||||||
}
|
}
|
||||||
// local cache of URLs fetched, to reduce server load
|
// local cache of URLs fetched, to reduce server load
|
||||||
this.seen = {};
|
this.seen = {};
|
||||||
// array of promises returned by fetch(URL)
|
|
||||||
this.fetches = [];
|
|
||||||
// array of URL to be fetched
|
// array of URL to be fetched
|
||||||
this.queue = [];
|
this.queue = [];
|
||||||
// should we queue a URL or not
|
// should we queue a URL or not
|
||||||
@ -65,19 +63,14 @@ function AutoFetcher() {
|
|||||||
this.fetchDone = this.fetchDone.bind(this);
|
this.fetchDone = this.fetchDone.bind(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
AutoFetcher.prototype.safeFetch = function (url) {
|
AutoFetcher.prototype.queueURL = function (url) {
|
||||||
// ensure we do not request data urls
|
// ensure we do not request data urls
|
||||||
if (url.indexOf('data:') === 0) return;
|
if (url.indexOf('data:') === 0) return;
|
||||||
// check to see if we have seen this url before in order
|
// check to see if we have seen this url before in order
|
||||||
// to lessen the load against the server content is autofetchd from
|
// to lessen the load against the server content is autofetchd from
|
||||||
if (this.seen[url] != null) return;
|
if (this.seen[url] != null) return;
|
||||||
this.seen[url] = true;
|
this.seen[url] = true;
|
||||||
if (this.queuing) {
|
this.queue.push(url);
|
||||||
// we are currently waiting for a batch of fetches to complete
|
|
||||||
return this.queue.push(url);
|
|
||||||
}
|
|
||||||
// fetch this url
|
|
||||||
this.fetches.push(fetch(url));
|
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetcher.prototype.safeResolve = function (url, resolver) {
|
AutoFetcher.prototype.safeResolve = function (url, resolver) {
|
||||||
@ -102,47 +95,52 @@ AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string
|
|||||||
// (resolvedURL will be undefined if an error occurred)
|
// (resolvedURL will be undefined if an error occurred)
|
||||||
var resolvedURL = this.safeResolve(n2, this.currentResolver);
|
var resolvedURL = this.safeResolve(n2, this.currentResolver);
|
||||||
if (resolvedURL) {
|
if (resolvedURL) {
|
||||||
this.safeFetch(resolvedURL);
|
this.queueURL(resolvedURL);
|
||||||
}
|
}
|
||||||
return n1 + n2 + n3;
|
return n1 + n2 + n3;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
AutoFetcher.prototype.delay = function () {
|
||||||
|
// 2 second delay seem reasonable
|
||||||
|
return new Promise(function (resolve, reject) {
|
||||||
|
setTimeout(resolve, 2000);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
AutoFetcher.prototype.fetchDone = function () {
|
AutoFetcher.prototype.fetchDone = function () {
|
||||||
// indicate we no longer need to Q
|
|
||||||
this.queuing = false;
|
this.queuing = false;
|
||||||
if (this.queue.length > 0) {
|
if (this.queue.length > 0) {
|
||||||
// we have a Q of some length drain it
|
// we have a Q of some length drain it
|
||||||
this.drainQ();
|
var autofetcher = this;
|
||||||
|
// wait 2 seconds before doing another batch
|
||||||
|
this.delay().then(function () {
|
||||||
|
autofetcher.fetchAll();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetcher.prototype.fetchAll = function () {
|
AutoFetcher.prototype.fetchAll = function () {
|
||||||
// if we are queuing or have no fetches this is a no op
|
if (this.queuing || this.queue.length === 0) {
|
||||||
if (this.queuing) return;
|
return;
|
||||||
if (this.fetches.length === 0) return;
|
}
|
||||||
// we are about to fetch queue anything that comes our way
|
// the number of fetches is limited to a maximum of 60 outstanding fetches
|
||||||
|
// the baseline maximum number of fetches is 50 but if the size(queue) <= 10
|
||||||
|
// we add them to the current batch this.queuing = true;
|
||||||
this.queuing = true;
|
this.queuing = true;
|
||||||
// initiate fetches by turning the initial fetch promises
|
|
||||||
// into rejctionless promises and "await" all clearing
|
|
||||||
// our fetches array in place
|
|
||||||
var runningFetchers = [];
|
var runningFetchers = [];
|
||||||
while (this.fetches.length > 0) {
|
while (this.queue.length > 0 && runningFetchers.length <= 50) {
|
||||||
runningFetchers.push(this.fetches.shift().catch(noop))
|
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||||
|
}
|
||||||
|
if (this.queue.length <= 10) {
|
||||||
|
while (this.queue.length > 0) {
|
||||||
|
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Promise.all(runningFetchers)
|
Promise.all(runningFetchers)
|
||||||
.then(this.fetchDone)
|
.then(this.fetchDone)
|
||||||
.catch(this.fetchDone);
|
.catch(this.fetchDone);
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetcher.prototype.drainQ = function () {
|
|
||||||
// clear our Q in place and fill our fetches array
|
|
||||||
while (this.queue.length > 0) {
|
|
||||||
this.fetches.push(fetch(this.queue.shift()));
|
|
||||||
}
|
|
||||||
// fetch all the things
|
|
||||||
this.fetchAll();
|
|
||||||
};
|
|
||||||
|
|
||||||
AutoFetcher.prototype.extractMedia = function (mediaRules) {
|
AutoFetcher.prototype.extractMedia = function (mediaRules) {
|
||||||
// this is a broken down rewrite_style
|
// this is a broken down rewrite_style
|
||||||
if (mediaRules == null) return;
|
if (mediaRules == null) return;
|
||||||
@ -173,7 +171,7 @@ AutoFetcher.prototype.extractSrcset = function (srcsets) {
|
|||||||
// resolve the URL in an exceptionless manner (resolvedURL will be undefined if an error occurred)
|
// resolve the URL in an exceptionless manner (resolvedURL will be undefined if an error occurred)
|
||||||
var resolvedURL = this.safeResolve(srcsetValue.split(' ')[0], extractedSrcSet.resolve);
|
var resolvedURL = this.safeResolve(srcsetValue.split(' ')[0], extractedSrcSet.resolve);
|
||||||
if (resolvedURL) {
|
if (resolvedURL) {
|
||||||
this.safeFetch(resolvedURL);
|
this.queueURL(resolvedURL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -186,6 +186,16 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function isImageSrcset(elem) {
|
||||||
|
if (elem.tagName === 'IMG') return true;
|
||||||
|
return elem.tagName === 'SOURCE' && elem.parentElement && elem.parentElement.tagName === 'PICTURE';
|
||||||
|
}
|
||||||
|
|
||||||
|
function isImageDataSrcset(elem) {
|
||||||
|
if (isImageSrcset(elem)) return elem.dataset.srcset != null;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function is_host_url(str) {
|
function is_host_url(str) {
|
||||||
// Good guess that's its a hostname
|
// Good guess that's its a hostname
|
||||||
@ -1152,7 +1162,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
} else if (lowername == "style") {
|
} else if (lowername == "style") {
|
||||||
value = rewrite_style(value);
|
value = rewrite_style(value);
|
||||||
} else if (lowername == "srcset") {
|
} else if (lowername == "srcset") {
|
||||||
value = rewrite_srcset(value, this.tagName === 'IMG');
|
value = rewrite_srcset(value, isImageSrcset(this));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
orig_setAttribute.call(this, name, value);
|
orig_setAttribute.call(this, name, value);
|
||||||
@ -1423,16 +1433,74 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
}, true);
|
}, true);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
AutoFetchWorker.prototype.preserveDataSrcset = function (srcset) {
|
||||||
|
// send values from rewrite_attr srcset to the worker deferred
|
||||||
|
// to ensure the page viewer sees the images first
|
||||||
|
this.postMessage({
|
||||||
|
'type': 'values',
|
||||||
|
'srcset': {'values': srcset, 'presplit': false},
|
||||||
|
}, true);
|
||||||
|
};
|
||||||
|
|
||||||
AutoFetchWorker.prototype.preserveMedia = function (media) {
|
AutoFetchWorker.prototype.preserveMedia = function (media) {
|
||||||
// send CSSMediaRule values to the worker
|
// send CSSMediaRule values to the worker
|
||||||
this.postMessage({'type': 'values', 'media': media})
|
this.postMessage({'type': 'values', 'media': media}, true);
|
||||||
|
};
|
||||||
|
|
||||||
|
AutoFetchWorker.prototype.extractSrcset = function (elem) {
|
||||||
|
if (wb_getAttribute) {
|
||||||
|
return wb_getAttribute.call(elem, 'srcset');
|
||||||
|
}
|
||||||
|
return elem.getAttribute('srcset');
|
||||||
|
};
|
||||||
|
|
||||||
|
AutoFetchWorker.prototype.checkForPictureSourceDataSrcsets = function () {
|
||||||
|
var dataSS = $wbwindow.document.querySelectorAll('img[data-srcset], source[data-srcset]');
|
||||||
|
var elem;
|
||||||
|
var srcset = [];
|
||||||
|
for (var i = 0; i < dataSS.length; i++) {
|
||||||
|
elem = dataSS[i];
|
||||||
|
if (elem.tagName === 'SOURCE') {
|
||||||
|
if (elem.parentElement && elem.parentElement.tagName === 'PICTURE' && elem.dataset.srcset) {
|
||||||
|
srcset.push({srcset: elem.dataset.srcset});
|
||||||
|
}
|
||||||
|
} else if (elem.dataset.srcset) {
|
||||||
|
srcset.push({srcset: elem.dataset.srcset});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (srcset.length) {
|
||||||
|
this.postMessage({
|
||||||
|
'type': 'values',
|
||||||
|
'srcset': {'values': srcset, 'presplit': false},
|
||||||
|
'context': {
|
||||||
|
'docBaseURI': $wbwindow.document.baseURI
|
||||||
|
}
|
||||||
|
}, true);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
AutoFetchWorker.prototype.extractImgPictureSourceSrcsets = function () {
|
||||||
|
var i;
|
||||||
|
var elem = null;
|
||||||
|
var srcset = [];
|
||||||
|
var ssElements = $wbwindow.document.querySelectorAll('img[srcset], source[srcset]');
|
||||||
|
for (i = 0; i < ssElements.length; i++) {
|
||||||
|
elem = ssElements[i];
|
||||||
|
if (elem.tagName === 'SOURCE') {
|
||||||
|
if (elem.parentElement && elem.parentElement.tagName === 'PICTURE') {
|
||||||
|
srcset.push({srcset: this.extractSrcset(elem)});
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
srcset.push({tagSrc: elem.src, srcset: this.extractSrcset(elem)});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return srcset;
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorker.prototype.extractFromLocalDoc = function () {
|
AutoFetchWorker.prototype.extractFromLocalDoc = function () {
|
||||||
// get the values to be preserved from the documents stylesheets
|
// get the values to be preserved from the documents stylesheets
|
||||||
// and all elements with a srcset
|
// and all elements with a srcset
|
||||||
var media = [];
|
var media = [];
|
||||||
var srcset = [];
|
|
||||||
var sheets = $wbwindow.document.styleSheets;
|
var sheets = $wbwindow.document.styleSheets;
|
||||||
var i = 0;
|
var i = 0;
|
||||||
for (; i < sheets.length; ++i) {
|
for (; i < sheets.length; ++i) {
|
||||||
@ -1444,16 +1512,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
var srcsetElems = $wbwindow.document.querySelectorAll('img[srcset]');
|
var srcset = this.extractImgPictureSourceSrcsets();
|
||||||
for (i = 0; i < srcsetElems.length; i++) {
|
|
||||||
var ssv = {tagSrc: srcsetElems[i].src};
|
|
||||||
if (wb_getAttribute) {
|
|
||||||
ssv.srcset = wb_getAttribute.call(srcsetElems[i], 'srcset');
|
|
||||||
} else {
|
|
||||||
ssv.srcset = srcsetElems[i].getAttribute('srcset');
|
|
||||||
}
|
|
||||||
srcset.push(ssv);
|
|
||||||
}
|
|
||||||
// send the extracted values to the worker deferred
|
// send the extracted values to the worker deferred
|
||||||
// to ensure the page viewer sees the images first
|
// to ensure the page viewer sees the images first
|
||||||
this.postMessage({
|
this.postMessage({
|
||||||
@ -1464,6 +1523,12 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
'docBaseURI': $wbwindow.document.baseURI
|
'docBaseURI': $wbwindow.document.baseURI
|
||||||
}
|
}
|
||||||
}, true);
|
}, true);
|
||||||
|
// deffer the checking of img/source data-srcset
|
||||||
|
// so that we do not clobber the UI thread
|
||||||
|
var self = this;
|
||||||
|
Promise.resolve().then(function () {
|
||||||
|
self.checkForPictureSourceDataSrcsets();
|
||||||
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
WBAutoFetchWorker = new AutoFetchWorker(wb_abs_prefix, wbinfo.mod);
|
WBAutoFetchWorker = new AutoFetchWorker(wb_abs_prefix, wbinfo.mod);
|
||||||
@ -1615,7 +1680,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
} else if (name == "style") {
|
} else if (name == "style") {
|
||||||
new_value = rewrite_style(value);
|
new_value = rewrite_style(value);
|
||||||
} else if (name == "srcset") {
|
} else if (name == "srcset") {
|
||||||
new_value = rewrite_srcset(value, elem.tagName === 'IMG');
|
new_value = rewrite_srcset(value, isImageSrcset(elem));
|
||||||
} else {
|
} else {
|
||||||
// Only rewrite if absolute url
|
// Only rewrite if absolute url
|
||||||
if (abs_url_only && !starts_with(value, VALID_PREFIXES)) {
|
if (abs_url_only && !starts_with(value, VALID_PREFIXES)) {
|
||||||
@ -1623,6 +1688,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
var mod = rwModForElement(elem, name);
|
var mod = rwModForElement(elem, name);
|
||||||
new_value = rewrite_url(value, false, mod, elem.ownerDocument);
|
new_value = rewrite_url(value, false, mod, elem.ownerDocument);
|
||||||
|
if (wbUseAFWorker && isImageDataSrcset(elem)) {
|
||||||
|
WBAutoFetchWorker.preserveDataSrcset(elem.dataset.srcset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_value != value) {
|
if (new_value != value) {
|
||||||
@ -1724,7 +1792,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
if (elem.getAttribute("src") || !elem.textContent || !$wbwindow.Proxy) {
|
if (elem.getAttribute("src") || !elem.textContent || !$wbwindow.Proxy) {
|
||||||
return rewrite_attr(elem, "src");
|
return rewrite_attr(elem, "src");
|
||||||
}
|
}
|
||||||
|
if (elem.type && (elem.type === 'application/json' || elem.type.indexOf('text/template') !== -1)) return;
|
||||||
if (elem.textContent.indexOf("_____WB$wombat$assign$function_____") >= 0) {
|
if (elem.textContent.indexOf("_____WB$wombat$assign$function_____") >= 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -2029,7 +2097,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
if (mod == "cs_" && orig.indexOf("data:text/css") == 0) {
|
if (mod == "cs_" && orig.indexOf("data:text/css") == 0) {
|
||||||
val = rewrite_inline_style(orig);
|
val = rewrite_inline_style(orig);
|
||||||
} else if (attr == "srcset") {
|
} else if (attr == "srcset") {
|
||||||
val = rewrite_srcset(orig, this.tagName === 'IMG');
|
val = rewrite_srcset(orig, isImageSrcset(this));
|
||||||
} else if (this.tagName === 'LINK' && attr === 'href') {
|
} else if (this.tagName === 'LINK' && attr === 'href') {
|
||||||
var relV = this.rel;
|
var relV = this.rel;
|
||||||
if (relV === 'import' || relV === 'preload') {
|
if (relV === 'import' || relV === 'preload') {
|
||||||
|
@ -169,9 +169,9 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
|
|
||||||
var isTop = $wbwindow.self === $wbwindow.top;
|
var isTop = $wbwindow.self === $wbwindow.top;
|
||||||
|
|
||||||
function AutoFetchWorker() {
|
function AutoFetchWorkerProxyMode() {
|
||||||
if (!(this instanceof AutoFetchWorker)) {
|
if (!(this instanceof AutoFetchWorkerProxyMode)) {
|
||||||
return new AutoFetchWorker();
|
return new AutoFetchWorkerProxyMode();
|
||||||
}
|
}
|
||||||
this.checkIntervalTime = 15000;
|
this.checkIntervalTime = 15000;
|
||||||
this.checkIntervalCB = this.checkIntervalCB.bind(this);
|
this.checkIntervalCB = this.checkIntervalCB.bind(this);
|
||||||
@ -206,7 +206,7 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AutoFetchWorker.prototype.startCheckingInterval = function () {
|
AutoFetchWorkerProxyMode.prototype.startCheckingInterval = function () {
|
||||||
// if document ready state is complete do first extraction and start check polling
|
// if document ready state is complete do first extraction and start check polling
|
||||||
// otherwise wait for document ready state to complete to extract and start check polling
|
// otherwise wait for document ready state to complete to extract and start check polling
|
||||||
var self = this;
|
var self = this;
|
||||||
@ -224,20 +224,26 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorker.prototype.checkIntervalCB = function () {
|
AutoFetchWorkerProxyMode.prototype.checkIntervalCB = function () {
|
||||||
this.extractFromLocalDoc();
|
this.extractFromLocalDoc();
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorker.prototype.terminate = function () {
|
AutoFetchWorkerProxyMode.prototype.terminate = function () {
|
||||||
// terminate the worker, a no op when not replay top
|
// terminate the worker, a no op when not replay top
|
||||||
this.worker.terminate();
|
this.worker.terminate();
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorker.prototype.postMessage = function (msg) {
|
AutoFetchWorkerProxyMode.prototype.postMessage = function (msg, deferred) {
|
||||||
|
if (deferred) {
|
||||||
|
var self = this;
|
||||||
|
return Promise.resolve().then(function () {
|
||||||
|
self.worker.postMessage(msg);
|
||||||
|
});
|
||||||
|
}
|
||||||
this.worker.postMessage(msg);
|
this.worker.postMessage(msg);
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorker.prototype.extractMediaRules = function (rules, href) {
|
AutoFetchWorkerProxyMode.prototype.extractMediaRules = function (rules, href) {
|
||||||
// We are in proxy mode and must include a URL to resolve relative URLs in media rules
|
// We are in proxy mode and must include a URL to resolve relative URLs in media rules
|
||||||
if (!rules) return [];
|
if (!rules) return [];
|
||||||
var rvlen = rules.length;
|
var rvlen = rules.length;
|
||||||
@ -252,7 +258,7 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
return text;
|
return text;
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorker.prototype.corsCSSFetch = function (href) {
|
AutoFetchWorkerProxyMode.prototype.corsCSSFetch = function (href) {
|
||||||
// because this JS in proxy mode operates as it would on the live web
|
// because this JS in proxy mode operates as it would on the live web
|
||||||
// the rules of CORS apply and we cannot rely on URLs being rewritten correctly
|
// the rules of CORS apply and we cannot rely on URLs being rewritten correctly
|
||||||
// fetch the cross origin css file and then parse it using a style tag to get the rules
|
// fetch the cross origin css file and then parse it using a style tag to get the rules
|
||||||
@ -269,17 +275,64 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
AutoFetchWorker.prototype.shouldSkipSheet = function (sheet) {
|
AutoFetchWorkerProxyMode.prototype.shouldSkipSheet = function (sheet) {
|
||||||
// we skip extracting rules from sheets if they are from our parsing style or come from pywb
|
// we skip extracting rules from sheets if they are from our parsing style or come from pywb
|
||||||
if (sheet.id === '$wrStyleParser$') return true;
|
if (sheet.id === '$wrStyleParser$') return true;
|
||||||
return !!(sheet.href && sheet.href.indexOf(wb_info.proxy_magic) !== -1);
|
return !!(sheet.href && sheet.href.indexOf(wb_info.proxy_magic) !== -1);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
AutoFetchWorkerProxyMode.prototype.extractImgPictureSourceSrcsets = function () {
|
||||||
|
var i;
|
||||||
|
var elem;
|
||||||
|
var srcset = [];
|
||||||
|
var baseURI = $wbwindow.document.baseURI;
|
||||||
|
var ssElements = $wbwindow.document.querySelectorAll('img[srcset], source[srcset]');
|
||||||
|
for (i = 0; i < ssElements.length; i++) {
|
||||||
|
elem = ssElements[i];
|
||||||
|
if (elem.tagName === 'SOURCE') {
|
||||||
|
if (elem.parentElement && elem.parentElement.tagName === 'PICTURE') {
|
||||||
|
srcset.push({srcset: elem.srcset, resolve: baseURI});
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
srcset.push({
|
||||||
|
srcset: elem.srcset,
|
||||||
|
resolve: elem.src != null && elem.src !== ' ' ? elem.src : baseURI
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return srcset;
|
||||||
|
};
|
||||||
|
|
||||||
|
AutoFetchWorkerProxyMode.prototype.checkForPictureSourceDataSrcsets = function () {
|
||||||
|
var baseURI = $wbwindow.document.baseURI;
|
||||||
|
var dataSS = $wbwindow.document.querySelectorAll('img[data-srcset], source[data-srcset]');
|
||||||
|
var elem;
|
||||||
|
var srcset = [];
|
||||||
|
for (var i = 0; i < dataSS.length; i++) {
|
||||||
|
elem = dataSS[i];
|
||||||
|
if (elem.tagName === 'SOURCE') {
|
||||||
|
if (elem.parentElement && elem.parentElement.tagName === 'PICTURE' && elem.dataset.srcset) {
|
||||||
|
srcset.push({srcset: elem.dataset.srcset, resolve: baseURI});
|
||||||
|
}
|
||||||
|
} else if (elem.dataset.srcset) {
|
||||||
|
srcset.push({srcset: elem.dataset.srcset, resolve: elem.src != null && elem.src !== ' ' ? elem.src : baseURI});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (srcset.length) {
|
||||||
|
this.postMessage({
|
||||||
|
'type': 'values',
|
||||||
|
'srcset': {'values': srcset, 'presplit': false},
|
||||||
|
'context': {
|
||||||
|
'docBaseURI': $wbwindow.document.baseURI
|
||||||
|
}
|
||||||
|
}, true);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
AutoFetchWorker.prototype.extractFromLocalDoc = function () {
|
AutoFetchWorkerProxyMode.prototype.extractFromLocalDoc = function () {
|
||||||
var i = 0;
|
var i = 0;
|
||||||
var media = [];
|
var media = [];
|
||||||
var deferredMediaURLS = [];
|
var deferredMediaURLS = [];
|
||||||
var srcset = [];
|
|
||||||
var sheet;
|
var sheet;
|
||||||
var resolve;
|
var resolve;
|
||||||
// We must use the window reference passed to us to access this origins stylesheets
|
// We must use the window reference passed to us to access this origins stylesheets
|
||||||
@ -307,17 +360,11 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
// We must use the window reference passed to us to access this origins elements with srcset attr
|
// We must use the window reference passed to us to access this origins elements with srcset attr
|
||||||
// like cssRule handling we must include a URL to resolve relative URLs by
|
// like cssRule handling we must include a URL to resolve relative URLs by
|
||||||
var srcsetElems = $wbwindow.document.querySelectorAll('img[srcset]');
|
var srcset = this.extractImgPictureSourceSrcsets();
|
||||||
var ssElem, resolveAgainst;
|
|
||||||
for (i = 0; i < srcsetElems.length; i++) {
|
|
||||||
ssElem = srcsetElems[i];
|
|
||||||
resolveAgainst = ssElem.src != null && ssElem.src !== ' ' ? ssElem.src : $wbwindow.document.baseURI;
|
|
||||||
srcset.push({'srcset': ssElem.srcset, 'resolve': resolveAgainst});
|
|
||||||
}
|
|
||||||
|
|
||||||
// send what we have extracted, if anything, to the worker for processing
|
// send what we have extracted, if anything, to the worker for processing
|
||||||
if (media.length > 0 || srcset.length > 0) {
|
if (media.length > 0 || srcset.length > 0) {
|
||||||
this.postMessage({'type': 'values', 'media': media, 'srcset': srcset});
|
this.postMessage({'type': 'values', 'media': media, 'srcset': srcset}, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (deferredMediaURLS.length > 0) {
|
if (deferredMediaURLS.length > 0) {
|
||||||
@ -334,9 +381,15 @@ var _WBWombat = function ($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
// deffer the checking of img/source data-srcset
|
||||||
|
// so that we do not clobber the UI thread
|
||||||
|
var self = this;
|
||||||
|
Promise.resolve().then(function () {
|
||||||
|
self.checkForPictureSourceDataSrcsets();
|
||||||
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
WBAutoFetchWorker = new AutoFetchWorker();
|
WBAutoFetchWorker = new AutoFetchWorkerProxyMode();
|
||||||
|
|
||||||
if (isTop) {
|
if (isTop) {
|
||||||
$wbwindow.addEventListener("message", function (event) {
|
$wbwindow.addEventListener("message", function (event) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user