From 323edcf47cf8c1b899888b6a8ed0cb4805302311 Mon Sep 17 00:00:00 2001 From: John Berlin Date: Wed, 5 Dec 2018 19:03:00 -0500 Subject: [PATCH] enabled auto-fetching of video, audio resources in wombat in non-proxy mode and proxy mode (#427) --- pywb/static/autoFetchWorker.js | 323 ++++++++++++++++-------- pywb/static/autoFetchWorkerProxyMode.js | 183 ++++++++++---- pywb/static/wombat.js | 254 ++++++++++--------- pywb/static/wombatProxyMode.js | 205 ++++++++------- 4 files changed, 581 insertions(+), 384 deletions(-) diff --git a/pywb/static/autoFetchWorker.js b/pywb/static/autoFetchWorker.js index 5baba7a1..3f1ae5aa 100644 --- a/pywb/static/autoFetchWorker.js +++ b/pywb/static/autoFetchWorker.js @@ -3,6 +3,12 @@ var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi; var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi; var srcsetSplit = /\s*(\S*\s+[\d.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/; +var DefaultNumImFetches = 30; +var FullImgQDrainLen = 10; +var DefaultNumAvFetches = 5; +var FullAVQDrainLen = 5; +var DataURLPrefix = 'data:'; + // the autofetcher instance for this worker var autofetcher = null; @@ -41,7 +47,7 @@ self.onmessage = function (event) { var data = event.data; switch (data.type) { case 'values': - autofetcher.autofetchMediaSrcset(data); + autofetcher.autoFetch(data); break; } }; @@ -53,53 +59,24 @@ function AutoFetcher(init) { this.prefix = init.prefix; this.mod = init.mod; this.prefixMod = init.prefix + init.mod; + this.rwRe = new RegExp(init.rwRe); // relative url, WorkerLocation is set by owning document this.relative = init.prefix.split(location.origin)[1]; // schemeless url this.schemeless = '/' + this.relative; // local cache of URLs fetched, to reduce server load this.seen = {}; - // array of URL to be fetched + // array of URLs to be fetched this.queue = []; + this.avQueue = []; // should we queue a URL or not this.queuing = false; + this.queuingAV = false; this.urlExtractor = this.urlExtractor.bind(this); - this.fetchDone = this.fetchDone.bind(this); + this.imgFetchDone = this.imgFetchDone.bind(this); + this.avFetchDone = this.avFetchDone.bind(this); } -AutoFetcher.prototype.fixupURL = function (url) { - // attempt to fix up the url and do our best to ensure we can get dat 200 OK! - if (url.indexOf(this.prefixMod) === 0) { - return url; - } - if (url.indexOf(this.relative) === 0) { - return url.replace(this.relative, this.prefix); - } - if (url.indexOf(this.schemeless) === 0) { - return url.replace(this.schemeless, this.prefix); - } - if (url.indexOf(this.prefix) !== 0) { - return this.prefix + url; - } - return url; -}; - -AutoFetcher.prototype.queueURL = function (url) { - // ensure we do not request data urls - if (url.indexOf('data:') === 0) return; - // check to see if we have seen this url before in order - // to lessen the load against the server content is fetched from - if (this.seen[url] != null) return; - this.seen[url] = true; - this.queue.push(url); -}; - -AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) { - // Same function as style_replacer in wombat.rewrite_style, n2 is our URL - this.queueURL(this.fixupURL(n2)); - return n1 + n2 + n3; -}; - AutoFetcher.prototype.delay = function () { // 2 second delay seem reasonable return new Promise(function (resolve, reject) { @@ -107,47 +84,105 @@ AutoFetcher.prototype.delay = function () { }); }; -AutoFetcher.prototype.fetchDone = function () { - this.queuing = false; +AutoFetcher.prototype.imgFetchDone = function () { if (this.queue.length > 0) { // we have a Q of some length drain it var autofetcher = this; this.delay().then(function () { - autofetcher.fetchAll(); + autofetcher.queuing = false; + autofetcher.fetchImgs(); }); + } else { + this.queuing = false; } }; -AutoFetcher.prototype.fetchAll = function () { +AutoFetcher.prototype.avFetchDone = function () { + if (this.avQueue.length > 0) { + // we have a Q of some length drain it + var autofetcher = this; + this.delay().then(function () { + autofetcher.queuingAV = false; + autofetcher.fetchAV(); + }); + } else { + this.queuingAV = false; + } +}; + +AutoFetcher.prototype.fetchAV = function () { + if (this.queuingAV || this.avQueue.length === 0) { + return; + } + // the number of fetches is limited to a maximum of DefaultNumAvFetches + FullAVQDrainLen outstanding fetches + // the baseline maximum number of fetches is DefaultNumAvFetches but if the size(avQueue) <= FullAVQDrainLen + // we add them to the current batch. Because audio video resources might be big + // we limit how many we fetch at a time drastically + this.queuingAV = true; + var runningFetchers = []; + while (this.avQueue.length > 0 && runningFetchers.length <= DefaultNumAvFetches) { + runningFetchers.push(fetch(this.avQueue.shift()).catch(noop)) + } + if (this.avQueue.length <= FullAVQDrainLen) { + while (this.avQueue.length > 0) { + runningFetchers.push(fetch(this.avQueue.shift()).catch(noop)) + } + } + Promise.all(runningFetchers) + .then(this.avFetchDone) + .catch(this.avFetchDone); +}; + +AutoFetcher.prototype.fetchImgs = function () { if (this.queuing || this.queue.length === 0) { return; } - // the number of fetches is limited to a maximum of 60 outstanding fetches - // the baseline maximum number of fetches is 50 but if the size(queue) <= 10 + // the number of fetches is limited to a maximum of DefaultNumImFetches + FullImgQDrainLen outstanding fetches + // the baseline maximum number of fetches is DefaultNumImFetches but if the size(queue) <= FullImgQDrainLen // we add them to the current batch this.queuing = true; var runningFetchers = []; - while (this.queue.length > 0 && runningFetchers.length <= 50) { + while (this.queue.length > 0 && runningFetchers.length <= DefaultNumImFetches) { runningFetchers.push(fetch(this.queue.shift()).catch(noop)) } - if (this.queue.length <= 10) { + if (this.queue.length <= FullImgQDrainLen) { while (this.queue.length > 0) { runningFetchers.push(fetch(this.queue.shift()).catch(noop)) } } Promise.all(runningFetchers) - .then(this.fetchDone) - .catch(this.fetchDone); + .then(this.imgFetchDone) + .catch(this.imgFetchDone); }; -AutoFetcher.prototype.extractMedia = function (mediaRules) { - // this is a broken down rewrite_style - if (mediaRules == null || mediaRules.values === null) return; - var rules = mediaRules.values; - for (var i = 0; i < rules.length; i++) { - var rule = rules[i]; - rule.replace(STYLE_REGEX, this.urlExtractor) - .replace(IMPORT_REGEX, this.urlExtractor); +AutoFetcher.prototype.queueNonAVURL = function (url) { + // ensure we do not request data urls + if (url.indexOf(DataURLPrefix) === 0) return; + // check to see if we have seen this url before in order + // to lessen the load against the server content is fetched from + if (this.seen[url] != null) return; + this.seen[url] = true; + this.queue.push(url); +}; + +AutoFetcher.prototype.queueAVURL = function (url) { + // ensure we do not request data urls + if (url.indexOf(DataURLPrefix) === 0) return; + // check to see if we have seen this url before in order + // to lessen the load against the server content is fetched from + if (this.seen[url] != null) return; + this.seen[url] = true; + this.avQueue.push(url); +}; + +AutoFetcher.prototype.maybeResolveURL = function (url, base) { + // given a url and base url returns a resolved full URL or + // null if resolution was unsuccessful + try { + var _url = new URL(url, base); + return _url.href; + } catch (e) { + return null; } }; @@ -163,85 +198,149 @@ AutoFetcher.prototype.maybeFixUpRelSchemelessPrefix = function (url) { return null; }; -AutoFetcher.prototype.maybeResolveURL = function (url, base) { - // given a url and base url returns a resolved full URL or - // null if resolution was unsuccessful - try { - var _url = new URL(url, base); - return _url.href; - } catch (e) { - return null; +AutoFetcher.prototype.maybeFixUpURL = function (url, resolveOpts) { + // attempt to fix up the url and do our best to ensure we can get dat 200 OK! + if (this.rwRe.test(url)) { + return url; + } + var mod = resolveOpts.mod || 'mp_'; + // first check for / (relative) or // (schemeless) rewritten urls + var maybeFixed = this.maybeFixUpRelSchemelessPrefix(url); + if (maybeFixed != null) { + return maybeFixed; + } + // resolve URL against tag src + if (resolveOpts.tagSrc != null) { + maybeFixed = this.maybeResolveURL(url, resolveOpts.tagSrc); + if (maybeFixed != null) { + return this.prefix + mod + '/' + maybeFixed; + } + } + // finally last attempt resolve the originating documents base URI + if (resolveOpts.docBaseURI) { + maybeFixed = this.maybeResolveURL(url, resolveOpts.docBaseURI); + if (maybeFixed != null) { + return this.prefix + mod + '/' + maybeFixed; + } + } + // not much to do now..... + return this.prefixMod + '/' + url; +}; + +AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) { + // Same function as style_replacer in wombat.rewrite_style, n2 is our URL + this.queueNonAVURL(n2); + return n1 + n2 + n3; +}; + +AutoFetcher.prototype.handleMedia = function (mediaRules) { + // this is a broken down rewrite_style + if (mediaRules == null || mediaRules.length === 0) return; + // var rules = mediaRules.values; + for (var i = 0; i < mediaRules.length; i++) { + mediaRules[i] + .replace(STYLE_REGEX, this.urlExtractor) + .replace(IMPORT_REGEX, this.urlExtractor); } }; - -AutoFetcher.prototype.fixupURLSrcSet = function (url, tagSrc, context) { - // attempt to fix up the url and do our best to ensure we can get dat 200 OK! - if (url.indexOf(this.prefix) !== 0) { - // first check for / (relative) or // (schemeless) rewritten urls - var maybeFixed = this.maybeFixUpRelSchemelessPrefix(url); - if (maybeFixed != null) { - return maybeFixed; +AutoFetcher.prototype.handleSrc = function (srcValues, context) { + var resolveOpts = { 'docBaseURI': context.docBaseURI }; + if (srcValues.value) { + resolveOpts.mod = srcValues.mod; + if (resolveOpts.mod === 1) { + return this.queueNonAVURL(this.maybeFixUpURL(srcValues.value.trim(), resolveOpts)); } - // resolve URL against tag src - if (tagSrc != null) { - maybeFixed = this.maybeResolveURL(url, tagSrc); - if (maybeFixed != null) { - return this.prefix + 'im_/' + maybeFixed; + return this.queueAVURL(this.maybeFixUpURL(srcValues.value.trim(), resolveOpts)); + } + var len = srcValues.values.length; + for (var i = 0; i < len; i++) { + var value = srcValues.values[i]; + resolveOpts.mod = value.mod; + if (resolveOpts.mod === 'im_') { + this.queueNonAVURL(this.maybeFixUpURL(value.src, resolveOpts)); + } else { + this.queueAVURL(this.maybeFixUpURL(value.src, resolveOpts)); + } + } +}; + +AutoFetcher.prototype.extractSrcSetNotPreSplit = function (ssV, resolveOpts) { + // was from extract from local doc so we need to duplicate work + var srcsetValues = ssV.split(srcsetSplit); + for (var i = 0; i < srcsetValues.length; i++) { + // grab the URL not width/height key + if (srcsetValues[i]) { + var value = srcsetValues[i].trim().split(' ')[0]; + var maybeResolvedURL = this.maybeFixUpURL(value.trim(), resolveOpts); + if (resolveOpts.mod === 'im_') { + this.queueNonAVURL(maybeResolvedURL); + } else { + this.queueAVURL(maybeResolvedURL); } } - // finally last attempt resolve the originating documents base URI - maybeFixed = this.maybeResolveURL(url, context.docBaseURI); - if (maybeFixed != null) { - return this.prefix + 'im_/' + maybeFixed; - } - // not much to do now..... - return this.prefixMod + '/' + url; } - return url; }; AutoFetcher.prototype.extractSrcset = function (srcsets, context) { - if (srcsets == null || srcsets.values == null) return; - var srcsetValues = srcsets.values; - if (!srcsets.presplit) { - // was from extract from local doc so we need to duplicate work - return this.srcsetNotPreSplit(srcsetValues, context); - } - // was rewrite_srcset so just ensure we just - for (var i = 0; i < srcsetValues.length; i++) { + // was rewrite_srcset and only need to q + for (var i = 0; i < srcsets.length; i++) { // grab the URL not width/height key - this.queueURL(srcsetValues[i].split(' ')[0]); - } -}; - -AutoFetcher.prototype.srcsetNotPreSplit = function (values, context) { - // was from extract from local doc so we need to duplicate work - var j; - for (var i = 0; i < values.length; i++) { - var srcsetValues = values[i].srcset.split(srcsetSplit); - var tagSrc = values[i].tagSrc; - for (j = 0; j < srcsetValues.length; j++) { - // grab the URL not width/height key - if (Boolean(srcsetValues[j])) { - var value = srcsetValues[j].trim().split(' ')[0]; - this.queueURL(this.fixupURLSrcSet(value, tagSrc, context)); - } + var url = srcsets[i].split(' ')[0]; + if (context.mod === 'im_') { + this.queueNonAVURL(url); + } else { + this.queueAVURL(url); } } }; -AutoFetcher.prototype.autofetchMediaSrcset = function (data) { +AutoFetcher.prototype.handleSrcset = function (srcset, context) { + var resolveOpts = { 'docBaseURI': context.docBaseURI }; + if (srcset.value) { + // we have a single value, this srcset came from either + // preserveDataSrcset (not presplit) preserveSrcset (presplit) + resolveOpts.mod = srcset.mod; + if (!srcset.presplit) { + // extract URLs from the srcset string + return this.extractSrcSetNotPreSplit(srcset.value, resolveOpts); + } + // we have an array of srcset URL strings + return this.extractSrcset(srcset.value, resolveOpts); + } + // we have an array of values, these srcsets came from extractFromLocalDoc + var len = srcset.values.length; + for (var i = 0; i < len; i++) { + var ssv = srcset.values[i]; + resolveOpts.mod = ssv.mod; + resolveOpts.tagSrc = ssv.tagSrc; + this.extractSrcSetNotPreSplit(ssv.srcset, resolveOpts); + } +}; + + +AutoFetcher.prototype.autoFetch = function (data) { // we got a message and now we autofetch! // these calls turn into no ops if they have no work - this.extractMedia(data.media); - this.extractSrcset(data.srcset, data.context); - this.fetchAll(); + if (data.media) { + this.handleMedia(data.media); + } + + if (data.src) { + this.handleSrc(data.src, data.context || {}); + } + + if (data.srcset) { + this.handleSrcset(data.srcset, data.context || {}); + } + + this.fetchImgs(); + this.fetchAV(); }; // initialize ourselves from the query params :) try { - var loc = new self.URL(location); + var loc = new self.URL(location.href); autofetcher = new AutoFetcher(JSON.parse(loc.searchParams.get('init'))); } catch (e) { // likely we are in an older version of safari diff --git a/pywb/static/autoFetchWorkerProxyMode.js b/pywb/static/autoFetchWorkerProxyMode.js index 813c5994..78931263 100644 --- a/pywb/static/autoFetchWorkerProxyMode.js +++ b/pywb/static/autoFetchWorkerProxyMode.js @@ -3,6 +3,11 @@ var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi; var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi; var srcsetSplit = /\s*(\S*\s+[\d.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/; +var DefaultNumImFetches = 30; +var FullImgQDrainLen = 10; +var DefaultNumAvFetches = 5; +var FullAVQDrainLen = 5; +var DataURLPrefix = 'data:'; // the autofetcher instance for this worker var autofetcher = null; @@ -53,26 +58,119 @@ function AutoFetcher() { } // local cache of URLs fetched, to reduce server load this.seen = {}; - // array of URL to be fetched + // array of URLs to be fetched this.queue = []; + this.avQueue = []; // should we queue a URL or not this.queuing = false; // a URL to resolve relative URLs found in the cssText of CSSMedia rules. this.currentResolver = null; + // should we queue a URL or not + this.queuing = false; + this.queuingAV = false; this.urlExtractor = this.urlExtractor.bind(this); - this.fetchDone = this.fetchDone.bind(this); + this.imgFetchDone = this.imgFetchDone.bind(this); + this.avFetchDone = this.avFetchDone.bind(this); } -AutoFetcher.prototype.queueURL = function (url) { +AutoFetcher.prototype.delay = function () { + // 2 second delay seem reasonable + return new Promise(function (resolve, reject) { + setTimeout(resolve, 2000); + }); +}; + +AutoFetcher.prototype.imgFetchDone = function () { + if (this.queue.length > 0) { + // we have a Q of some length drain it + var autofetcher = this; + this.delay().then(function () { + autofetcher.queuing = false; + autofetcher.fetchImgs(); + }); + } else { + this.queuing = false; + } +}; + +AutoFetcher.prototype.avFetchDone = function () { + if (this.avQueue.length > 0) { + // we have a Q of some length drain it + var autofetcher = this; + this.delay().then(function () { + autofetcher.queuingAV = false; + autofetcher.fetchAV(); + }); + } else { + this.queuingAV = false; + } +}; + +AutoFetcher.prototype.fetchAV = function () { + if (this.queuingAV || this.avQueue.length === 0) { + return; + } + // the number of fetches is limited to a maximum of DefaultNumAvFetches + FullAVQDrainLen outstanding fetches + // the baseline maximum number of fetches is DefaultNumAvFetches but if the size(avQueue) <= FullAVQDrainLen + // we add them to the current batch. Because audio video resources might be big + // we limit how many we fetch at a time drastically + this.queuingAV = true; + var runningFetchers = []; + while (this.avQueue.length > 0 && runningFetchers.length <= DefaultNumAvFetches) { + runningFetchers.push(fetch(this.avQueue.shift()).catch(noop)) + } + if (this.avQueue.length <= FullAVQDrainLen) { + while (this.avQueue.length > 0) { + runningFetchers.push(fetch(this.avQueue.shift()).catch(noop)) + } + } + Promise.all(runningFetchers) + .then(this.avFetchDone) + .catch(this.avFetchDone); +}; + +AutoFetcher.prototype.fetchImgs = function () { + if (this.queuing || this.queue.length === 0) { + return; + } + // the number of fetches is limited to a maximum of DefaultNumImFetches + FullImgQDrainLen outstanding fetches + // the baseline maximum number of fetches is DefaultNumImFetches but if the size(queue) <= FullImgQDrainLen + // we add them to the current batch + this.queuing = true; + var runningFetchers = []; + while (this.queue.length > 0 && runningFetchers.length <= DefaultNumImFetches) { + runningFetchers.push(fetch(this.queue.shift()).catch(noop)) + } + if (this.queue.length <= FullImgQDrainLen) { + while (this.queue.length > 0) { + runningFetchers.push(fetch(this.queue.shift()).catch(noop)) + } + } + Promise.all(runningFetchers) + .then(this.imgFetchDone) + .catch(this.imgFetchDone); +}; + +AutoFetcher.prototype.queueNonAVURL = function (url) { // ensure we do not request data urls - if (url.indexOf('data:') === 0) return; + if (url.indexOf(DataURLPrefix) === 0) return; // check to see if we have seen this url before in order - // to lessen the load against the server content is autofetchd from + // to lessen the load against the server content is fetched from if (this.seen[url] != null) return; this.seen[url] = true; this.queue.push(url); }; +AutoFetcher.prototype.queueAVURL = function (url) { + // ensure we do not request data urls + if (url.indexOf(DataURLPrefix) === 0) return; + // check to see if we have seen this url before in order + // to lessen the load against the server content is fetched from + if (this.seen[url] != null) return; + this.seen[url] = true; + this.avQueue.push(url); +}; + AutoFetcher.prototype.safeResolve = function (url, resolver) { // Guard against the exception thrown by the URL constructor if the URL or resolver is bad // if resolver is undefined/null then this function passes url through @@ -95,52 +193,11 @@ AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string // (resolvedURL will be undefined if an error occurred) var resolvedURL = this.safeResolve(n2, this.currentResolver); if (resolvedURL) { - this.queueURL(resolvedURL); + this.queueNonAVURL(resolvedURL); } return n1 + n2 + n3; }; -AutoFetcher.prototype.delay = function () { - // 2 second delay seem reasonable - return new Promise(function (resolve, reject) { - setTimeout(resolve, 2000); - }); -}; - -AutoFetcher.prototype.fetchDone = function () { - this.queuing = false; - if (this.queue.length > 0) { - // we have a Q of some length drain it - var autofetcher = this; - // wait 2 seconds before doing another batch - this.delay().then(function () { - autofetcher.fetchAll(); - }); - } -}; - -AutoFetcher.prototype.fetchAll = function () { - if (this.queuing || this.queue.length === 0) { - return; - } - // the number of fetches is limited to a maximum of 60 outstanding fetches - // the baseline maximum number of fetches is 50 but if the size(queue) <= 10 - // we add them to the current batch this.queuing = true; - this.queuing = true; - var runningFetchers = []; - while (this.queue.length > 0 && runningFetchers.length <= 50) { - runningFetchers.push(fetch(this.queue.shift()).catch(noop)) - } - if (this.queue.length <= 10) { - while (this.queue.length > 0) { - runningFetchers.push(fetch(this.queue.shift()).catch(noop)) - } - } - Promise.all(runningFetchers) - .then(this.fetchDone) - .catch(this.fetchDone); -}; - AutoFetcher.prototype.extractMedia = function (mediaRules) { // this is a broken down rewrite_style if (mediaRules == null) return; @@ -165,13 +222,17 @@ AutoFetcher.prototype.extractSrcset = function (srcsets) { extractedSrcSet = srcsets[i]; ssSplit = extractedSrcSet.srcset.split(srcsetSplit); for (j = 0; j < ssSplit.length; j++) { - if (Boolean(ssSplit[j])) { + if (ssSplit[j]) { srcsetValue = ssSplit[j].trim(); if (srcsetValue.length > 0) { // resolve the URL in an exceptionless manner (resolvedURL will be undefined if an error occurred) var resolvedURL = this.safeResolve(srcsetValue.split(' ')[0], extractedSrcSet.resolve); if (resolvedURL) { - this.queueURL(resolvedURL); + if (extractedSrcSet.mod === 'im_') { + this.queueNonAVURL(resolvedURL); + } else { + this.queueAVURL(resolvedURL); + } } } } @@ -179,12 +240,34 @@ AutoFetcher.prototype.extractSrcset = function (srcsets) { } }; +AutoFetcher.prototype.extractSrc = function (srcVals) { + // preservation worker in proxy mode sends us the value of the srcset attribute of an element + // and a URL to correctly resolve relative URLS. Thus we must recreate rewrite_srcset logic here + if (srcVals == null || srcVals.length === 0) return; + var length = srcVals.length; + var srcVal; + for (var i = 0; i < length; i++) { + srcVal = srcVals[i]; + var resolvedURL = this.safeResolve(srcVal.src, srcVal.resolve); + if (resolvedURL) { + if (srcVal.mod === 'im_') { + this.queueNonAVURL(resolvedURL); + } else { + this.queueAVURL(resolvedURL); + } + } + } +}; + + AutoFetcher.prototype.autofetchMediaSrcset = function (data) { // we got a message and now we autofetch! // these calls turn into no ops if they have no work this.extractMedia(data.media); this.extractSrcset(data.srcset); - this.fetchAll(); + this.extractSrc(data.src); + this.fetchImgs(); + this.fetchAV(); }; autofetcher = new AutoFetcher(); diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js index 87df270d..ef256b2a 100644 --- a/pywb/static/wombat.js +++ b/pywb/static/wombat.js @@ -186,13 +186,30 @@ var _WBWombat = function($wbwindow, wbinfo) { } } - function isImageSrcset(elem) { - if (elem.tagName === 'IMG') return true; - return elem.tagName === 'SOURCE' && elem.parentElement && elem.parentElement.tagName === 'PICTURE'; + function isSavedSrcSrcset(elem) { + // returns true or false to indicate if the supplied element may have attributes that are auto-fetched + switch (elem.tagName) { + case 'IMG': + case 'VIDEO': + case 'AUDIO': + return true; + case 'SOURCE': + if (!elem.parentElement) return false; + switch (elem.parentElement.tagName) { + case 'PICTURE': + case 'VIDEO': + case 'AUDIO': + return true; + default: + return false; + } + default: + return false; + } } - function isImageDataSrcset(elem) { - if (isImageSrcset(elem)) return elem.dataset.srcset != null; + function isSavedDataSrcSrcset(elem) { + if (elem.dataset.srcset != null) return isSavedSrcSrcset(elem); return false; } @@ -1162,7 +1179,7 @@ var _WBWombat = function($wbwindow, wbinfo) { } else if (lowername == "style") { value = rewrite_style(value); } else if (lowername == "srcset") { - value = rewrite_srcset(value, isImageSrcset(this)); + value = rewrite_srcset(value, this); } } orig_setAttribute.call(this, name, value); @@ -1347,25 +1364,35 @@ var _WBWombat = function($wbwindow, wbinfo) { } //============================================ - function initAutoFetchWorker() { + function initAutoFetchWorker(rwRe) { if (!wbUseAFWorker) { return; } var isTop = $wbwindow === $wbwindow.__WB_replay_top; - function AutoFetchWorker(prefix, mod) { + function AutoFetchWorker(opts) { if (!(this instanceof AutoFetchWorker)) { - return new AutoFetchWorker(prefix, mod); + return new AutoFetchWorker(opts); } - this.checkIntervalCB = this.checkIntervalCB.bind(this); + // specifically target the elements we desire + this.elemSelector = ['img', 'source', 'video', 'audio'].map(function (which) { + if (which === 'source') { + return ['picture > ', 'video > ', 'audio >'].map(function (parent) { + return parent + which + '[srcset], ' + parent + which + '[data-srcset], ' + parent + which + '[data-src]' + }).join(', '); + } else { + return which + '[srcset], ' + which + '[data-srcset], ' + which + '[data-src]'; + } + }).join(', '); + if (isTop) { // we are top and can will own this worker // setup URL for the kewl case // Normal replay and preservation mode pworker setup, its all one origin so YAY! - var workerURL = wbinfo.static_prefix + + var workerURL = (wbinfo.auto_fetch_worker_prefix || wbinfo.static_prefix) + 'autoFetchWorker.js?init='+ - encodeURIComponent(JSON.stringify({ 'mod': mod, 'prefix': prefix })); + encodeURIComponent(JSON.stringify(opts)); this.worker = new $wbwindow.Worker(workerURL); } else { // add only the portions of the worker interface we use since we are not top and if in proxy mode start check polling @@ -1381,20 +1408,17 @@ var _WBWombat = function($wbwindow, wbinfo) { } } - AutoFetchWorker.prototype.checkIntervalCB = function () { - this.extractFromLocalDoc(); - }; - AutoFetchWorker.prototype.deferredSheetExtraction = function (sheet) { var rules = sheet.cssRules || sheet.rules; // if no rules this a no op if (!rules || rules.length === 0) return; - var self = this; - function extract() { + var afw = this; + // defer things until next time the Promise.resolve Qs are cleared + $wbwindow.Promise.resolve().then(function () { // loop through each rule of the stylesheet var media = []; - for (var j = 0; j < rules.length; ++j) { - var rule = rules[j]; + for (var i = 0; i < rules.length; ++i) { + var rule = rules[i]; if (rule.type === CSSRule.MEDIA_RULE) { // we are a media rule so get its text media.push(rule.cssText); @@ -1402,11 +1426,9 @@ var _WBWombat = function($wbwindow, wbinfo) { } if (media.length > 0) { // we have some media rules to preserve - self.preserveMedia(media); + afw.preserveMedia(media); } - } - // defer things until next time the Promise.resolve Qs are cleared - $wbwindow.Promise.resolve().then(extract); + }); }; AutoFetchWorker.prototype.terminate = function () { @@ -1416,29 +1438,29 @@ var _WBWombat = function($wbwindow, wbinfo) { AutoFetchWorker.prototype.postMessage = function (msg, deferred) { if (deferred) { - var self = this; + var afw = this; return Promise.resolve().then(function () { - self.worker.postMessage(msg); + afw.worker.postMessage(msg); }); } this.worker.postMessage(msg); }; - AutoFetchWorker.prototype.preserveSrcset = function (srcset) { + AutoFetchWorker.prototype.preserveSrcset = function (srcset, mod) { // send values from rewrite_srcset to the worker deferred // to ensure the page viewer sees the images first this.postMessage({ 'type': 'values', - 'srcset': {'values': srcset, 'presplit': true}, + 'srcset': { 'value': srcset, 'mod': mod, 'presplit': true }, }, true); }; - AutoFetchWorker.prototype.preserveDataSrcset = function (srcset) { + AutoFetchWorker.prototype.preserveDataSrcset = function (elem) { // send values from rewrite_attr srcset to the worker deferred // to ensure the page viewer sees the images first this.postMessage({ 'type': 'values', - 'srcset': {'values': srcset, 'presplit': false}, + 'srcset': {'value': elem.dataset.srcset, 'mod': this.rwMod(elem), 'presplit': false}, }, true); }; @@ -1447,91 +1469,86 @@ var _WBWombat = function($wbwindow, wbinfo) { this.postMessage({'type': 'values', 'media': media}, true); }; - AutoFetchWorker.prototype.extractSrcset = function (elem) { + AutoFetchWorker.prototype.getSrcset = function (elem) { if (wb_getAttribute) { return wb_getAttribute.call(elem, 'srcset'); } return elem.getAttribute('srcset'); }; - AutoFetchWorker.prototype.checkForPictureSourceDataSrcsets = function () { - var dataSS = $wbwindow.document.querySelectorAll('img[data-srcset], source[data-srcset]'); - var elem; - var srcset = []; - for (var i = 0; i < dataSS.length; i++) { - elem = dataSS[i]; - if (elem.tagName === 'SOURCE') { - if (elem.parentElement && elem.parentElement.tagName === 'PICTURE' && elem.dataset.srcset) { - srcset.push({srcset: elem.dataset.srcset}); - } - } else if (elem.dataset.srcset) { - srcset.push({srcset: elem.dataset.srcset}); - } - } - if (srcset.length) { - this.postMessage({ - 'type': 'values', - 'srcset': {'values': srcset, 'presplit': false}, - 'context': { - 'docBaseURI': $wbwindow.document.baseURI - } - }, true); - } - }; - - AutoFetchWorker.prototype.extractImgPictureSourceSrcsets = function () { - var i; - var elem = null; - var srcset = []; - var ssElements = $wbwindow.document.querySelectorAll('img[srcset], source[srcset]'); - for (i = 0; i < ssElements.length; i++) { - elem = ssElements[i]; - if (elem.tagName === 'SOURCE') { - if (elem.parentElement && elem.parentElement.tagName === 'PICTURE') { - srcset.push({srcset: this.extractSrcset(elem)}); - } - } else { - srcset.push({tagSrc: elem.src, srcset: this.extractSrcset(elem)}); - } - } - return srcset; + AutoFetchWorker.prototype.rwMod = function (elem) { + return elem.tagName === "SOURCE" ? + elem.parentElement.tagName === "PICTURE" ? 'im_' : 'oe_' + : elem.tagName === "IMG" ? 'im_' : 'oe_'; }; AutoFetchWorker.prototype.extractFromLocalDoc = function () { - // get the values to be preserved from the documents stylesheets - // and all elements with a srcset - var media = []; - var sheets = $wbwindow.document.styleSheets; - var i = 0; - for (; i < sheets.length; ++i) { - var rules = sheets[i].cssRules; - for (var j = 0; j < rules.length; ++j) { - var rule = rules[j]; - if (rule.type === CSSRule.MEDIA_RULE) { - media.push(rule.cssText); + // get the values to be preserved from the documents stylesheets + // and all img, video, audio elements with (data-)?srcset or data-src + var afw = this; + Promise.resolve().then(function () { + var msg = { 'type': 'values', 'context': { 'docBaseURI': $wbwindow.document.baseURI } }; + var media = []; + var i = 0; + var sheets = $wbwindow.document.styleSheets; + for (; i < sheets.length; ++i) { + var rules = sheets[i].cssRules; + for (var j = 0; j < rules.length; ++j) { + var rule = rules[j]; + if (rule.type === CSSRule.MEDIA_RULE) { + media.push(rule.cssText); + } } } - } - var srcset = this.extractImgPictureSourceSrcsets(); - // send the extracted values to the worker deferred - // to ensure the page viewer sees the images first - this.postMessage({ - 'type': 'values', - 'media': media, - 'srcset': {'values': srcset, 'presplit': false}, - 'context': { - 'docBaseURI': $wbwindow.document.baseURI + var elems = $wbwindow.document.querySelectorAll(afw.elemSelector); + var srcset = { 'values': [], 'presplit': false }; + var src = { 'values': [] }; + var elem, srcv, mod; + for (i = 0; i < elems.length; ++i) { + elem = elems[i]; + // we want the original src value in order to resolve URLs in the worker when needed + srcv = elem.src ? elem.src : null; + // a from value of 1 indicates images and a 2 indicates audio/video + mod = afw.rwMod(elem); + if (elem.srcset) { + srcset.values.push({ + 'srcset': afw.getSrcset(elem), + 'mod': mod, + 'tagSrc': srcv + }); + } + if (elem.dataset.srcset) { + srcset.values.push({ + 'srcset': elem.dataset.srcset, + 'mod': mod, + 'tagSrc': srcv + }); + } + if (elem.dataset.src) { + src.values.push({'src': elem.dataset.src, 'mod': mod}); + } + if (elem.tagName === "SOURCE" && srcv) { + src.values.push({'src': srcv, 'mod': mod}); + } + } + if (media.length) { + msg.media = media; + } + if (srcset.values.length) { + msg.srcset = srcset; + } + if (src.values.length) { + msg.src = src; + } + if (msg.media || msg.srcset || msg.src) { + afw.postMessage(msg); } - }, true); - // deffer the checking of img/source data-srcset - // so that we do not clobber the UI thread - var self = this; - Promise.resolve().then(function () { - self.checkForPictureSourceDataSrcsets(); }); }; - WBAutoFetchWorker = new AutoFetchWorker(wb_abs_prefix, wbinfo.mod); + WBAutoFetchWorker = new AutoFetchWorker({ + 'prefix': wb_abs_prefix, 'mod': wbinfo.mod, 'rwRe': rwRe + }); wbSheetMediaQChecker = function checkStyle() { // used only for link[rel='stylesheet'] so we remove our listener @@ -1680,7 +1697,7 @@ var _WBWombat = function($wbwindow, wbinfo) { } else if (name == "style") { new_value = rewrite_style(value); } else if (name == "srcset") { - new_value = rewrite_srcset(value, isImageSrcset(elem)); + new_value = rewrite_srcset(value, elem); } else { // Only rewrite if absolute url if (abs_url_only && !starts_with(value, VALID_PREFIXES)) { @@ -1688,8 +1705,8 @@ var _WBWombat = function($wbwindow, wbinfo) { } var mod = rwModForElement(elem, name); new_value = rewrite_url(value, false, mod, elem.ownerDocument); - if (wbUseAFWorker && isImageDataSrcset(elem)) { - WBAutoFetchWorker.preserveDataSrcset(elem.dataset.srcset); + if (wbUseAFWorker && isSavedDataSrcSrcset(elem)) { + WBAutoFetchWorker.preserveDataSrcset(elem); } } @@ -1704,7 +1721,7 @@ var _WBWombat = function($wbwindow, wbinfo) { function style_replacer(match, n1, n2, n3, offset, string) { return n1 + rewrite_url(n2) + n3; } - + function rewrite_style(value) { if (!value) { @@ -1725,7 +1742,7 @@ var _WBWombat = function($wbwindow, wbinfo) { } //============================================ - function rewrite_srcset(value, isImage) + function rewrite_srcset(value, elem) { if (!value) { return ""; @@ -1738,9 +1755,9 @@ var _WBWombat = function($wbwindow, wbinfo) { values[i] = rewrite_url(values[i].trim()); } - if (wbUseAFWorker && isImage) { + if (wbUseAFWorker && isSavedSrcSrcset(elem)) { // send post split values to preservation worker - WBAutoFetchWorker.preserveSrcset(values); + WBAutoFetchWorker.preserveSrcset(values, WBAutoFetchWorker.rwMod(elem)); } return values.join(", "); } @@ -1869,6 +1886,9 @@ var _WBWombat = function($wbwindow, wbinfo) { changed = rewrite_attr(elem, 'src'); changed = rewrite_attr(elem, 'srcset') || changed; changed = rewrite_attr(elem, 'style') || changed; + if (wbUseAFWorker && elem.dataset.srcset) { + WBAutoFetchWorker.preserveDataSrcset(elem); + } break; case 'OBJECT': changed = rewrite_attr(elem, "data", true); @@ -2097,7 +2117,7 @@ var _WBWombat = function($wbwindow, wbinfo) { if (mod == "cs_" && orig.indexOf("data:text/css") == 0) { val = rewrite_inline_style(orig); } else if (attr == "srcset") { - val = rewrite_srcset(orig, isImageSrcset(this)); + val = rewrite_srcset(orig, this); } else if (this.tagName === 'LINK' && attr === 'href') { var relV = this.rel; if (relV === 'import' || relV === 'preload') { @@ -2213,7 +2233,7 @@ var _WBWombat = function($wbwindow, wbinfo) { override_style_attr(style_proto, "borderImageSource", "border-image-source"); override_style_setProp(style_proto); - + if ($wbwindow.CSSStyleSheet && $wbwindow.CSSStyleSheet.prototype) { // https://developer.mozilla.org/en-US/docs/Web/API/CSSStyleSheet/insertRule // ruleText is a string of raw css.... @@ -2223,7 +2243,7 @@ var _WBWombat = function($wbwindow, wbinfo) { }; } } - + //============================================ function override_style_setProp(style_proto) { var orig_setProp = style_proto.setProperty; @@ -2439,7 +2459,7 @@ var _WBWombat = function($wbwindow, wbinfo) { Object.defineProperty($wbwindow.FontFace.prototype, "constructor", {value: $wbwindow.FontFace}); $wbwindow.FontFace.__wboverriden__ = true; } - + //============================================ function overrideTextProtoGetSet(textProto, whichProp) { var orig_getter = get_orig_getter(textProto, whichProp); @@ -2464,7 +2484,7 @@ var _WBWombat = function($wbwindow, wbinfo) { }; def_prop(textProto, whichProp, setter, getter); } - + function overrideTextProtoFunction(textProto, whichFN) { var original = textProto[whichFN]; textProto[whichFN] = function () { @@ -2491,7 +2511,7 @@ var _WBWombat = function($wbwindow, wbinfo) { return original.apply(this, args); }; } - + function initTextNodeOverrides($wbwindow) { if (!$wbwindow.Text || !$wbwindow.Text.prototype) return; // https://dom.spec.whatwg.org/#characterdata and https://dom.spec.whatwg.org/#interface-text @@ -2507,7 +2527,7 @@ var _WBWombat = function($wbwindow, wbinfo) { overrideTextProtoGetSet(textProto, 'data'); overrideTextProtoGetSet(textProto, 'wholeText'); } - + //============================================ function init_wombat_loc(win) { @@ -3847,14 +3867,14 @@ var _WBWombat = function($wbwindow, wbinfo) { initFontFaceOverride($wbwindow); // Worker override (experimental) - initAutoFetchWorker(); + initAutoFetchWorker(rx); init_web_worker_override(); init_service_worker_override(); initSharedWorkerOverride(); - + // text node overrides for js frameworks doing funky things with CSS initTextNodeOverrides($wbwindow); - + // innerHTML can be overriden on prototype! override_html_assign($wbwindow.HTMLElement, "innerHTML", true); override_html_assign($wbwindow.HTMLElement, "outerHTML", true); diff --git a/pywb/static/wombatProxyMode.js b/pywb/static/wombatProxyMode.js index 1e6ca162..a4d38857 100644 --- a/pywb/static/wombatProxyMode.js +++ b/pywb/static/wombatProxyMode.js @@ -29,55 +29,56 @@ var _WBWombat = function ($wbwindow, wbinfo) { wbinfo.wombat_opts = wbinfo.wombat_opts || {}; var wbAutoFetchWorkerPrefix = (wb_info.auto_fetch_worker_prefix || wb_info.static_prefix) + 'autoFetchWorkerProxyMode.js'; var WBAutoFetchWorker; - + function init_seeded_random(seed) { // Adapted from: // http://indiegamr.com/generate-repeatable-random-numbers-in-js/ - + $wbwindow.Math.seed = parseInt(seed); - + function seeded_random() { $wbwindow.Math.seed = ($wbwindow.Math.seed * 9301 + 49297) % 233280; var rnd = $wbwindow.Math.seed / 233280; - + return rnd; } - + $wbwindow.Math.random = seeded_random; } - + function init_crypto_random() { if (!$wbwindow.crypto || !$wbwindow.Crypto) { return; } - + var orig_getrandom = $wbwindow.Crypto.prototype.getRandomValues; - + var new_getrandom = function (array) { for (var i = 0; i < array.length; i++) { array[i] = parseInt($wbwindow.Math.random() * 4294967296); } return array; }; - + $wbwindow.Crypto.prototype.getRandomValues = new_getrandom; $wbwindow.crypto.getRandomValues = new_getrandom; } - + //============================================ function init_fixed_ratio() { // otherwise, just set it $wbwindow.devicePixelRatio = 1; - + // prevent changing, if possible if (Object.defineProperty) { try { // fixed pix ratio Object.defineProperty($wbwindow, "devicePixelRatio", {value: 1, writable: false}); - } catch (e) {} + } catch (e) { + } } } - + //======================================== function init_date_override(timestamp) { timestamp = parseInt(timestamp) * 1000; @@ -86,19 +87,19 @@ var _WBWombat = function ($wbwindow, wbinfo) { var timezone = 0; var start_now = $wbwindow.Date.now(); var timediff = start_now - (timestamp - timezone); - + if ($wbwindow.__wb_Date_now) { return; } - + var orig_date = $wbwindow.Date; - + var orig_utc = $wbwindow.Date.UTC; var orig_parse = $wbwindow.Date.parse; var orig_now = $wbwindow.Date.now; - + $wbwindow.__wb_Date_now = orig_now; - + $wbwindow.Date = function (Date) { return function (A, B, C, D, E, F, G) { // Apply doesn't work for constructors and Date doesn't @@ -123,21 +124,21 @@ var _WBWombat = function ($wbwindow, wbinfo) { } } }($wbwindow.Date); - + $wbwindow.Date.prototype = orig_date.prototype; - + $wbwindow.Date.now = function () { return orig_now() - timediff; }; - + $wbwindow.Date.UTC = orig_utc; $wbwindow.Date.parse = orig_parse; - + $wbwindow.Date.__WB_timediff = timediff; - + Object.defineProperty($wbwindow.Date.prototype, "constructor", {value: $wbwindow.Date}); } - + //============================================ function init_disable_notifications() { if (window.Notification) { @@ -145,36 +146,46 @@ var _WBWombat = function ($wbwindow, wbinfo) { if (callback) { callback("denied"); } - + return Promise.resolve("denied"); }; } - + if (window.geolocation) { var disabled = function (success, error, options) { if (error) { error({"code": 2, "message": "not available"}); } }; - + window.geolocation.getCurrentPosition = disabled; window.geolocation.watchPosition = disabled; } } - + function initAutoFetchWorker() { if (!$wbwindow.Worker) { return; } - + var isTop = $wbwindow.self === $wbwindow.top; - + function AutoFetchWorkerProxyMode() { if (!(this instanceof AutoFetchWorkerProxyMode)) { return new AutoFetchWorkerProxyMode(); } this.checkIntervalTime = 15000; this.checkIntervalCB = this.checkIntervalCB.bind(this); + this.elemSelector = ['img', 'source', 'video', 'audio'].map(function (which) { + if (which === 'source') { + return ['picture > ', 'video > ', 'audio >'].map(function (parent) { + return parent + which + '[srcset], ' + parent + which + '[data-srcset], ' + parent + which + '[data-src]' + }).join(', '); + } else { + return which + '[srcset], ' + which + '[data-srcset], ' + which + '[data-src]'; + } + }).join(', '); + if (isTop) { // Cannot directly load our worker from the proxy origin into the current origin // however we fetch it from proxy origin and can blob it into the current origin :) @@ -200,12 +211,13 @@ var _WBWombat = function ($wbwindow, wbinfo) { } $wbwindow.top.postMessage(msg, '*'); }, - "terminate": function () {} + "terminate": function () { + } }; this.startCheckingInterval(); } } - + AutoFetchWorkerProxyMode.prototype.startCheckingInterval = function () { // if document ready state is complete do first extraction and start check polling // otherwise wait for document ready state to complete to extract and start check polling @@ -223,16 +235,16 @@ var _WBWombat = function ($wbwindow, wbinfo) { }, 1000); } }; - + AutoFetchWorkerProxyMode.prototype.checkIntervalCB = function () { this.extractFromLocalDoc(); }; - + AutoFetchWorkerProxyMode.prototype.terminate = function () { // terminate the worker, a no op when not replay top this.worker.terminate(); }; - + AutoFetchWorkerProxyMode.prototype.postMessage = function (msg, deferred) { if (deferred) { var self = this; @@ -242,7 +254,7 @@ var _WBWombat = function ($wbwindow, wbinfo) { } this.worker.postMessage(msg); }; - + AutoFetchWorkerProxyMode.prototype.extractMediaRules = function (rules, href) { // We are in proxy mode and must include a URL to resolve relative URLs in media rules if (!rules) return []; @@ -257,7 +269,7 @@ var _WBWombat = function ($wbwindow, wbinfo) { } return text; }; - + AutoFetchWorkerProxyMode.prototype.corsCSSFetch = function (href) { // because this JS in proxy mode operates as it would on the live web // the rules of CORS apply and we cannot rely on URLs being rewritten correctly @@ -274,70 +286,50 @@ var _WBWombat = function ($wbwindow, wbinfo) { return []; }); }; - + AutoFetchWorkerProxyMode.prototype.shouldSkipSheet = function (sheet) { // we skip extracting rules from sheets if they are from our parsing style or come from pywb if (sheet.id === '$wrStyleParser$') return true; return !!(sheet.href && sheet.href.indexOf(wb_info.proxy_magic) !== -1); }; - AutoFetchWorkerProxyMode.prototype.extractImgPictureSourceSrcsets = function () { - var i; - var elem; - var srcset = []; + AutoFetchWorkerProxyMode.prototype.getImgAVElems = function () { + var elem, srcv, mod; + var results = { 'srcset': [], 'src': []} ; var baseURI = $wbwindow.document.baseURI; - var ssElements = $wbwindow.document.querySelectorAll('img[srcset], source[srcset]'); - for (i = 0; i < ssElements.length; i++) { - elem = ssElements[i]; - if (elem.tagName === 'SOURCE') { - if (elem.parentElement && elem.parentElement.tagName === 'PICTURE') { - srcset.push({srcset: elem.srcset, resolve: baseURI}); - } - } else { - srcset.push({ - srcset: elem.srcset, - resolve: elem.src != null && elem.src !== ' ' ? elem.src : baseURI - }); + var elems = $wbwindow.document.querySelectorAll(this.elemSelector); + for (var i = 0; i < elems.length; i++) { + elem = elems[i]; + // we want the original src value in order to resolve URLs in the worker when needed + srcv = elem.src ? elem.src : null; + // get the correct mod in order to inform the backing worker where the URL(s) are from + mod = elem.tagName === "SOURCE" ? + elem.parentElement.tagName === "PICTURE" ? 'im_' : 'oe_' + : elem.tagName === "IMG" ? 'im_' : 'oe_'; + if (elem.srcset) { + results.srcset.push({ 'srcset': elem.srcset, 'resolve': srcv || baseURI, 'mod': mod }); + } + if (elem.dataset.srcset) { + results.srcset.push({ 'srcset': elem.dataset.srcset, 'resolve': srcv || baseURI, 'mod': mod }); + } + if (elem.dataset.src) { + results.src.push({'src': elem.dataset.src, 'resolve': srcv || baseURI, 'mod': mod}); + } + if (elem.tagName === "SOURCE" && srcv) { + results.src.push({'src': srcv, 'resolve': baseURI, 'mod': mod}); } } - return srcset; + return results; }; - AutoFetchWorkerProxyMode.prototype.checkForPictureSourceDataSrcsets = function () { - var baseURI = $wbwindow.document.baseURI; - var dataSS = $wbwindow.document.querySelectorAll('img[data-srcset], source[data-srcset]'); - var elem; - var srcset = []; - for (var i = 0; i < dataSS.length; i++) { - elem = dataSS[i]; - if (elem.tagName === 'SOURCE') { - if (elem.parentElement && elem.parentElement.tagName === 'PICTURE' && elem.dataset.srcset) { - srcset.push({srcset: elem.dataset.srcset, resolve: baseURI}); - } - } else if (elem.dataset.srcset) { - srcset.push({srcset: elem.dataset.srcset, resolve: elem.src != null && elem.src !== ' ' ? elem.src : baseURI}); - } - } - if (srcset.length) { - this.postMessage({ - 'type': 'values', - 'srcset': {'values': srcset, 'presplit': false}, - 'context': { - 'docBaseURI': $wbwindow.document.baseURI - } - }, true); - } - }; - AutoFetchWorkerProxyMode.prototype.extractFromLocalDoc = function () { - var i = 0; var media = []; var deferredMediaURLS = []; var sheet; var resolve; // We must use the window reference passed to us to access this origins stylesheets var styleSheets = $wbwindow.document.styleSheets; - for (; i < styleSheets.length; ++i) { + for (var i = 0; i < styleSheets.length; i++) { sheet = styleSheets[i]; // if the sheet belongs to our parser node we must skip it if (!this.shouldSkipSheet(sheet)) { @@ -360,13 +352,22 @@ var _WBWombat = function ($wbwindow, wbinfo) { } // We must use the window reference passed to us to access this origins elements with srcset attr // like cssRule handling we must include a URL to resolve relative URLs by - var srcset = this.extractImgPictureSourceSrcsets(); - + var results = this.getImgAVElems(); + var msg = { 'type': 'values' }; // send what we have extracted, if anything, to the worker for processing - if (media.length > 0 || srcset.length > 0) { - this.postMessage({'type': 'values', 'media': media, 'srcset': srcset}, true); + if (media.length > 0) { + msg.media = media; } - + if (results.srcset) { + msg.srcset = results.srcset; + } + if (results.src) { + msg.src = results.src; + } + if (msg.media || msg.srcset || msg.src) { + this.postMessage(msg); + } + if (deferredMediaURLS.length > 0) { // wait for all our deferred fetching and extraction of cross origin // stylesheets to complete and then send those values, if any, to the worker @@ -381,16 +382,10 @@ var _WBWombat = function ($wbwindow, wbinfo) { } }); } - // deffer the checking of img/source data-srcset - // so that we do not clobber the UI thread - var self = this; - Promise.resolve().then(function () { - self.checkForPictureSourceDataSrcsets(); - }); }; - + WBAutoFetchWorker = new AutoFetchWorkerProxyMode(); - + if (isTop) { $wbwindow.addEventListener("message", function (event) { if (event.data && event.data.wb_type === 'aaworker') { @@ -399,11 +394,11 @@ var _WBWombat = function ($wbwindow, wbinfo) { }, false); } } - + if (wbinfo.enable_auto_fetch && wbinfo.is_live) { initAutoFetchWorker(); } - + // proxy mode overrides // Random init_seeded_random(wbinfo.wombat_sec); @@ -425,13 +420,13 @@ var _WBWombat = function ($wbwindow, wbinfo) { window._WBWombat = _WBWombat; -window._WBWombatInit = function(wbinfo) { - if (!this._wb_wombat || !this._wb_wombat.actual) { - this._wb_wombat = new _WBWombat(this, wbinfo); - this._wb_wombat.actual = true; - } else if (!this._wb_wombat) { - console.warn("_wb_wombat missing!"); - } +window._WBWombatInit = function (wbinfo) { + if (!this._wb_wombat || !this._wb_wombat.actual) { + this._wb_wombat = new _WBWombat(this, wbinfo); + this._wb_wombat.actual = true; + } else if (!this._wb_wombat) { + console.warn("_wb_wombat missing!"); + } };