mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-14 15:53:28 +01:00
pywb:
- Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix wombat: - add wombat as submodule!
This commit is contained in:
parent
77f8bb6476
commit
22b4297fc5
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
[submodule "wombat"]
|
||||
path = wombat
|
||||
url = https://github.com/webrecorder/wombat
|
@ -1,4 +0,0 @@
|
||||
NODE_BIN_DIR=../node_modules/.bin
|
||||
|
||||
test:
|
||||
$(NODE_BIN_DIR)/karma start --single-run
|
@ -1,9 +0,0 @@
|
||||
<html>
|
||||
<head><meta charset="UTF-8"></head>
|
||||
<body>
|
||||
<!-- This is a dummy page used in
|
||||
tests of Wombat's live-rewriting
|
||||
functionality.
|
||||
!-->
|
||||
</body>
|
||||
</html>
|
@ -1,108 +0,0 @@
|
||||
// Options handed to karma-sauce-launcher through the `sauceLabs` config key.
var sauceLabsConfig = {
  testName: 'pywb Client Tests',
};

// see https://github.com/karma-runner/karma-sauce-launcher/issues/73
// When TRAVIS_JOB_NUMBER is set, do not start a new Sauce Connect tunnel and
// use the job number as the tunnel identifier instead.
if (process.env.TRAVIS_JOB_NUMBER) {
  sauceLabsConfig.startConnect = false;
  sauceLabsConfig.tunnelIdentifier = process.env.TRAVIS_JOB_NUMBER;
}

// Path (relative to Karma's basePath) of the script under test.
var WOMBAT_JS_PATH = 'pywb/static/wombat.js';

// Browsers run remotely on Sauce Labs.
var sauceLaunchers = {
  sl_chrome: {
    base: 'SauceLabs',
    browserName: 'chrome',
  },

  sl_firefox: {
    base: 'SauceLabs',
    browserName: 'firefox',
  },

  sl_safari: {
    base: 'SauceLabs',
    browserName: 'safari',
    platform: 'OS X 10.11',
    version: '9.0',
  },

  sl_edge: {
    base: 'SauceLabs',
    browserName: 'MicrosoftEdge',
  },
};

// Browsers used when no Sauce Labs credentials are available.
var localLaunchers = {
  localFirefox: {
    base: 'Firefox',
  },
};

// Warning printed when falling back to local browsers. Each fragment ends with
// a separator so the concatenated sentences do not run together.
var SAUCE_FALLBACK_MESSAGE = 'Sauce Labs account details not set, ' +
  'Karma tests will be run only against local browsers. ' +
  'Set SAUCE_USERNAME and SAUCE_ACCESS_KEY environment variables to ' +
  'run tests against Sauce Labs browsers';

var customLaunchers = {};

// Both credentials must be present to use the Sauce Labs launchers.
if (process.env['SAUCE_USERNAME'] && process.env['SAUCE_ACCESS_KEY']) {
  customLaunchers = sauceLaunchers;
} else {
  console.error(SAUCE_FALLBACK_MESSAGE);
  customLaunchers = localLaunchers;
}
|
||||
|
||||
module.exports = function(config) {
|
||||
config.set({
|
||||
basePath: '../',
|
||||
|
||||
frameworks: ['mocha', 'chai'],
|
||||
|
||||
files: [
|
||||
{
|
||||
pattern: WOMBAT_JS_PATH,
|
||||
watched: true,
|
||||
included: false,
|
||||
served: true,
|
||||
},
|
||||
{
|
||||
pattern: 'karma-tests/dummy.html',
|
||||
included: false,
|
||||
served: true,
|
||||
},
|
||||
'karma-tests/*.spec.js',
|
||||
],
|
||||
|
||||
preprocessors: {},
|
||||
|
||||
reporters: ['progress'],
|
||||
|
||||
port: 9876,
|
||||
|
||||
colors: true,
|
||||
|
||||
logLevel: config.LOG_INFO,
|
||||
|
||||
autoWatch: true,
|
||||
|
||||
sauceLabs: sauceLabsConfig,
|
||||
|
||||
// Set extended timeouts to account for the slowness
|
||||
// in connecting to remote browsers (eg. when using
|
||||
// Sauce Labs)
|
||||
//
|
||||
// See https://oligofren.wordpress.com/2014/05/27/running-karma-tests-on-browserstack/
|
||||
captureTimeout: 3 * 60000,
|
||||
browserNoActivityTimeout: 30 * 1000,
|
||||
browserDisconnectTimeout: 10 * 1000,
|
||||
browserDisconnectTolerance: 1,
|
||||
|
||||
customLaunchers: customLaunchers,
|
||||
|
||||
browsers: Object.keys(customLaunchers),
|
||||
|
||||
singleRun: false,
|
||||
|
||||
concurrency: Infinity
|
||||
})
|
||||
};
|
@ -1,225 +0,0 @@
|
||||
var DEFAULT_TIMEOUT = 20000;
|
||||
|
||||
// Creates a new document in an <iframe> and runs a WombatJS test case in it.
//
// A new <iframe> is used for each test so that each case is run with fresh
// Document and Window objects, since Wombat monkey-patches many Document and
// Window functions.
//
// testCase fields read by this function:
//   initScript   - function evaluated inside the frame before the Wombat
//                  source is evaluated
//   wombatScript - source text evaluated inside the frame; it is expected to
//                  define window._WBWombat
//   html         - optional markup assigned to the frame's <body>
//   testScript   - optional function evaluated inside the frame afterwards
//
// done is invoked exactly once: without arguments on success, or with an
// Error describing the stage that failed. The frame is removed in both cases.
function runWombatTest(testCase, done) {
  var finished = false;

  // create an <iframe>
  var testFrame = document.createElement('iframe');
  testFrame.src = '/base/karma-tests/dummy.html';
  document.body.appendChild(testFrame);

  // Removes the frame and reports the outcome. Later calls are ignored so
  // that an error reported by the frame and the normal completion path can
  // never both invoke done.
  function finish(err) {
    if (finished) {
      return;
    }
    finished = true;
    testFrame.remove();
    if (err) {
      done(err);
    } else {
      done();
    }
  }

  // Serializes func and evaluates it as an immediately-invoked function in
  // the frame's global scope; func therefore cannot use closure variables.
  function runFunctionInIFrame(func) {
    testFrame.contentWindow.eval('(' + func.toString() + ')()');
  }

  testFrame.contentWindow.addEventListener('load', function () {
    // Everything runs inside one try/catch: an exception escaping an event
    // listener would otherwise never reach done.
    try {
      var testDocument = testFrame.contentDocument;

      // expose an error reporting function to the <iframe>
      window.reportError = function (ex) {
        finish(new Error(ex));
      };

      // expose utility methods for assertion testing in tests.
      // (We used to expose chai asserts here but Karma's default
      // error reporter replaces URLs in exception messages with
      // the corresponding file paths, which is unhelpful for us
      // since assert.equal() will often be called with URLs in our tests)
      window.assert = {
        equal: function (a, b) {
          if (a !== b) {
            console.error('Mismatch between', a, 'and', b);
            throw new Error('AssertionError');
          }
        }
      };

      runFunctionInIFrame(function () {
        // re-assign the iframe's console object to the parent window's
        // console so that messages are intercepted by Karma
        // and output to wherever it is configured to send
        // console logs (typically stdout)
        console = window.parent.console;
        window.onerror = function (message, url, line, col, error) {
          if (error) {
            console.log(error.stack);
          }
          reportError(new Error(message));
        };

        // expose the parent's assertion helper and error reporter to the
        // test script
        window.assert = window.parent.assert;
        window.reportError = window.parent.reportError;

        // helpers which check whether DOM property overrides are supported
        // in the current browser
        window.domTests = {
          areDOMPropertiesConfigurable: function () {
            var descriptor = Object.getOwnPropertyDescriptor(Node.prototype, 'baseURI');
            if (descriptor && !descriptor.configurable) {
              return false;
            } else {
              return true;
            }
          }
        };
      });

      try {
        runFunctionInIFrame(testCase.initScript);
      } catch (e) {
        throw new Error('Configuring Wombat failed: ' + e.toString());
      }

      try {
        testFrame.contentWindow.eval(testCase.wombatScript);
        runFunctionInIFrame(function () {
          new window._WBWombat(window, wbinfo);
        });
      } catch (e) {
        console.error(e.stack);
        throw new Error('Initializing WombatJS failed: ' + e.toString());
      }

      if (testCase.html) {
        testDocument.body.innerHTML = testCase.html;
      }

      if (testCase.testScript) {
        try {
          runFunctionInIFrame(testCase.testScript);
        } catch (e) {
          throw new Error('Test script failed: ' + e.toString());
        }
      }

      finish();
    } catch (e) {
      finish(e);
    }
  });
}
|
||||
|
||||
// Mocha suite for WombatJS. The Wombat source is downloaded once, then each
// case hands it to runWombatTest together with an initScript that defines
// the global `wbinfo` inside the test frame.
//
// Note: initScript/testScript functions are passed to runWombatTest, which
// this suite expects to evaluate them inside the frame; they must not rely on
// variables from this file's scope.
describe('WombatJS', function () {
  this.timeout(DEFAULT_TIMEOUT);

  var wombatScript;

  before(function (done) {
    // load the source of the WombatJS content
    // rewriting script
    var req = new XMLHttpRequest();
    req.open('GET', '/base/pywb/static/wombat.js');
    req.onload = function () {
      // onload also fires for error responses (e.g. 404); do not treat an
      // error page as the Wombat source
      if (req.status < 200 || req.status >= 300) {
        done(new Error('Loading wombat.js failed with HTTP status ' + req.status));
        return;
      }
      wombatScript = req.responseText;
      done();
    };
    // fail fast instead of waiting for the suite timeout
    req.onerror = function () {
      done(new Error('Network error while loading wombat.js'));
    };
    req.send();
  });

  it('should load', function (done) {
    runWombatTest({
      initScript: function () {
        wbinfo = {
          wombat_opts: {},
          wombat_ts: '',
          is_live: false,
          top_url: ''
        };
      },
      wombatScript: wombatScript,
    }, done);
  });

  describe('anchor rewriting', function () {
    var config;
    beforeEach(function () {
      config = {
        initScript: function () {
          wbinfo = {
            wombat_opts: {},
            wombat_scheme: 'http',
            prefix: window.location.origin,
            wombat_ts: '',
            is_live: false,
            top_url: ''
          };
        },
        wombatScript: wombatScript,
        html: '<a href="foobar.html" id="link">A link</a>',
      };
    });

    it('should rewrite links in dynamically injected <a> tags', function (done) {
      config.testScript = function () {
        // only asserted where the browser allows DOM property overrides
        if (domTests.areDOMPropertiesConfigurable()) {
          var link = document.getElementById('link');
          assert.equal(link.href, 'http:///base/karma-tests/foobar.html');
        }
      };

      runWombatTest(config, done);
    });

    it('toString() should return the rewritten URL', function (done) {
      config.testScript = function () {
        if (domTests.areDOMPropertiesConfigurable()) {
          var link = document.getElementById('link');
          assert.equal(link.href, link.toString());
        }
      };
      runWombatTest(config, done);
    });
  });

  describe('base URL overrides', function () {
    it('document.baseURI should return the original URL', function (done) {
      runWombatTest({
        initScript: function () {
          wbinfo = {
            wombat_opts: {},
            prefix: window.location.origin,
            wombat_ts: '',
            wombat_scheme: 'http',
            is_live: false,
            top_url: ''
          };
        },
        wombatScript: wombatScript,
        testScript: function () {
          var baseURI = document.baseURI;
          if (typeof baseURI !== 'string') {
            throw new Error('baseURI is not a string');
          }
          if (domTests.areDOMPropertiesConfigurable()) {
            assert.equal(baseURI, 'http:///base/karma-tests/dummy.html');
          }
        },
      }, done);
    });

    it('should allow base.href to be assigned', function (done) {
      runWombatTest({
        initScript: function () {
          wbinfo = {
            wombat_opts: {},
            wombat_scheme: 'http',
            is_live: false,
            top_url: ''
          };
        },
        wombatScript: wombatScript,
        testScript: function () {
          'use strict';
          var baseElement = document.createElement('base');
          baseElement.href = 'http://foobar.com/base';
          assert.equal(baseElement.href, 'http://foobar.com/base');
        },
      }, done);
    });
  });
});
|
@ -238,7 +238,8 @@ class RewriterApp(object):
|
||||
host_prefix = self.get_host_prefix(environ)
|
||||
rel_prefix = self.get_rel_prefix(environ)
|
||||
full_prefix = host_prefix + rel_prefix
|
||||
|
||||
pywb_static_prefix = environ.get('pywb.host_prefix', '') + environ.get('pywb.app_prefix', '') + environ.get(
|
||||
'pywb.static_prefix', '/static/')
|
||||
is_proxy = ('wsgiprox.proxy_host' in environ)
|
||||
|
||||
response = self.handle_custom_response(environ, wb_url,
|
||||
@ -257,7 +258,8 @@ class RewriterApp(object):
|
||||
urlrewriter = UrlRewriter(wb_url,
|
||||
prefix=full_prefix,
|
||||
full_prefix=full_prefix,
|
||||
rel_prefix=rel_prefix)
|
||||
rel_prefix=rel_prefix,
|
||||
pywb_static_prefix=pywb_static_prefix)
|
||||
|
||||
framed_replay = self.framed_replay
|
||||
|
||||
|
@ -15,6 +15,8 @@ from pywb.utils.io import StreamIter, BUFF_SIZE
|
||||
|
||||
from pywb.utils.loaders import load_yaml_config, load_py_name
|
||||
|
||||
WORKER_MODS = {"wkr_", "sw_"} # type: Set[str]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class BaseContentRewriter(object):
|
||||
@ -423,8 +425,8 @@ class RewriteInfo(object):
|
||||
def _resolve_text_type(self, text_type):
|
||||
mod = self.url_rewriter.wburl.mod
|
||||
|
||||
if mod == 'sw_' or mod == 'wkr_':
|
||||
return None
|
||||
if mod in WORKER_MODS:
|
||||
return 'js-worker'
|
||||
|
||||
if text_type == 'css' and mod == 'js_':
|
||||
text_type = 'css'
|
||||
@ -495,7 +497,7 @@ class RewriteInfo(object):
|
||||
return True
|
||||
|
||||
def is_url_rw(self):
|
||||
if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'sw_', 'wkr_'):
|
||||
if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'wkrf_'):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
@ -15,6 +15,8 @@ from pywb.rewrite.rewrite_dash import RewriteDASH
|
||||
from pywb.rewrite.rewrite_hls import RewriteHLS
|
||||
from pywb.rewrite.rewrite_amf import RewriteAMF
|
||||
|
||||
from pywb.rewrite.rewrite_js_workers import JSWorkerRewriter
|
||||
|
||||
from pywb import DEFAULT_RULES_FILE
|
||||
|
||||
import copy
|
||||
@ -34,6 +36,7 @@ class DefaultRewriter(BaseContentRewriter):
|
||||
|
||||
'js': JSLocationOnlyRewriter,
|
||||
'js-proxy': JSNoneRewriter,
|
||||
'js-worker': JSWorkerRewriter,
|
||||
|
||||
'json': JSONPRewriter,
|
||||
|
||||
|
@ -58,7 +58,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
'embed': {'src': 'oe_'},
|
||||
'head': {'': defmod}, # for head rewriting
|
||||
'iframe': {'src': 'if_'},
|
||||
'image': {'src': 'im_', 'xlink:href': 'im_'},
|
||||
'image': {'src': 'im_', 'xlink:href': 'im_', 'href': 'im_'},
|
||||
'img': {'src': 'im_',
|
||||
'srcset': 'im_'},
|
||||
'ins': {'cite': defmod},
|
||||
@ -74,7 +74,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
'q': {'cite': defmod},
|
||||
'ref': {'href': 'oe_'},
|
||||
'script': {'src': 'js_', 'xlink:href': 'js_'}, # covers both HTML and SVG script tags
|
||||
'source': {'src': 'oe_'},
|
||||
'source': {'src': 'oe_', 'srcset': 'oe_'},
|
||||
'video': {'src': 'oe_',
|
||||
'poster': 'im_'},
|
||||
}
|
||||
|
@ -63,18 +63,28 @@ class RxRules(object):
|
||||
class JSWombatProxyRules(RxRules):
|
||||
def __init__(self):
|
||||
local_init_func = '\nvar {0} = function(name) {{\
|
||||
return (self._wb_wombat && self._wb_wombat.local_init &&\
|
||||
return (self._wb_wombat && self._wb_wombat.local_init && \
|
||||
self._wb_wombat.local_init(name)) || self[name]; }};\n\
|
||||
if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
|
||||
{{\n'
|
||||
local_check_this_fn = 'var {0} = function (thisObj) {{ \
|
||||
if (thisObj && thisObj._WB_wombat_obj_proxy) return thisObj._WB_wombat_obj_proxy; return thisObj; }};'
|
||||
|
||||
local_init_func_name = '_____WB$wombat$assign$function_____'
|
||||
|
||||
local_var_line = 'let {0} = {1}("{0}");'
|
||||
|
||||
this_rw = '(this && this._WB_wombat_obj_proxy || this)'
|
||||
local_check_this_func_name = '_____WB$wombat$check$this$function_____'
|
||||
|
||||
check_loc = '(self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = '
|
||||
# we must use a function to perform the this check because most minifiers reduce the number of statements
|
||||
# by turning everything into one or more expressions. Our previous rewrite was a logical expression,
|
||||
# (this && this._WB_wombat_obj_proxy || this), that would cause the outer expression to be invalid when
|
||||
# it was used as the LHS of certain expressions.
|
||||
# e.g. assignment expressions containing non parenthesized logical expression.
|
||||
# By using a function the expression injected is a call expression that plays nice in those cases
|
||||
this_rw = '_____WB$wombat$check$this$function_____(this)'
|
||||
|
||||
check_loc = '((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = '
|
||||
|
||||
self.local_objs = [
|
||||
'window',
|
||||
@ -84,8 +94,8 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
|
||||
'top',
|
||||
'parent',
|
||||
'frames',
|
||||
'opener']
|
||||
|
||||
'opener'
|
||||
]
|
||||
|
||||
local_declares = '\n'.join([local_var_line.format(obj, local_init_func_name) for obj in self.local_objs])
|
||||
|
||||
@ -104,7 +114,8 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
|
||||
|
||||
super(JSWombatProxyRules, self).__init__(rules)
|
||||
|
||||
self.first_buff = local_init_func.format(local_init_func_name) + local_declares
|
||||
self.first_buff = local_check_this_fn.format(local_check_this_func_name) + local_init_func.format(
|
||||
local_init_func_name) + local_declares + '\n\n'
|
||||
|
||||
self.last_buff = '\n\n}'
|
||||
|
||||
|
30
pywb/rewrite/rewrite_js_workers.py
Normal file
30
pywb/rewrite/rewrite_js_workers.py
Normal file
@ -0,0 +1,30 @@
|
||||
from pywb.rewrite.content_rewriter import StreamingRewriter, WORKER_MODS
|
||||
|
||||
__all__ = ["JSWorkerRewriter"]
|
||||
|
||||
# Bootstrap prepended to a rewritten worker script: imports the worker build of
# wombat and constructs it with the init object rendered from INIT.
INJECT = "(function() { self.importScripts('%s'); new WBWombat(%s); })();"

# Init object passed to WBWombat. Each placeholder receives an already-quoted
# JavaScript string literal (produced with json.dumps), not a raw value.
INIT = "{'prefix': %s, 'prefixMod': %s, 'originalURL': %s}"


class JSWorkerRewriter(StreamingRewriter):
    """A simple rewriter for rewriting web or service workers.

    The only rewriting that occurs is the injection of the init code
    for wombatWorkers.js: when the URL's modifier is one of WORKER_MODS,
    ``first_buff`` is replaced with a snippet that imports wombatWorkers.js
    from the static prefix and constructs ``WBWombat`` with the replay prefix,
    the ``wkrf_`` modifier prefix and the original URL.
    """

    def __init__(self, url_rewriter, align_to_line=True, first_buff=''):
        """Initialize a new JSWorkerRewriter

        :param UrlRewriter url_rewriter: The url rewriter for this rewrite
        :param bool align_to_line: Should the response stream be aligned to line boundaries
        :param str first_buff: The first string to be added to the rewrite;
            overwritten with the injected init code for worker modifiers
        :rtype: None
        """
        # local import: only needed here, to quote values for the injected script
        import json

        super(JSWorkerRewriter, self).__init__(url_rewriter, align_to_line, first_buff)
        wb_url = self.url_rewriter.wburl
        if wb_url.mod in WORKER_MODS:
            rw_url = self.url_rewriter.pywb_static_prefix + "wombatWorkers.js"
            prefix = self.url_rewriter.full_prefix
            # json.dumps yields a quoted, escaped string literal, so a quote or
            # backslash in the URL cannot terminate the literal early and
            # break (or alter) the injected script
            init = INIT % (json.dumps(prefix),
                           json.dumps(prefix + 'wkrf_/'),
                           json.dumps(wb_url.url))
            self.first_buff = INJECT % (rw_url, init)
|
@ -235,24 +235,22 @@ class TestContentRewriter(object):
|
||||
|
||||
def test_rewrite_sw_add_headers(self):
|
||||
headers = {'Content-Type': 'application/x-javascript'}
|
||||
content = 'function() { location.href = "http://example.com/"; }'
|
||||
content = "function() { location.href = 'http://example.com/'; }"
|
||||
|
||||
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701sw_')
|
||||
|
||||
assert ('Content-Type', 'application/x-javascript') in headers.headers
|
||||
assert ('Service-Worker-Allowed', 'http://localhost:8080/prefix/201701mp_/http://example.com/') in headers.headers
|
||||
|
||||
exp = 'function() { location.href = "http://example.com/"; }'
|
||||
assert b''.join(gen).decode('utf-8') == exp
|
||||
assert "self.importScripts('wombatWorkers.js');" in b''.join(gen).decode('utf-8')
|
||||
|
||||
def test_rewrite_worker(self):
|
||||
headers = {'Content-Type': 'application/x-javascript'}
|
||||
content = 'importScripts("http://example.com/js.js")'
|
||||
content = "importScripts('http://example.com/js.js')"
|
||||
|
||||
rwheaders, gen, is_rw = self.rewrite_record(headers, content, ts='201701wkr_')
|
||||
|
||||
exp = 'importScripts("http://example.com/js.js")'
|
||||
assert b''.join(gen).decode('utf-8') == exp
|
||||
assert "self.importScripts('wombatWorkers.js');" in b''.join(gen).decode('utf-8')
|
||||
|
||||
def test_banner_only_no_cookie_rewrite(self):
|
||||
headers = {'Set-Cookie': 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/',
|
||||
|
@ -389,7 +389,7 @@ r"""
|
||||
|
||||
# parse attr with js proxy, rewrite location assignment
|
||||
>>> parse('<html><a href="javascript:location=\'foo.html\'"></a></html>', js_proxy=True)
|
||||
<html><a href="javascript:{ location=(self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = 'foo.html' }"></a></html>
|
||||
<html><a href="javascript:{ location=((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = 'foo.html' }"></a></html>
|
||||
|
||||
# parse attr with js proxy, assigning to location.href, no location assignment rewrite needed
|
||||
>>> parse('<html><a href="javascript:location.href=\'foo.html\'"></a></html>', js_proxy=True)
|
||||
|
@ -131,49 +131,49 @@ r"""
|
||||
#=================================================================
|
||||
|
||||
>>> _test_js_obj_proxy('var foo = this; location = bar')
|
||||
'var foo = (this && this._WB_wombat_obj_proxy || this); location = (self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = bar'
|
||||
'var foo = _____WB$wombat$check$this$function_____(this); location = ((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = bar'
|
||||
|
||||
>>> _test_js_obj_proxy('var that = this\n location = bar')
|
||||
'var that = (this && this._WB_wombat_obj_proxy || this)\n location = (self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = bar'
|
||||
'var that = _____WB$wombat$check$this$function_____(this)\n location = ((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = bar'
|
||||
|
||||
>>> _test_js_obj_proxy('location = "xyz"')
|
||||
'location = (self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = "xyz"'
|
||||
'location = ((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = "xyz"'
|
||||
|
||||
>>> _test_js_obj_proxy('var foo = this.location')
|
||||
'var foo = (this && this._WB_wombat_obj_proxy || this).location'
|
||||
'var foo = _____WB$wombat$check$this$function_____(this).location'
|
||||
|
||||
>>> _test_js_obj_proxy('A = B\nthis.location = "foo"')
|
||||
'A = B\n;(this && this._WB_wombat_obj_proxy || this).location = "foo"'
|
||||
'A = B\n;_____WB$wombat$check$this$function_____(this).location = "foo"'
|
||||
|
||||
>>> _test_js_obj_proxy('var foo = this.location2')
|
||||
'var foo = this.location2'
|
||||
|
||||
>>> _test_js_obj_proxy('func(Function("return this"));')
|
||||
'func(Function("return (this && this._WB_wombat_obj_proxy || this)"));'
|
||||
'func(Function("return _____WB$wombat$check$this$function_____(this)"));'
|
||||
|
||||
>>> _test_js_obj_proxy('A.call(function() { return this });')
|
||||
'A.call(function() { return (this && this._WB_wombat_obj_proxy || this) });'
|
||||
'A.call(function() { return _____WB$wombat$check$this$function_____(this) });'
|
||||
|
||||
>>> _test_js_obj_proxy('this.document.location = foo')
|
||||
'(this && this._WB_wombat_obj_proxy || this).document.location = foo'
|
||||
'_____WB$wombat$check$this$function_____(this).document.location = foo'
|
||||
|
||||
>>> _test_js_obj_proxy('if (that != this) { ... }')
|
||||
'if (that != (this && this._WB_wombat_obj_proxy || this)) { ... }'
|
||||
'if (that != _____WB$wombat$check$this$function_____(this)) { ... }'
|
||||
|
||||
>>> _test_js_obj_proxy('function(){...} (this)')
|
||||
'function(){...} ((this && this._WB_wombat_obj_proxy || this))'
|
||||
'function(){...} (_____WB$wombat$check$this$function_____(this))'
|
||||
|
||||
>>> _test_js_obj_proxy('function(){...} ) (this); foo(this)')
|
||||
'function(){...} ) ((this && this._WB_wombat_obj_proxy || this)); foo(this)'
|
||||
'function(){...} ) (_____WB$wombat$check$this$function_____(this)); foo(this)'
|
||||
|
||||
>>> _test_js_obj_proxy('var foo = that || this ;')
|
||||
'var foo = that || (this && this._WB_wombat_obj_proxy || this) ;'
|
||||
'var foo = that || _____WB$wombat$check$this$function_____(this) ;'
|
||||
|
||||
>>> _test_js_obj_proxy('a||this||that')
|
||||
'a||(this && this._WB_wombat_obj_proxy || this)||that'
|
||||
'a||_____WB$wombat$check$this$function_____(this)||that'
|
||||
|
||||
>>> _test_js_obj_proxy('a||this)')
|
||||
'a||(this && this._WB_wombat_obj_proxy || this))'
|
||||
'a||_____WB$wombat$check$this$function_____(this))'
|
||||
|
||||
# not rewritten
|
||||
>>> _test_js_obj_proxy('var window = this$')
|
||||
@ -207,7 +207,7 @@ r"""
|
||||
'this. alocation = http://example.com/'
|
||||
|
||||
>>> _test_js_obj_proxy(r'this. location = http://example.com/')
|
||||
'this. location = (self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = http://example.com/'
|
||||
'this. location = ((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = http://example.com/'
|
||||
|
||||
|
||||
|
||||
|
@ -23,7 +23,7 @@ class UrlRewriter(object):
|
||||
REL_PATH = '/'
|
||||
|
||||
def __init__(self, wburl, prefix='', full_prefix=None, rel_prefix=None,
|
||||
root_path=None, cookie_scope=None, rewrite_opts=None):
|
||||
root_path=None, cookie_scope=None, rewrite_opts=None, pywb_static_prefix=None):
|
||||
self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl)
|
||||
self.prefix = prefix
|
||||
self.full_prefix = full_prefix or prefix
|
||||
@ -36,10 +36,22 @@ class UrlRewriter(object):
|
||||
self.prefix_abs = self.prefix and self.prefix.startswith(self.PROTOCOLS)
|
||||
self.cookie_scope = cookie_scope
|
||||
self.rewrite_opts = rewrite_opts or {}
|
||||
self._pywb_static_prefix = pywb_static_prefix
|
||||
|
||||
if self.rewrite_opts.get('punycode_links'):
|
||||
self.wburl._do_percent_encode = False
|
||||
|
||||
@property
def pywb_static_prefix(self):
    """Return the URL under which pywb's static files are served.

    An unset prefix gives the empty string, a prefix that already starts
    with one of ``PROTOCOLS`` is returned untouched, and anything else is
    joined onto ``full_prefix`` with ``urljoin``.

    :rtype: str
    """
    static_prefix = self._pywb_static_prefix
    if static_prefix is None:
        return ''

    already_absolute = static_prefix.startswith(self.PROTOCOLS)
    if not already_absolute:
        return self.urljoin(self.full_prefix, static_prefix)
    return static_prefix
|
||||
|
||||
def rewrite(self, url, mod=None, force_abs=False):
|
||||
# if special protocol, no rewriting at all
|
||||
if url.startswith(self.NO_REWRITE_URI_PREFIX):
|
||||
|
@ -16,25 +16,25 @@ function noop() {}
|
||||
|
||||
if (typeof self.Promise === 'undefined') {
|
||||
// not kewl we must polyfill Promise
|
||||
self.Promise = function (executor) {
|
||||
self.Promise = function(executor) {
|
||||
executor(noop, noop);
|
||||
};
|
||||
self.Promise.prototype.then = function (cb) {
|
||||
self.Promise.prototype.then = function(cb) {
|
||||
if (cb) cb();
|
||||
return this;
|
||||
};
|
||||
self.Promise.prototype.catch = function () {
|
||||
self.Promise.prototype.catch = function() {
|
||||
return this;
|
||||
};
|
||||
self.Promise.all = function (values) {
|
||||
self.Promise.all = function(values) {
|
||||
return new Promise(noop);
|
||||
};
|
||||
}
|
||||
|
||||
if (typeof self.fetch === 'undefined') {
|
||||
// not kewl we must polyfill fetch.
|
||||
self.fetch = function (url) {
|
||||
return new Promise(function (resolve) {
|
||||
self.fetch = function(url) {
|
||||
return new Promise(function(resolve) {
|
||||
var xhr = new XMLHttpRequest();
|
||||
xhr.open('GET', url);
|
||||
xhr.send();
|
||||
@ -43,7 +43,7 @@ if (typeof self.fetch === 'undefined') {
|
||||
};
|
||||
}
|
||||
|
||||
self.onmessage = function (event) {
|
||||
self.onmessage = function(event) {
|
||||
var data = event.data;
|
||||
switch (data.type) {
|
||||
case 'values':
|
||||
@ -77,18 +77,18 @@ function AutoFetcher(init) {
|
||||
this.avFetchDone = this.avFetchDone.bind(this);
|
||||
}
|
||||
|
||||
AutoFetcher.prototype.delay = function () {
|
||||
AutoFetcher.prototype.delay = function() {
|
||||
// a 2 second delay seems reasonable
|
||||
return new Promise(function (resolve, reject) {
|
||||
return new Promise(function(resolve, reject) {
|
||||
setTimeout(resolve, 2000);
|
||||
});
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.imgFetchDone = function () {
|
||||
AutoFetcher.prototype.imgFetchDone = function() {
|
||||
if (this.queue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
var autofetcher = this;
|
||||
this.delay().then(function () {
|
||||
this.delay().then(function() {
|
||||
autofetcher.queuing = false;
|
||||
autofetcher.fetchImgs();
|
||||
});
|
||||
@ -97,11 +97,11 @@ AutoFetcher.prototype.imgFetchDone = function () {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.avFetchDone = function () {
|
||||
AutoFetcher.prototype.avFetchDone = function() {
|
||||
if (this.avQueue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
var autofetcher = this;
|
||||
this.delay().then(function () {
|
||||
this.delay().then(function() {
|
||||
autofetcher.queuingAV = false;
|
||||
autofetcher.fetchAV();
|
||||
});
|
||||
@ -110,7 +110,7 @@ AutoFetcher.prototype.avFetchDone = function () {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchAV = function () {
|
||||
AutoFetcher.prototype.fetchAV = function() {
|
||||
if (this.queuingAV || this.avQueue.length === 0) {
|
||||
return;
|
||||
}
|
||||
@ -120,12 +120,15 @@ AutoFetcher.prototype.fetchAV = function () {
|
||||
// we limit how many we fetch at a time drastically
|
||||
this.queuingAV = true;
|
||||
var runningFetchers = [];
|
||||
while (this.avQueue.length > 0 && runningFetchers.length <= DefaultNumAvFetches) {
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop))
|
||||
while (
|
||||
this.avQueue.length > 0 &&
|
||||
runningFetchers.length <= DefaultNumAvFetches
|
||||
) {
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop));
|
||||
}
|
||||
if (this.avQueue.length <= FullAVQDrainLen) {
|
||||
while (this.avQueue.length > 0) {
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop))
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop));
|
||||
}
|
||||
}
|
||||
Promise.all(runningFetchers)
|
||||
@ -133,7 +136,7 @@ AutoFetcher.prototype.fetchAV = function () {
|
||||
.catch(this.avFetchDone);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchImgs = function () {
|
||||
AutoFetcher.prototype.fetchImgs = function() {
|
||||
if (this.queuing || this.queue.length === 0) {
|
||||
return;
|
||||
}
|
||||
@ -142,12 +145,15 @@ AutoFetcher.prototype.fetchImgs = function () {
|
||||
// we add them to the current batch
|
||||
this.queuing = true;
|
||||
var runningFetchers = [];
|
||||
while (this.queue.length > 0 && runningFetchers.length <= DefaultNumImFetches) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||
while (
|
||||
this.queue.length > 0 &&
|
||||
runningFetchers.length <= DefaultNumImFetches
|
||||
) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop));
|
||||
}
|
||||
if (this.queue.length <= FullImgQDrainLen) {
|
||||
while (this.queue.length > 0) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop));
|
||||
}
|
||||
}
|
||||
Promise.all(runningFetchers)
|
||||
@ -155,7 +161,7 @@ AutoFetcher.prototype.fetchImgs = function () {
|
||||
.catch(this.imgFetchDone);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.queueNonAVURL = function (url) {
|
||||
AutoFetcher.prototype.queueNonAVURL = function(url) {
|
||||
// ensure we do not request data urls
|
||||
if (url.indexOf(DataURLPrefix) === 0) return;
|
||||
// check to see if we have seen this url before in order
|
||||
@ -165,7 +171,7 @@ AutoFetcher.prototype.queueNonAVURL = function (url) {
|
||||
this.queue.push(url);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.queueAVURL = function (url) {
|
||||
AutoFetcher.prototype.queueAVURL = function(url) {
|
||||
// ensure we do not request data urls
|
||||
if (url.indexOf(DataURLPrefix) === 0) return;
|
||||
// check to see if we have seen this url before in order
|
||||
@ -175,7 +181,7 @@ AutoFetcher.prototype.queueAVURL = function (url) {
|
||||
this.avQueue.push(url);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.maybeResolveURL = function (url, base) {
|
||||
AutoFetcher.prototype.maybeResolveURL = function(url, base) {
|
||||
// given a url and base url returns a resolved full URL or
|
||||
// null if resolution was unsuccessful
|
||||
try {
|
||||
@ -186,7 +192,7 @@ AutoFetcher.prototype.maybeResolveURL = function (url, base) {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.maybeFixUpRelSchemelessPrefix = function (url) {
|
||||
AutoFetcher.prototype.maybeFixUpRelSchemelessPrefix = function(url) {
|
||||
// attempt to ensure rewritten relative or schemeless URLs become full URLS!
|
||||
// otherwise returns null if this did not happen
|
||||
if (url.indexOf(this.relative) === 0) {
|
||||
@ -198,7 +204,7 @@ AutoFetcher.prototype.maybeFixUpRelSchemelessPrefix = function (url) {
|
||||
return null;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.maybeFixUpURL = function (url, resolveOpts) {
|
||||
AutoFetcher.prototype.maybeFixUpURL = function(url, resolveOpts) {
|
||||
// attempt to fix up the url and do our best to ensure we can get dat 200 OK!
|
||||
if (this.rwRe.test(url)) {
|
||||
return url;
|
||||
@ -227,13 +233,20 @@ AutoFetcher.prototype.maybeFixUpURL = function (url, resolveOpts) {
|
||||
return this.prefixMod + '/' + url;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) {
|
||||
AutoFetcher.prototype.urlExtractor = function(
|
||||
match,
|
||||
n1,
|
||||
n2,
|
||||
n3,
|
||||
offset,
|
||||
string
|
||||
) {
|
||||
// Same function as style_replacer in wombat.rewrite_style, n2 is our URL
|
||||
this.queueNonAVURL(n2);
|
||||
return n1 + n2 + n3;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.handleMedia = function (mediaRules) {
|
||||
AutoFetcher.prototype.handleMedia = function(mediaRules) {
|
||||
// this is a broken down rewrite_style
|
||||
if (mediaRules == null || mediaRules.length === 0) return;
|
||||
// var rules = mediaRules.values;
|
||||
@ -244,14 +257,18 @@ AutoFetcher.prototype.handleMedia = function (mediaRules) {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.handleSrc = function (srcValues, context) {
|
||||
var resolveOpts = { 'docBaseURI': context.docBaseURI };
|
||||
AutoFetcher.prototype.handleSrc = function(srcValues, context) {
|
||||
var resolveOpts = { docBaseURI: context.docBaseURI };
|
||||
if (srcValues.value) {
|
||||
resolveOpts.mod = srcValues.mod;
|
||||
if (resolveOpts.mod === 1) {
|
||||
return this.queueNonAVURL(this.maybeFixUpURL(srcValues.value.trim(), resolveOpts));
|
||||
return this.queueNonAVURL(
|
||||
this.maybeFixUpURL(srcValues.value.trim(), resolveOpts)
|
||||
);
|
||||
}
|
||||
return this.queueAVURL(this.maybeFixUpURL(srcValues.value.trim(), resolveOpts));
|
||||
return this.queueAVURL(
|
||||
this.maybeFixUpURL(srcValues.value.trim(), resolveOpts)
|
||||
);
|
||||
}
|
||||
var len = srcValues.values.length;
|
||||
for (var i = 0; i < len; i++) {
|
||||
@ -265,7 +282,8 @@ AutoFetcher.prototype.handleSrc = function (srcValues, context) {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.extractSrcSetNotPreSplit = function (ssV, resolveOpts) {
|
||||
AutoFetcher.prototype.extractSrcSetNotPreSplit = function(ssV, resolveOpts) {
|
||||
if (!ssV) return;
|
||||
// was from extract from local doc so we need to duplicate work
|
||||
var srcsetValues = ssV.split(srcsetSplit);
|
||||
for (var i = 0; i < srcsetValues.length; i++) {
|
||||
@ -282,7 +300,7 @@ AutoFetcher.prototype.extractSrcSetNotPreSplit = function (ssV, resolveOpts) {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.extractSrcset = function (srcsets, context) {
|
||||
AutoFetcher.prototype.extractSrcset = function(srcsets, context) {
|
||||
// was rewrite_srcset and only need to q
|
||||
for (var i = 0; i < srcsets.length; i++) {
|
||||
// grab the URL not width/height key
|
||||
@ -295,8 +313,8 @@ AutoFetcher.prototype.extractSrcset = function (srcsets, context) {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.handleSrcset = function (srcset, context) {
|
||||
var resolveOpts = { 'docBaseURI': context.docBaseURI };
|
||||
AutoFetcher.prototype.handleSrcset = function(srcset, context) {
|
||||
var resolveOpts = { docBaseURI: context.docBaseURI };
|
||||
if (srcset.value) {
|
||||
// we have a single value, this srcset came from either
|
||||
// preserveDataSrcset (not presplit) preserveSrcset (presplit)
|
||||
@ -318,8 +336,7 @@ AutoFetcher.prototype.handleSrcset = function (srcset, context) {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
AutoFetcher.prototype.autoFetch = function (data) {
|
||||
AutoFetcher.prototype.autoFetch = function(data) {
|
||||
// we got a message and now we autofetch!
|
||||
// these calls turn into no ops if they have no work
|
||||
if (data.media) {
|
||||
|
@ -16,26 +16,25 @@ function noop() {}
|
||||
|
||||
if (typeof self.Promise === 'undefined') {
|
||||
// not kewl we must polyfill Promise
|
||||
self.Promise = function (executor) {
|
||||
self.Promise = function(executor) {
|
||||
executor(noop, noop);
|
||||
};
|
||||
self.Promise.prototype.then = function (cb) {
|
||||
self.Promise.prototype.then = function(cb) {
|
||||
if (cb) cb();
|
||||
return this;
|
||||
};
|
||||
self.Promise.prototype.catch = function () {
|
||||
self.Promise.prototype.catch = function() {
|
||||
return this;
|
||||
};
|
||||
self.Promise.all = function (values) {
|
||||
self.Promise.all = function(values) {
|
||||
return new Promise(noop);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
if (typeof self.fetch === 'undefined') {
|
||||
// not kewl we must polyfill fetch.
|
||||
self.fetch = function (url) {
|
||||
return new Promise(function (resolve) {
|
||||
self.fetch = function(url) {
|
||||
return new Promise(function(resolve) {
|
||||
var xhr = new XMLHttpRequest();
|
||||
xhr.open('GET', url);
|
||||
xhr.send();
|
||||
@ -44,7 +43,7 @@ if (typeof self.fetch === 'undefined') {
|
||||
};
|
||||
}
|
||||
|
||||
self.onmessage = function (event) {
|
||||
self.onmessage = function(event) {
|
||||
var data = event.data;
|
||||
switch (data.type) {
|
||||
case 'values':
|
||||
@ -77,17 +76,17 @@ function AutoFetcher() {
|
||||
this.avFetchDone = this.avFetchDone.bind(this);
|
||||
}
|
||||
|
||||
AutoFetcher.prototype.delay = function () {
|
||||
return new Promise(function (resolve, reject) {
|
||||
AutoFetcher.prototype.delay = function() {
|
||||
return new Promise(function(resolve, reject) {
|
||||
setTimeout(resolve, FetchDelay);
|
||||
});
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.imgFetchDone = function () {
|
||||
AutoFetcher.prototype.imgFetchDone = function() {
|
||||
if (this.queue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
var autofetcher = this;
|
||||
this.delay().then(function () {
|
||||
this.delay().then(function() {
|
||||
autofetcher.queuing = false;
|
||||
autofetcher.fetchImgs();
|
||||
});
|
||||
@ -96,11 +95,11 @@ AutoFetcher.prototype.imgFetchDone = function () {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.avFetchDone = function () {
|
||||
AutoFetcher.prototype.avFetchDone = function() {
|
||||
if (this.avQueue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
var autofetcher = this;
|
||||
this.delay().then(function () {
|
||||
this.delay().then(function() {
|
||||
autofetcher.queuingAV = false;
|
||||
autofetcher.fetchAV();
|
||||
});
|
||||
@ -109,7 +108,7 @@ AutoFetcher.prototype.avFetchDone = function () {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchAV = function () {
|
||||
AutoFetcher.prototype.fetchAV = function() {
|
||||
if (this.queuingAV || this.avQueue.length === 0) {
|
||||
return;
|
||||
}
|
||||
@ -119,12 +118,15 @@ AutoFetcher.prototype.fetchAV = function () {
|
||||
// we limit how many we fetch at a time drastically
|
||||
this.queuingAV = true;
|
||||
var runningFetchers = [];
|
||||
while (this.avQueue.length > 0 && runningFetchers.length <= DefaultNumAvFetches) {
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop))
|
||||
while (
|
||||
this.avQueue.length > 0 &&
|
||||
runningFetchers.length <= DefaultNumAvFetches
|
||||
) {
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop));
|
||||
}
|
||||
if (this.avQueue.length <= FullAVQDrainLen) {
|
||||
while (this.avQueue.length > 0) {
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop))
|
||||
runningFetchers.push(fetch(this.avQueue.shift()).catch(noop));
|
||||
}
|
||||
}
|
||||
Promise.all(runningFetchers)
|
||||
@ -132,7 +134,7 @@ AutoFetcher.prototype.fetchAV = function () {
|
||||
.catch(this.avFetchDone);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchImgs = function () {
|
||||
AutoFetcher.prototype.fetchImgs = function() {
|
||||
if (this.queuing || this.queue.length === 0) {
|
||||
return;
|
||||
}
|
||||
@ -141,12 +143,15 @@ AutoFetcher.prototype.fetchImgs = function () {
|
||||
// we add them to the current batch
|
||||
this.queuing = true;
|
||||
var runningFetchers = [];
|
||||
while (this.queue.length > 0 && runningFetchers.length <= DefaultNumImFetches) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||
while (
|
||||
this.queue.length > 0 &&
|
||||
runningFetchers.length <= DefaultNumImFetches
|
||||
) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop));
|
||||
}
|
||||
if (this.queue.length <= FullImgQDrainLen) {
|
||||
while (this.queue.length > 0) {
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop))
|
||||
runningFetchers.push(fetch(this.queue.shift()).catch(noop));
|
||||
}
|
||||
}
|
||||
Promise.all(runningFetchers)
|
||||
@ -154,7 +159,7 @@ AutoFetcher.prototype.fetchImgs = function () {
|
||||
.catch(this.imgFetchDone);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.queueNonAVURL = function (url) {
|
||||
AutoFetcher.prototype.queueNonAVURL = function(url) {
|
||||
// ensure we do not request data urls
|
||||
if (url.indexOf(DataURLPrefix) === 0) return;
|
||||
// check to see if we have seen this url before in order
|
||||
@ -164,7 +169,7 @@ AutoFetcher.prototype.queueNonAVURL = function (url) {
|
||||
this.queue.push(url);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.queueAVURL = function (url) {
|
||||
AutoFetcher.prototype.queueAVURL = function(url) {
|
||||
// ensure we do not request data urls
|
||||
if (url.indexOf(DataURLPrefix) === 0) return;
|
||||
// check to see if we have seen this url before in order
|
||||
@ -174,13 +179,13 @@ AutoFetcher.prototype.queueAVURL = function (url) {
|
||||
this.avQueue.push(url);
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.safeResolve = function (url, resolver) {
|
||||
AutoFetcher.prototype.safeResolve = function(url, resolver) {
|
||||
// Guard against the exception thrown by the URL constructor if the URL or resolver is bad
|
||||
// if resolver is undefined/null then this function passes url through
|
||||
var resolvedURL = url;
|
||||
if (resolver) {
|
||||
try {
|
||||
resolvedURL = (new URL(url, resolver)).href
|
||||
resolvedURL = new URL(url, resolver).href;
|
||||
} catch (e) {
|
||||
resolvedURL = url;
|
||||
}
|
||||
@ -188,8 +193,14 @@ AutoFetcher.prototype.safeResolve = function (url, resolver) {
|
||||
return resolvedURL;
|
||||
};
|
||||
|
||||
|
||||
AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) {
|
||||
AutoFetcher.prototype.urlExtractor = function(
|
||||
match,
|
||||
n1,
|
||||
n2,
|
||||
n3,
|
||||
offset,
|
||||
string
|
||||
) {
|
||||
// Same function as style_replacer in wombat.rewrite_style, n2 is our URL
|
||||
// this.currentResolver is set to the URL which the browser would normally
|
||||
// resolve relative urls with (URL of the stylesheet) in an exceptionless manner
|
||||
@ -201,7 +212,7 @@ AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string
|
||||
return n1 + n2 + n3;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.extractMedia = function (mediaRules) {
|
||||
AutoFetcher.prototype.extractMedia = function(mediaRules) {
|
||||
// this is a broken down rewrite_style
|
||||
if (mediaRules == null) return;
|
||||
for (var i = 0; i < mediaRules.length; i++) {
|
||||
@ -215,7 +226,7 @@ AutoFetcher.prototype.extractMedia = function (mediaRules) {
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.extractSrcset = function (srcsets) {
|
||||
AutoFetcher.prototype.extractSrcset = function(srcsets) {
|
||||
// preservation worker in proxy mode sends us the value of the srcset attribute of an element
|
||||
// and a URL to correctly resolve relative URLS. Thus we must recreate rewrite_srcset logic here
|
||||
if (srcsets == null) return;
|
||||
@ -224,26 +235,34 @@ AutoFetcher.prototype.extractSrcset = function (srcsets) {
|
||||
for (var i = 0; i < length; i++) {
|
||||
extractedSrcSet = srcsets[i];
|
||||
ssSplit = extractedSrcSet.srcset.split(srcsetSplit);
|
||||
console.log(ssSplit);
|
||||
for (j = 0; j < ssSplit.length; j++) {
|
||||
if (ssSplit[j]) {
|
||||
srcsetValue = ssSplit[j].trim();
|
||||
if (srcsetValue.length > 0) {
|
||||
// resolve the URL in an exceptionless manner (resolvedURL will be undefined if an error occurred)
|
||||
var resolvedURL = this.safeResolve(srcsetValue.split(' ')[0], extractedSrcSet.resolve);
|
||||
var resolvedURL = this.safeResolve(
|
||||
srcsetValue.split(' ')[0],
|
||||
extractedSrcSet.resolve
|
||||
);
|
||||
if (resolvedURL) {
|
||||
if (extractedSrcSet.mod === 'im_') {
|
||||
this.queueNonAVURL(resolvedURL);
|
||||
} else {
|
||||
this.queueAVURL(resolvedURL);
|
||||
}
|
||||
} else {
|
||||
console.log(resolvedURL);
|
||||
}
|
||||
} else {
|
||||
console.log(srcsetValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.extractSrc = function (srcVals) {
|
||||
AutoFetcher.prototype.extractSrc = function(srcVals) {
|
||||
// preservation worker in proxy mode sends us the value of the srcset attribute of an element
|
||||
// and a URL to correctly resolve relative URLS. Thus we must recreate rewrite_srcset logic here
|
||||
if (srcVals == null || srcVals.length === 0) return;
|
||||
@ -262,8 +281,7 @@ AutoFetcher.prototype.extractSrc = function (srcVals) {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
AutoFetcher.prototype.autofetchMediaSrcset = function (data) {
|
||||
AutoFetcher.prototype.autofetchMediaSrcset = function(data) {
|
||||
// we got a message and now we autofetch!
|
||||
// these calls turn into no ops if they have no work
|
||||
this.extractMedia(data.media);
|
||||
@ -273,7 +291,7 @@ AutoFetcher.prototype.autofetchMediaSrcset = function (data) {
|
||||
this.fetchAV();
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.justFetch = function (data) {
|
||||
AutoFetcher.prototype.justFetch = function(data) {
|
||||
// we got a message containing only urls to be fetched
|
||||
if (data == null || data.values == null) return;
|
||||
for (var i = 0; i < data.values.length; ++i) {
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
19
pywb/static/wombatWorkers.js
Normal file
19
pywb/static/wombatWorkers.js
Normal file
File diff suppressed because one or more lines are too long
@ -1,82 +0,0 @@
|
||||
// pywb mini rewriter for injection into web worker scripts
|
||||
|
||||
function WBWombat(info) {
|
||||
function maybeResolveURL(origURL) {
|
||||
try {
|
||||
var resolved = new URL(origURL, info.originalURL);
|
||||
return resolved.href;
|
||||
} catch (e) {
|
||||
return origURL;
|
||||
}
|
||||
}
|
||||
|
||||
function rewrite_url(url) {
|
||||
if (url.indexOf('blob:') === 0) return url;
|
||||
if (url && info.originalURL && url.indexOf('/') === 0) {
|
||||
url = maybeResolveURL(url);
|
||||
}
|
||||
if (info.prefix) {
|
||||
return info.prefix + url;
|
||||
}
|
||||
return url;
|
||||
}
|
||||
|
||||
function init_ajax_rewrite() {
|
||||
var orig = self.XMLHttpRequest.prototype.open;
|
||||
|
||||
function open_rewritten(method, url, async, user, password) {
|
||||
url = rewrite_url(url);
|
||||
|
||||
// defaults to true
|
||||
if (async != false) {
|
||||
async = true;
|
||||
}
|
||||
|
||||
var result = orig.call(this, method, url, async, user, password);
|
||||
|
||||
if (url.indexOf('data:') !== 0) {
|
||||
this.setRequestHeader('X-Pywb-Requested-With', 'XMLHttpRequest');
|
||||
}
|
||||
}
|
||||
|
||||
self.XMLHttpRequest.prototype.open = open_rewritten;
|
||||
}
|
||||
|
||||
init_ajax_rewrite();
|
||||
|
||||
function rewriteArgs(argsObj) {
|
||||
// recreate the original arguments object just with URLs rewritten
|
||||
var newArgObj = new Array(argsObj.length);
|
||||
for (var i = 0; i < newArgObj.length; i++) {
|
||||
var arg = argsObj[i];
|
||||
newArgObj[i] = rewrite_url(arg);
|
||||
}
|
||||
return newArgObj;
|
||||
}
|
||||
|
||||
var origImportScripts = self.importScripts;
|
||||
self.importScripts = function importScripts() {
|
||||
// rewrite the arguments object and call original function via fn.apply
|
||||
var rwArgs = rewriteArgs(arguments);
|
||||
return origImportScripts.apply(this, rwArgs);
|
||||
};
|
||||
|
||||
if (self.fetch != null) {
|
||||
// this fetch is Worker.fetch
|
||||
var orig_fetch = self.fetch;
|
||||
self.fetch = function(input, init_opts) {
|
||||
var inputType = typeof(input);
|
||||
if (inputType === 'string') {
|
||||
input = rewrite_url(input);
|
||||
} else if (inputType === 'object' && input.url) {
|
||||
var new_url = rewrite_url(input.url);
|
||||
if (new_url !== input.url) {
|
||||
input = new Request(new_url, input);
|
||||
}
|
||||
}
|
||||
init_opts = init_opts || {};
|
||||
init_opts['credentials'] = 'include';
|
||||
return orig_fetch.call(this, input, init_opts);
|
||||
};
|
||||
}
|
||||
}
|
@ -1 +1 @@
|
||||
__version__ = '2.2.20190410'
|
||||
__version__ = '2.3.0.dev0'
|
||||
|
@ -23,6 +23,9 @@ def fmod_sl(request):
|
||||
# ============================================================================
|
||||
class BaseConfigTest(BaseTestClass):
|
||||
lint_app = True
|
||||
extra_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_test_app(cls, config_file, custom_config=None):
|
||||
@ -62,21 +65,34 @@ class BaseConfigTest(BaseTestClass):
|
||||
assert resp.content_length > 0
|
||||
|
||||
def get(self, url, fmod, *args, **kwargs):
|
||||
self.__ensure_headers(kwargs)
|
||||
app = self.testapp if fmod else self.testapp_non_frame
|
||||
return app.get(url.format(fmod), *args, **kwargs)
|
||||
|
||||
def post(self, url, fmod, *args, **kwargs):
|
||||
self.__ensure_headers(kwargs)
|
||||
app = self.testapp if fmod else self.testapp_non_frame
|
||||
return app.post(url.format(fmod), *args, **kwargs)
|
||||
|
||||
def post_json(self, url, fmod, *args, **kwargs):
|
||||
self.__ensure_headers(kwargs)
|
||||
app = self.testapp if fmod else self.testapp_non_frame
|
||||
return app.post_json(url.format(fmod), *args, **kwargs)
|
||||
|
||||
def head(self, url, fmod, *args, **kwargs):
|
||||
self.__ensure_headers(kwargs)
|
||||
app = self.testapp if fmod else self.testapp_non_frame
|
||||
return app.head(url.format(fmod), *args, **kwargs)
|
||||
|
||||
def __ensure_headers(self, kwargs):
|
||||
if 'headers' in kwargs:
|
||||
headers = kwargs.get('headers')
|
||||
else:
|
||||
headers = kwargs['headers'] = {}
|
||||
|
||||
if isinstance(headers, dict) and 'User-Agent' not in headers:
|
||||
headers['User-Agent'] = self.extra_headers['User-Agent']
|
||||
|
||||
|
||||
#=============================================================================
|
||||
class CollsDirMixin(TempDirTests):
|
||||
|
@ -31,7 +31,7 @@ class TestRootColl(BaseConfigTest):
|
||||
def test_root_replay_redir(self, fmod):
|
||||
resp = self.get('/20140128051539{0}/http://www.iana.org/domains/example', fmod)
|
||||
|
||||
assert resp.status_int == 302
|
||||
assert resp.status_int in (301, 302)
|
||||
|
||||
assert resp.headers['Location'] == 'http://localhost:80/20140128051539{0}/https://www.iana.org/domains/reserved'.format(fmod)
|
||||
|
||||
|
1
wombat
Submodule
1
wombat
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 0b0c171a4f0f34114ba3cefd5ba80304515f4ef8
|
Loading…
x
Reference in New Issue
Block a user