mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Refactor of auto-fetch worker system with support for proxy mode, fixes https://github.com/webrecorder/pywb/issues/371: (#379)
- Split wombat and auto-fetch worker into two files (proxy mode and non-proxy mode) - Renamed preservationWorker to autoFetchWorker in order to better convey what it does - Root config file control over including wombat and auto-fetch worker in proxy or non-proxy mode - Added additional proxy mode + auto-fetch worker only route for fetching the auto-fetch worker code nicely for CORS - templateview: add 'tobool' formatter to more cleanly format python bools to JS 'true'/'false' - proxy options: config and command line: 'use_auto_fetch_worker' and '--proxy-with-auto-fetch' 'use_wombat' and '--proxy-with-wombat' - head_insert.html: only include wombat in proxy mode when use_wombat or use_auto_fetch_worker are set. - wombatProxyMode.js: slimmed down wombat for proxy mode only including auto-fetch support. - more consistent naming: rename 'preserveWorker' and 'autoArchive' to 'auto-fetch' Updated tests: - test_wbrequestresponse.py: added tests covering constructor defaults, _init_derived, options_response, json_response, encode_stream, text_stream - test_auto_colls.py: fixed broken test test_more_custom_templates, reason using ujson now not json so spacing was off - test_proxy.py: updated existing tests to reflect splitting wombat into proxy and non-proxy mode, added tests covering auto-fetch worker specific endpoints in proxy mode removed duplicate addons key in .travis.yml - test_cli.py: updated to properly test the cli with these changes added ultrajon dep to tests_require in setup.py to reflect its usage by wbrequestresponse.py Fully documented: - cli.py - frontendapp.py - templateview.py - wbrequestresponse.py Removed duplicate addons key in .travis.yml Added ultrajson dependency to tests_require in setup.py to reflect its usage by wbrequestresponse.py Fixes #371
This commit is contained in:
parent
71c3eb77de
commit
ec0df7b9ae
@ -9,6 +9,7 @@ os:
|
||||
- linux
|
||||
|
||||
addons:
|
||||
sauce_connect: true
|
||||
apt:
|
||||
packages:
|
||||
# This is required to run new chrome on old trusty
|
||||
@ -18,8 +19,6 @@ env:
|
||||
- WR_TEST=no
|
||||
- WR_TEST=yes
|
||||
|
||||
addons:
|
||||
sauce_connect: true
|
||||
|
||||
cache:
|
||||
directories:
|
||||
|
@ -6,6 +6,7 @@ import logging
|
||||
|
||||
#=============================================================================
|
||||
def warcserver(args=None):
|
||||
"""Utility function for starting pywb's WarcServer"""
|
||||
return WarcServerCli(args=args,
|
||||
default_port=8070,
|
||||
desc='pywb WarcServer').run()
|
||||
@ -13,6 +14,7 @@ def warcserver(args=None):
|
||||
|
||||
#=============================================================================
|
||||
def wayback(args=None):
|
||||
"""Utility function for starting pywb's Wayback Machine implementation"""
|
||||
return WaybackCli(args=args,
|
||||
default_port=8080,
|
||||
desc='pywb Wayback Machine Server').run()
|
||||
@ -20,6 +22,7 @@ def wayback(args=None):
|
||||
|
||||
#=============================================================================
|
||||
def live_rewrite_server(args=None):
|
||||
"""Utility function for starting pywb's Wayback Machine implementation in live mode"""
|
||||
return LiveCli(args=args,
|
||||
default_port=8090,
|
||||
desc='pywb Live Rewrite Proxy Server').run()
|
||||
@ -27,7 +30,15 @@ def live_rewrite_server(args=None):
|
||||
|
||||
#=============================================================================
|
||||
class BaseCli(object):
|
||||
"""Base CLI class that provides the initial arg parser setup,
|
||||
calls load to receive the application to be started and starts the application."""
|
||||
|
||||
def __init__(self, args=None, default_port=8080, desc=''):
|
||||
"""
|
||||
:param args: CLI arguments
|
||||
:param int default_port: The default port that the application will use
|
||||
:param str desc: The description for the application to be started
|
||||
"""
|
||||
parser = ArgumentParser(description=desc)
|
||||
parser.add_argument('-p', '--port', type=int, default=default_port,
|
||||
help='Port to listen on (default %s)' % default_port)
|
||||
@ -47,6 +58,10 @@ class BaseCli(object):
|
||||
help='Enable HTTP/S proxy on specified collection')
|
||||
parser.add_argument('--proxy-record', action='store_true',
|
||||
help='Enable proxy recording into specified collection')
|
||||
parser.add_argument('--proxy-with-wombat', action='store_true',
|
||||
help='Enable partial wombat support in proxy mode')
|
||||
parser.add_argument('--proxy-with-auto-fetch', action='store_true',
|
||||
help='Enable auto-load worker in proxy mode')
|
||||
|
||||
self.desc = desc
|
||||
self.extra_config = {}
|
||||
@ -57,12 +72,14 @@ class BaseCli(object):
|
||||
|
||||
logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
|
||||
level=logging.DEBUG if self.r.debug else logging.INFO)
|
||||
|
||||
if self.r.proxy:
|
||||
self.extra_config['proxy'] = {'coll': self.r.proxy,
|
||||
'recording': self.r.proxy_record}
|
||||
self.extra_config['proxy'] = {
|
||||
'coll': self.r.proxy,
|
||||
'recording': self.r.proxy_record,
|
||||
'use_wombat': self.r.proxy_with_wombat,
|
||||
'use_auto_fetch_worker': self.r.proxy_with_auto_fetch,
|
||||
}
|
||||
self.r.live = True
|
||||
|
||||
self.application = self.load()
|
||||
|
||||
if self.r.profile:
|
||||
@ -70,9 +87,15 @@ class BaseCli(object):
|
||||
self.application = ProfilerMiddleware(self.application)
|
||||
|
||||
def _extend_parser(self, parser): #pragma: no cover
|
||||
"""Method provided for subclasses to add their cli argument on top of the default cli arguments.
|
||||
|
||||
:param ArgumentParser parser: The argument parser instance passed by BaseCli
|
||||
"""
|
||||
pass
|
||||
|
||||
def load(self):
|
||||
"""This method is called to load the application. Subclasses must return a application
|
||||
that can be used by used by pywb.utils.geventserver.GeventServer."""
|
||||
if self.r.live:
|
||||
self.extra_config['collections'] = {'live':
|
||||
{'index': '$live'}}
|
||||
@ -84,10 +107,12 @@ class BaseCli(object):
|
||||
self.extra_config['recorder'] = 'live'
|
||||
|
||||
def run(self):
|
||||
"""Start the application"""
|
||||
self.run_gevent()
|
||||
return self
|
||||
|
||||
def run_gevent(self):
|
||||
"""Created the server that runs the application supplied a subclass"""
|
||||
from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler
|
||||
logging.info('Starting Gevent Server on ' + str(self.r.port))
|
||||
ge = GeventServer(self.application,
|
||||
@ -99,6 +124,8 @@ class BaseCli(object):
|
||||
|
||||
#=============================================================================
|
||||
class ReplayCli(BaseCli):
|
||||
"""CLI class that adds the cli functionality specific to starting pywb's Wayback Machine implementation"""
|
||||
|
||||
def _extend_parser(self, parser):
|
||||
parser.add_argument('-a', '--autoindex', action='store_true',
|
||||
help='Enable auto-indexing')
|
||||
@ -110,7 +137,6 @@ class ReplayCli(BaseCli):
|
||||
help_dir='Specify root archive dir (default is current working directory)'
|
||||
parser.add_argument('-d', '--directory', help=help_dir)
|
||||
|
||||
|
||||
def load(self):
|
||||
super(ReplayCli, self).load()
|
||||
|
||||
@ -129,6 +155,8 @@ class ReplayCli(BaseCli):
|
||||
|
||||
#=============================================================================
|
||||
class WarcServerCli(BaseCli):
|
||||
"""CLI class for starting a WarcServer"""
|
||||
|
||||
def load(self):
|
||||
from pywb.warcserver.warcserver import WarcServer
|
||||
|
||||
@ -138,6 +166,8 @@ class WarcServerCli(BaseCli):
|
||||
|
||||
#=============================================================================
|
||||
class WaybackCli(ReplayCli):
|
||||
"""CLI class for starting the pywb's implementation of the Wayback Machine"""
|
||||
|
||||
def load(self):
|
||||
from pywb.apps.frontendapp import FrontEndApp
|
||||
|
||||
@ -147,6 +177,8 @@ class WaybackCli(ReplayCli):
|
||||
|
||||
#=============================================================================
|
||||
class LiveCli(BaseCli):
|
||||
"""CLI class for starting pywb in replay server in live mode"""
|
||||
|
||||
def load(self):
|
||||
from pywb.apps.frontendapp import FrontEndApp
|
||||
|
||||
|
@ -6,7 +6,7 @@ from werkzeug.exceptions import HTTPException, NotFound
|
||||
from werkzeug.wsgi import pop_path_info
|
||||
from six.moves.urllib.parse import urljoin
|
||||
from six import iteritems
|
||||
|
||||
from warcio.statusandheaders import StatusAndHeaders
|
||||
from warcio.utils import to_native_str
|
||||
from wsgiprox.wsgiprox import WSGIProxMiddleware
|
||||
|
||||
@ -33,6 +33,16 @@ import logging
|
||||
|
||||
# ============================================================================
|
||||
class FrontEndApp(object):
|
||||
"""Orchestrates pywb's core Wayback Machine functionality and is comprised of 2 core sub-apps and 3 optional apps.
|
||||
|
||||
Sub-apps:
|
||||
- WarcServer: Serves the archive content (WARC/ARC and index) as well as from the live web in record/proxy mode
|
||||
- RewriterApp: Rewrites the content served by pywb (if it is to be rewritten)
|
||||
- WSGIProxMiddleware (Optional): If proxy mode is enabled, performs pywb's HTTP(s) proxy functionality
|
||||
- AutoIndexer (Optional): If auto-indexing is enabled for the collections it is started here
|
||||
- RecorderApp (Optional): Recording functionality, available when recording mode is enabled
|
||||
"""
|
||||
|
||||
REPLAY_API = 'http://localhost:%s/{coll}/resource/postreq'
|
||||
CDX_API = 'http://localhost:%s/{coll}/index'
|
||||
RECORD_SERVER = 'http://localhost:%s'
|
||||
@ -45,6 +55,10 @@ class FrontEndApp(object):
|
||||
PROXY_CA_PATH = os.path.join('proxy-certs', 'pywb-ca.pem')
|
||||
|
||||
def __init__(self, config_file='./config.yaml', custom_config=None):
|
||||
"""
|
||||
:param str config_file: Path to the config file
|
||||
:param dict custom_config: Dictionary containing additional configuration information
|
||||
"""
|
||||
self.handler = self.handle_request
|
||||
self.warcserver = WarcServer(config_file=config_file,
|
||||
custom_config=custom_config)
|
||||
@ -55,6 +69,8 @@ class FrontEndApp(object):
|
||||
|
||||
self.warcserver_server = GeventServer(self.warcserver, port=0)
|
||||
|
||||
self.proxy_prefix = None # the URL prefix to be used for the collection with proxy mode (e.g. /coll/id_/)
|
||||
self.proxy_coll = None # the name of the collection that has proxy mode enabled
|
||||
self.init_proxy(config)
|
||||
|
||||
self.init_recorder(config.get('recorder'))
|
||||
@ -82,6 +98,8 @@ class FrontEndApp(object):
|
||||
self.metadata_cache = MetadataCache(metadata_templ)
|
||||
|
||||
def _init_routes(self):
|
||||
"""Initialize the routes and based on the configuration file makes available
|
||||
specific routes (proxy mode, record)"""
|
||||
self.url_map = Map()
|
||||
self.url_map.add(Rule('/static/_/<coll>/<path:filepath>', endpoint=self.serve_static))
|
||||
self.url_map.add(Rule('/static/<path:filepath>', endpoint=self.serve_static))
|
||||
@ -100,9 +118,19 @@ class FrontEndApp(object):
|
||||
if self.recorder_path:
|
||||
self.url_map.add(Rule(coll_prefix + self.RECORD_ROUTE + '/<path:url>', endpoint=self.serve_record))
|
||||
|
||||
if self.proxy_prefix is not None:
|
||||
# Add the proxy-fetch endpoint to enable PreservationWorker to make CORS fetches worry free in proxy mode
|
||||
self.url_map.add(Rule('/proxy-fetch/<path:url>', endpoint=self.proxy_fetch,
|
||||
methods=['GET', 'HEAD', 'OPTIONS']))
|
||||
self.url_map.add(Rule(coll_prefix + '/<path:url>', endpoint=self.serve_content))
|
||||
|
||||
def get_upstream_paths(self, port):
|
||||
"""Retrieve a dictionary containing the full URLs of the upstream apps
|
||||
|
||||
:param int port: The port used by the replay and cdx servers
|
||||
:return: A dictionary containing the upstream paths (replay, cdx-server, record [if enabled])
|
||||
:rtype: dict[str, str]
|
||||
"""
|
||||
base_paths = {
|
||||
'replay': self.REPLAY_API % port,
|
||||
'cdx-server': self.CDX_API % port,
|
||||
@ -114,6 +142,7 @@ class FrontEndApp(object):
|
||||
return base_paths
|
||||
|
||||
def init_recorder(self, recorder_config):
|
||||
"""Initialize the recording functionality of pywb. If recording_config is None this function is a no op"""
|
||||
if not recorder_config:
|
||||
self.recorder = None
|
||||
self.recorder_path = None
|
||||
@ -142,6 +171,10 @@ class FrontEndApp(object):
|
||||
self.recorder_path = self.RECORD_API % (recorder_server.port, recorder_coll)
|
||||
|
||||
def init_autoindex(self, auto_interval):
|
||||
"""Initialize and start the auto-indexing of the collections. If auto_interval is None this is a no op.
|
||||
|
||||
:param str|int auto_interval: The auto-indexing interval from the configuration file or CLI argument
|
||||
"""
|
||||
if not auto_interval:
|
||||
return
|
||||
|
||||
@ -161,7 +194,16 @@ class FrontEndApp(object):
|
||||
logging.info(msg.format(indexer.root_path, auto_interval))
|
||||
indexer.start()
|
||||
|
||||
def is_proxy_enabled(self, environ):
|
||||
return self.proxy_prefix is not None and 'wsgiprox.proxy_host' in environ
|
||||
|
||||
def serve_home(self, environ):
|
||||
"""Serves the home (/) view of pywb (not a collections)
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:return: The WbResponse for serving the home (/) path
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
home_view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')
|
||||
fixed_routes = self.warcserver.list_fixed_routes()
|
||||
dynamic_routes = self.warcserver.list_dynamic_routes()
|
||||
@ -177,19 +219,38 @@ class FrontEndApp(object):
|
||||
return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
|
||||
|
||||
def serve_static(self, environ, coll='', filepath=''):
|
||||
"""Serve a static file associated with a specific collection or one of pywb's own static assets
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:param str coll: The collection the static file is associated with
|
||||
:param str filepath: The file path (relative to the collection) for the static assest
|
||||
:return: The WbResponse for the static asset
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
proxy_enabled = self.is_proxy_enabled(environ)
|
||||
if proxy_enabled and environ.get('REQUEST_METHOD') == 'OPTIONS':
|
||||
return WbResponse.options_response(environ)
|
||||
if coll:
|
||||
path = os.path.join(self.warcserver.root_dir, coll, self.static_dir)
|
||||
else:
|
||||
path = self.static_dir
|
||||
|
||||
environ['pywb.static_dir'] = path
|
||||
|
||||
try:
|
||||
return self.static_handler(environ, filepath)
|
||||
response = self.static_handler(environ, filepath)
|
||||
if proxy_enabled:
|
||||
response.add_access_control_headers(env=environ)
|
||||
return response
|
||||
except:
|
||||
self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath))
|
||||
|
||||
def get_metadata(self, coll):
|
||||
"""Retrieve the metadata associated with a collection
|
||||
|
||||
:param str coll: The name of the collection to receive metadata for
|
||||
:return: The collections metadata if it exists
|
||||
:rtype: dict
|
||||
"""
|
||||
#if coll == self.all_coll:
|
||||
# coll = '*'
|
||||
|
||||
@ -204,6 +265,13 @@ class FrontEndApp(object):
|
||||
return metadata
|
||||
|
||||
def serve_coll_page(self, environ, coll='$root'):
|
||||
"""Render and serve a collections search page (search.html).
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:param str coll: The name of the collection to serve the collections search page for
|
||||
:return: The WbResponse containing the collections search page
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
if not self.is_valid_coll(coll):
|
||||
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
|
||||
|
||||
@ -225,6 +293,13 @@ class FrontEndApp(object):
|
||||
return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
|
||||
|
||||
def serve_cdx(self, environ, coll='$root'):
|
||||
"""Make the upstream CDX query for a collection and response with the results of the query
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:param str coll: The name of the collection this CDX query is for
|
||||
:return: The WbResponse containing the results of the CDX query
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
base_url = self.rewriterapp.paths['cdx-server']
|
||||
|
||||
#if coll == self.all_coll:
|
||||
@ -248,12 +323,31 @@ class FrontEndApp(object):
|
||||
return WbResponse.text_response('Error: ' + str(e), status='400 Bad Request')
|
||||
|
||||
def serve_record(self, environ, coll='$root', url=''):
|
||||
"""Serve a URL's content from a WARC/ARC record in replay mode or from the live web in
|
||||
live, proxy, and record mode.
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:param str coll: The name of the collection the record is to be served from
|
||||
:param str url: The URL for the corresponding record to be served if it exists
|
||||
:return: WbResponse containing the contents of the record/URL
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
if coll in self.warcserver.list_fixed_routes():
|
||||
return WbResponse.text_response('Error: Can Not Record Into Custom Collection "{0}"'.format(coll))
|
||||
|
||||
return self.serve_content(environ, coll, url, record=True)
|
||||
|
||||
def serve_content(self, environ, coll='$root', url='', timemap_output='', record=False):
|
||||
"""Serve the contents of a URL/Record rewriting the contents of the response when applicable.
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:param str coll: The name of the collection the record is to be served from
|
||||
:param str url: The URL for the corresponding record to be served if it exists
|
||||
:param str timemap_output: The contents of the timemap included in the link header of the response
|
||||
:param bool record: Should the content being served by recorded (save to a warc). Only valid in record mode
|
||||
:return: WbResponse containing the contents of the record/URL
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
if not self.is_valid_coll(coll):
|
||||
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
|
||||
|
||||
@ -282,10 +376,16 @@ class FrontEndApp(object):
|
||||
except UpstreamException as ue:
|
||||
response = self.rewriterapp.handle_error(environ, ue)
|
||||
raise HTTPException(response=response)
|
||||
|
||||
return response
|
||||
|
||||
def setup_paths(self, environ, coll, record=False):
|
||||
"""Populates the WSGI environment dictionary with the path information necessary to perform a response for
|
||||
content or record.
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:param str coll: The name of the collection the record is to be served from
|
||||
:param bool record: Should the content being served by recorded (save to a warc). Only valid in record mode
|
||||
"""
|
||||
if not coll or not self.warcserver.root_dir:
|
||||
return
|
||||
|
||||
@ -305,6 +405,12 @@ class FrontEndApp(object):
|
||||
environ['pywb.templates_dir'] = '/'.join(paths)
|
||||
|
||||
def serve_listing(self, environ):
|
||||
"""Serves the response for WARCServer fixed and dynamic listing (paths)
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:return: WbResponse containing the frontend apps WARCServer URL paths
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
result = {'fixed': self.warcserver.list_fixed_routes(),
|
||||
'dynamic': self.warcserver.list_dynamic_routes()
|
||||
}
|
||||
@ -312,6 +418,12 @@ class FrontEndApp(object):
|
||||
return WbResponse.json_response(result)
|
||||
|
||||
def is_valid_coll(self, coll):
|
||||
"""Determines if the collection name for a request is valid (exists)
|
||||
|
||||
:param str coll: The name of the collection to check
|
||||
:return: True if the collection is valid, false otherwise
|
||||
:rtype: bool
|
||||
"""
|
||||
#if coll == self.all_coll:
|
||||
# return True
|
||||
|
||||
@ -319,9 +431,21 @@ class FrontEndApp(object):
|
||||
coll in self.warcserver.list_dynamic_routes())
|
||||
|
||||
def raise_not_found(self, environ, msg):
|
||||
"""Utility function for raising a werkzeug.exceptions.NotFound execption with the supplied WSGI environment
|
||||
and message.
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:param str msg: The error message
|
||||
"""
|
||||
raise NotFound(response=self.rewriterapp._error_response(environ, msg))
|
||||
|
||||
def _check_refer_redirect(self, environ):
|
||||
"""Returns a WbResponse for a HTTP 307 redirection if the HTTP referer header is the same as the HTTP host header
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:return: WbResponse HTTP 307 redirection
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
referer = environ.get('HTTP_REFERER')
|
||||
if not referer:
|
||||
return
|
||||
@ -353,10 +477,16 @@ class FrontEndApp(object):
|
||||
return self.handler(environ, start_response)
|
||||
|
||||
def handle_request(self, environ, start_response):
|
||||
"""Retrieves the route handler and calls the handler returning its the response
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:param start_response:
|
||||
:return: The WbResponse for the request
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
urls = self.url_map.bind_to_environ(environ)
|
||||
try:
|
||||
endpoint, args = urls.match()
|
||||
|
||||
# store original script_name (original prefix) before modifications are made
|
||||
environ['pywb.app_prefix'] = environ.get('SCRIPT_NAME')
|
||||
|
||||
@ -379,13 +509,23 @@ class FrontEndApp(object):
|
||||
|
||||
@classmethod
|
||||
def create_app(cls, port):
|
||||
"""Create a new instance of FrontEndApp that listens on port with a hostname of 0.0.0.0
|
||||
|
||||
:param int port: The port FrontEndApp is to listen on
|
||||
:return: A new instance of FrontEndApp wrapped in GeventServer
|
||||
:rtype: GeventServer
|
||||
"""
|
||||
app = FrontEndApp()
|
||||
app_server = GeventServer(app, port=port, hostname='0.0.0.0')
|
||||
return app_server
|
||||
|
||||
def init_proxy(self, config):
|
||||
"""Initialize and start proxy mode. If proxy configuration entry is not contained in the config
|
||||
this is a no op. Causes handler to become an instance of WSGIProxMiddleware.
|
||||
|
||||
:param dict config: The configuration object used to configure this instance of FrontEndApp
|
||||
"""
|
||||
proxy_config = config.get('proxy')
|
||||
self.proxy_prefix = None
|
||||
if not proxy_config:
|
||||
return
|
||||
|
||||
@ -418,10 +558,12 @@ class FrontEndApp(object):
|
||||
else:
|
||||
self.proxy_prefix = '/{0}/id_/'.format(proxy_coll)
|
||||
|
||||
self.proxy_coll = proxy_coll
|
||||
|
||||
self.handler = WSGIProxMiddleware(self.handle_request,
|
||||
self.proxy_route_request,
|
||||
proxy_host=proxy_config.get('host', 'pywb.proxy'),
|
||||
proxy_options=proxy_config)
|
||||
self.proxy_route_request,
|
||||
proxy_host=proxy_config.get('host', 'pywb.proxy'),
|
||||
proxy_options=proxy_config)
|
||||
|
||||
def proxy_route_request(self, url, environ):
|
||||
""" Return the full url that this proxy request will be routed to
|
||||
@ -431,14 +573,65 @@ class FrontEndApp(object):
|
||||
"""
|
||||
return self.proxy_prefix + url
|
||||
|
||||
def proxy_fetch(self, env, url):
|
||||
"""Proxy mode only endpoint that handles OPTIONS requests and COR fetches for Preservation Worker.
|
||||
|
||||
Due to normal cross-origin browser restrictions in proxy mode, auto fetch worker cannot access the CSS rules
|
||||
of cross-origin style sheets and must re-fetch them in a manner that is CORS safe. This endpoint facilitates
|
||||
that by fetching the stylesheets for the auto fetch worker and then responds with its contents
|
||||
|
||||
:param dict env: The WSGI environment dictionary
|
||||
:param str url: The URL of the resource to be fetched
|
||||
:return: WbResponse that is either response to an Options request or the results of fetching url
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
if not self.is_proxy_enabled(env):
|
||||
# we are not in proxy mode so just respond with forbidden
|
||||
return WbResponse.text_response('proxy mode must be enabled to use this endpoint',
|
||||
status='403 Forbidden')
|
||||
|
||||
if env.get('REQUEST_METHOD') == 'OPTIONS':
|
||||
return WbResponse.options_response(env)
|
||||
|
||||
# ensure full URL
|
||||
request_url = env['REQUEST_URI']
|
||||
# replace with /id_ so we do not get rewritten
|
||||
url = request_url.replace('/proxy-fetch', '/id_')
|
||||
# update WSGI environment object
|
||||
env['REQUEST_URI'] = self.proxy_coll + url
|
||||
env['PATH_INFO'] = env['PATH_INFO'].replace('/proxy-fetch', self.proxy_coll + '/id_')
|
||||
# make request using normal serve_content
|
||||
response = self.serve_content(env, self.proxy_coll, url)
|
||||
# for WR
|
||||
if isinstance(response, WbResponse):
|
||||
response.add_access_control_headers(env=env)
|
||||
return response
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class MetadataCache(object):
|
||||
"""This class holds the collection medata template string and
|
||||
caches the metadata for a collection once it is rendered once.
|
||||
Cached metadata is updated if its corresponding file has been updated since last cache time (file mtime based)"""
|
||||
|
||||
def __init__(self, template_str):
|
||||
"""
|
||||
:param str template_str: The template string to be cached
|
||||
"""
|
||||
self.template_str = template_str
|
||||
self.cache = {}
|
||||
|
||||
def load(self, coll):
|
||||
"""Load and receive the metadata associated with a collection.
|
||||
|
||||
If the metadata for the collection is not cached yet its metadata file is read in and stored.
|
||||
If the cache has seen the collection before the mtime of the metadata file is checked and if it is more recent
|
||||
than the cached time, the cache is updated and returned otherwise the cached version is returned.
|
||||
|
||||
:param str coll: Name of a collection
|
||||
:return: The cached metadata for a collection
|
||||
:rtype: dict
|
||||
"""
|
||||
path = self.template_str.format(coll=coll)
|
||||
try:
|
||||
mtime = os.path.getmtime(path)
|
||||
@ -456,11 +649,25 @@ class MetadataCache(object):
|
||||
return self.store_new(coll, path, mtime)
|
||||
|
||||
def store_new(self, coll, path, mtime):
|
||||
"""Load a collections metadata file and store it
|
||||
|
||||
:param str coll: The name of the collection the metadata is for
|
||||
:param str path: The path to the collections metadata file
|
||||
:param float mtime: The current mtime of the collections metadata file
|
||||
:return: The collections metadata
|
||||
:rtype: dict
|
||||
"""
|
||||
obj = load_yaml_config(path)
|
||||
self.cache[coll] = (mtime, obj)
|
||||
return obj
|
||||
|
||||
def get_all(self, routes):
|
||||
"""Load the metadata for all routes (collections) and populate the cache
|
||||
|
||||
:param list[str] routes: List of collection names
|
||||
:return: A dictionary containing each collections metadata
|
||||
:rtype: dict
|
||||
"""
|
||||
for route in routes:
|
||||
self.load(route)
|
||||
|
||||
|
@ -366,6 +366,7 @@ class RewriterApp(object):
|
||||
top_url,
|
||||
environ,
|
||||
framed_replay,
|
||||
coll=kwargs.get('coll', ''),
|
||||
replay_mod=self.replay_mod,
|
||||
config=self.config))
|
||||
|
||||
|
@ -1,3 +1,9 @@
|
||||
import inspect
|
||||
try:
|
||||
import ujson as json
|
||||
except ImportError: # pragma: no cover
|
||||
import json
|
||||
|
||||
from pywb.apps.wbrequestresponse import WbResponse
|
||||
from warcio.statusandheaders import StatusAndHeaders
|
||||
|
||||
@ -40,6 +46,98 @@ def test_resp_4():
|
||||
assert(resp == expected)
|
||||
|
||||
|
||||
def test_wbresponse_redir_supplied_headers():
|
||||
res = WbResponse.redir_response('http://overhere.now', headers=[('A', 'B')])
|
||||
assert ('A', 'B') in res.status_headers.headers
|
||||
|
||||
|
||||
def test_wbresponse_creation_defaults():
|
||||
res = WbResponse(None)
|
||||
assert res.status_headers is None
|
||||
assert isinstance(res.body, list)
|
||||
assert len(res.body) == 0
|
||||
|
||||
|
||||
def test_wbresponse_encode_stream():
|
||||
stream = [u'\u00c3'] # Unicode Character 'LATIN CAPITAL LETTER A WITH TILDE' (U+00C3)
|
||||
expected = [b'\xc3\x83']
|
||||
encoding_stream = WbResponse.encode_stream(stream)
|
||||
assert inspect.isgenerator(encoding_stream)
|
||||
assert list(encoding_stream) == expected
|
||||
|
||||
|
||||
def test_wbresponse_text_stream():
|
||||
stream = [u'\u00c3'] # Unicode Character 'LATIN CAPITAL LETTER A WITH TILDE' (U+00C3)
|
||||
expected = [b'\xc3\x83']
|
||||
res = WbResponse.text_stream(stream, content_type='text/plain')
|
||||
status_headers = res.status_headers
|
||||
assert status_headers.statusline == '200 OK'
|
||||
assert ('Content-Type', 'text/plain; charset=utf-8') in status_headers.headers
|
||||
assert inspect.isgenerator(res.body)
|
||||
assert list(res.body) == expected
|
||||
|
||||
res = WbResponse.text_stream(stream)
|
||||
status_headers = res.status_headers
|
||||
assert status_headers.statusline == '200 OK'
|
||||
assert ('Content-Type', 'text/plain; charset=utf-8') in status_headers.headers
|
||||
assert inspect.isgenerator(res.body)
|
||||
assert list(res.body) == expected
|
||||
|
||||
|
||||
def test_wbresponse_options_response():
|
||||
res = WbResponse.options_response(dict(HTTP_ORIGIN='http://example.com'))
|
||||
assert ('Access-Control-Allow-Origin', 'http://example.com') in res.status_headers.headers
|
||||
res = WbResponse.options_response(dict(HTTP_REFERER='http://example.com'))
|
||||
assert ('Access-Control-Allow-Origin', 'http://example.com') in res.status_headers.headers
|
||||
res = WbResponse.options_response(dict())
|
||||
assert ('Access-Control-Allow-Origin', '*') in res.status_headers.headers
|
||||
res = WbResponse.options_response(dict(HTTP_ORIGIN=None))
|
||||
assert ('Access-Control-Allow-Origin', '*') in res.status_headers.headers
|
||||
res = WbResponse.options_response(dict(HTTP_REFERER=None))
|
||||
assert ('Access-Control-Allow-Origin', '*') in res.status_headers.headers
|
||||
|
||||
|
||||
def test_wbresponse_json_response():
|
||||
body = dict(pywb=1, wr=2)
|
||||
res = WbResponse.json_response(body)
|
||||
status_headers = res.status_headers
|
||||
assert status_headers.statusline == '200 OK'
|
||||
assert ('Content-Type', 'application/json; charset=utf-8') in status_headers.headers
|
||||
assert json.loads(res.body[0]) == body
|
||||
|
||||
|
||||
def test_wbresponse_init_derived():
|
||||
class Derived(WbResponse):
|
||||
def __init__(self, status_headers, value=None, **kwargs):
|
||||
self.received_kwargs = dict()
|
||||
super(Derived, self).__init__(status_headers, value=value, **kwargs)
|
||||
|
||||
def _init_derived(self, params):
|
||||
self.received_kwargs.update(params)
|
||||
|
||||
dres = Derived(None, pywb=1, wr=2)
|
||||
assert dres.received_kwargs == dict(pywb=1, wr=2)
|
||||
|
||||
|
||||
def test_wbresponse_callable():
|
||||
expected_body = dict(pywb=1, wr=2)
|
||||
res = WbResponse.json_response(expected_body)
|
||||
env = dict(REQUEST_METHOD='GET')
|
||||
expected_passed_values = dict(
|
||||
status_line='200 OK',
|
||||
headers=[('Content-Type', 'application/json; charset=utf-8'), ('Content-Length', '17')]
|
||||
)
|
||||
passed_values = dict(status_line=None, headers=None)
|
||||
|
||||
def start_response(status_line, headers):
|
||||
passed_values['status_line'] = status_line
|
||||
passed_values['headers'] = headers
|
||||
|
||||
body = res(env, start_response)
|
||||
assert json.loads(body[0]) == expected_body
|
||||
assert passed_values == expected_passed_values
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
@ -1,39 +1,76 @@
|
||||
from warcio.statusandheaders import StatusAndHeaders
|
||||
|
||||
import json
|
||||
try:
|
||||
import ujson as json
|
||||
except ImportError: # pragma: no cover
|
||||
import json
|
||||
|
||||
|
||||
|
||||
#=================================================================
|
||||
# =================================================================
|
||||
class WbResponse(object):
|
||||
"""
|
||||
Represnts a pywb wsgi response object.
|
||||
"""Represnts a pywb wsgi response object.
|
||||
|
||||
Holds a status_headers object and a response iter, to be
|
||||
returned to wsgi container.
|
||||
"""
|
||||
def __init__(self, status_headers, value=[], **kwargs):
|
||||
returned to wsgi container."""
|
||||
|
||||
def __init__(self, status_headers, value=None, **kwargs):
|
||||
"""
|
||||
:param StatusAndHeaders status_headers: The StatusAndHeaders object for this response
|
||||
:param Any value: The response body
|
||||
:param Any kwargs: Additional keyword arguments to be passed to subclasses
|
||||
"""
|
||||
if value is None:
|
||||
value = list()
|
||||
self.status_headers = status_headers
|
||||
self.body = value
|
||||
self._init_derived(kwargs)
|
||||
|
||||
def _init_derived(self, params):
|
||||
"""Receive the kwargs used in construction of this class
|
||||
|
||||
:param Any params:
|
||||
:return:
|
||||
:rtype: None
|
||||
"""
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def text_stream(stream, content_type='text/plain; charset=utf-8', status='200 OK'):
|
||||
def encode(stream):
|
||||
for obj in stream:
|
||||
yield obj.encode('utf-8')
|
||||
"""Utility method for constructing a streaming text response.
|
||||
|
||||
:param Any stream: The response body stream
|
||||
:param str content_type: The content-type of the response
|
||||
:param str status: The HTTP status line
|
||||
:return: WbResponse that is a text stream
|
||||
:rtype WbResponse:
|
||||
"""
|
||||
if 'charset' not in content_type:
|
||||
content_type += '; charset=utf-8'
|
||||
|
||||
return WbResponse.bin_stream(encode(stream), content_type, status)
|
||||
return WbResponse.bin_stream(WbResponse.encode_stream(stream), content_type, status)
|
||||
|
||||
@staticmethod
|
||||
def encode_stream(stream):
|
||||
"""Utility method to encode a stream using utf-8.
|
||||
|
||||
:param Any stream: The stream to be encoded using utf-8
|
||||
:return: A generator that yields the contents of the stream encoded as utf-8
|
||||
"""
|
||||
for obj in stream:
|
||||
yield obj.encode('utf-8')
|
||||
|
||||
@staticmethod
|
||||
def bin_stream(stream, content_type, status='200 OK',
|
||||
headers=None):
|
||||
headers=None):
|
||||
"""Utility method for constructing a binary response.
|
||||
|
||||
:param Any stream: The response body stream
|
||||
:param str content_type: The content-type of the response
|
||||
:param str status: The HTTP status line
|
||||
:param list[tuple[str, str]] headers: Additional headers for this response
|
||||
:return: WbResponse that is a binary stream
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
def_headers = [('Content-Type', content_type)]
|
||||
if headers:
|
||||
def_headers += headers
|
||||
@ -44,6 +81,14 @@ class WbResponse(object):
|
||||
|
||||
@staticmethod
|
||||
def text_response(text, status='200 OK', content_type='text/plain; charset=utf-8'):
|
||||
"""Utility method for constructing a text response.
|
||||
|
||||
:param str text: The text response body
|
||||
:param str content_type: The content-type of the response
|
||||
:param str status: The HTTP status line
|
||||
:return: WbResponse text response
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
encoded_text = text.encode('utf-8')
|
||||
status_headers = StatusAndHeaders(status,
|
||||
[('Content-Type', content_type),
|
||||
@ -53,21 +98,59 @@ class WbResponse(object):
|
||||
|
||||
@staticmethod
|
||||
def json_response(obj, status='200 OK', content_type='application/json; charset=utf-8'):
|
||||
"""Utility method for constructing a JSON response.
|
||||
|
||||
:param dict obj: The dictionary to be serialized in JSON format
|
||||
:param str content_type: The content-type of the response
|
||||
:param str status: The HTTP status line
|
||||
:return: WbResponse JSON response
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
return WbResponse.text_response(json.dumps(obj), status, content_type)
|
||||
|
||||
@staticmethod
|
||||
def redir_response(location, status='302 Redirect', headers=None):
|
||||
"""Utility method for constructing redirection response.
|
||||
|
||||
:param str location: The location of the resource redirecting to
|
||||
:param str status: The HTTP status line
|
||||
:param list[tuple[str, str]] headers: Additional headers for this response
|
||||
:return: WbResponse redirection response
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
redir_headers = [('Location', location), ('Content-Length', '0')]
|
||||
if headers:
|
||||
redir_headers += headers
|
||||
|
||||
return WbResponse(StatusAndHeaders(status, redir_headers))
|
||||
|
||||
@staticmethod
|
||||
def options_response(env):
|
||||
"""Construct WbResponse for OPTIONS based on the WSGI env dictionary
|
||||
|
||||
:param dict env: The WSGI environment dictionary
|
||||
:return: The WBResponse for the options request
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
status_headers = StatusAndHeaders('200 Ok', [
|
||||
('Content-Type', 'text/plain'),
|
||||
('Content-Length', '0'),
|
||||
])
|
||||
response = WbResponse(status_headers)
|
||||
response.add_access_control_headers(env=env)
|
||||
return response
|
||||
|
||||
def __call__(self, env, start_response):
|
||||
"""Callable definition to allow WbResponse control over how the response is sent
|
||||
|
||||
:param dict env: The WSGI environment dictionary
|
||||
:param function start_response: The WSGI start_response function
|
||||
:return: The response body
|
||||
"""
|
||||
start_response(self.status_headers.statusline,
|
||||
self.status_headers.headers)
|
||||
|
||||
if env['REQUEST_METHOD'] == 'HEAD' or self.status_headers.statusline.startswith('304'):
|
||||
request_method = env['REQUEST_METHOD']
|
||||
if request_method == 'HEAD' or request_method == 'OPTIONS' or self.status_headers.statusline.startswith('304'):
|
||||
if hasattr(self.body, 'close'):
|
||||
self.body.close()
|
||||
return []
|
||||
@ -75,8 +158,42 @@ class WbResponse(object):
|
||||
return self.body
|
||||
|
||||
def add_range(self, *args):
|
||||
"""Add HTTP range header values to this response
|
||||
|
||||
:param int args: The values for the range HTTP header
|
||||
:return: The same WbResponse but with the values for the range HTTP header added
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
self.status_headers.add_range(*args)
|
||||
return self
|
||||
|
||||
def add_access_control_headers(self, env=None):
|
||||
"""Adds Access-Control* HTTP headers to this WbResponse's HTTP headers.
|
||||
|
||||
:param dict env: The WSGI environment dictionary
|
||||
:return: The same WbResponse but with the values for the Access-Control* HTTP header added
|
||||
:rtype: WbResponse
|
||||
"""
|
||||
allowed_methods = 'GET, POST, PUT, OPTIONS, DELETE, PATCH, HEAD, TRACE, CONNECT'
|
||||
allowed_origin = None
|
||||
if env is not None:
|
||||
acr_method = env.get('HTTP_ACCESS_CONTROL_REQUEST_METHOD')
|
||||
if acr_method is not None and acr_method not in allowed_methods:
|
||||
allowed_methods = allowed_methods + ', ' + acr_method
|
||||
r_method = env.get('REQUEST_METHOD')
|
||||
if r_method is not None and r_method not in allowed_methods:
|
||||
allowed_methods = allowed_methods + ', ' + r_method
|
||||
acr_headers = env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS')
|
||||
if acr_headers is not None:
|
||||
self.status_headers.add_header('Access-Control-Allow-Headers', acr_headers)
|
||||
allowed_origin = env.get('HTTP_ORIGIN', env.get('HTTP_REFERER', allowed_origin))
|
||||
if allowed_origin is None:
|
||||
allowed_origin = '*'
|
||||
self.status_headers.replace_header('Access-Control-Allow-Origin', allowed_origin)
|
||||
self.status_headers.add_header('Access-Control-Allow-Methods', allowed_methods)
|
||||
self.status_headers.add_header('Access-Control-Allow-Credentials', 'true')
|
||||
self.status_headers.add_header('Access-Control-Max-Age', '1800')
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
return str(vars(self))
|
||||
|
@ -14,9 +14,13 @@ from webassets.env import Resolver
|
||||
|
||||
from pkg_resources import resource_filename
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
try:
|
||||
import ujson as json
|
||||
except ImportError: # pragma: no cover
|
||||
import json
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class RelEnvironment(Environment):
|
||||
@ -27,14 +31,35 @@ class RelEnvironment(Environment):
|
||||
|
||||
# ============================================================================
|
||||
class JinjaEnv(object):
|
||||
def __init__(self, paths=['templates', '.', '/'],
|
||||
packages=['pywb'],
|
||||
assets_path=None,
|
||||
globals=None,
|
||||
overlay=None,
|
||||
extensions=None,
|
||||
env_template_params_key='pywb.template_params',
|
||||
env_template_dir_key='pywb.templates_dir'):
|
||||
"""Pywb JinjaEnv class that provides utility functions used by the templates,
|
||||
configured template loaders and template paths, and contains the actual Jinja
|
||||
env used by each template."""
|
||||
|
||||
def __init__(self, paths=None,
|
||||
packages=None,
|
||||
assets_path=None,
|
||||
globals=None,
|
||||
overlay=None,
|
||||
extensions=None,
|
||||
env_template_params_key='pywb.template_params',
|
||||
env_template_dir_key='pywb.templates_dir'):
|
||||
"""Construct a new JinjaEnv.
|
||||
|
||||
:param list[str] paths: List of paths to search for templates
|
||||
:param list[str] packages: List of assets package names
|
||||
:param str assets_path: Path to a yaml file containing assets
|
||||
:param dict[str, str] globals: Dictionary of additional globals available during template rendering
|
||||
:param overlay:
|
||||
:param list extensions: List of webassets extension classes
|
||||
:param str env_template_params_key: The full pywb package key for the template params
|
||||
:param str env_template_dir_key: The full pywb package key for the template directory
|
||||
"""
|
||||
|
||||
if paths is None:
|
||||
paths = ['templates', '.', '/']
|
||||
|
||||
if packages is None:
|
||||
packages = ['pywb']
|
||||
|
||||
self._init_filters()
|
||||
|
||||
@ -72,6 +97,13 @@ class JinjaEnv(object):
|
||||
jinja_env.assets_environment = assets_env
|
||||
|
||||
def _make_loaders(self, paths, packages):
|
||||
"""Initialize the template loaders based on the supplied paths and packages.
|
||||
|
||||
:param list[str] paths: List of paths to search for templates
|
||||
:param list[str] packages: List of assets package names
|
||||
:return: A list of loaders to be used for loading the template assets
|
||||
:rtype: list[FileSystemLoader|PackageLoader]
|
||||
"""
|
||||
loaders = []
|
||||
# add loaders for paths
|
||||
for path in paths:
|
||||
@ -84,6 +116,15 @@ class JinjaEnv(object):
|
||||
return loaders
|
||||
|
||||
def template_filter(self, param=None):
|
||||
"""Returns a decorator that adds the wrapped function to dictionary of template filters.
|
||||
|
||||
The wrapped function is keyed by either the supplied param (if supplied)
|
||||
or by the wrapped functions name.
|
||||
|
||||
:param param: Optional name to use instead of the name of the function to be wrapped
|
||||
:return: A decorator to wrap a template filter function
|
||||
:rtype: callable
|
||||
"""
|
||||
def deco(func):
|
||||
name = param or func.__name__
|
||||
self.filters[name] = func
|
||||
@ -92,10 +133,18 @@ class JinjaEnv(object):
|
||||
return deco
|
||||
|
||||
def _init_filters(self):
|
||||
"""Initialize the default pywb provided Jninja filters available during template rendering"""
|
||||
self.filters = {}
|
||||
|
||||
@self.template_filter()
|
||||
def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'):
|
||||
"""Formats the supplied timestamp using format_
|
||||
|
||||
:param str value: The timestamp to be formatted
|
||||
:param str format_: The format string
|
||||
:return: The correctly formatted timestamp as determined by format_
|
||||
:rtype: str
|
||||
"""
|
||||
if format_ == '%s':
|
||||
return timestamp_to_sec(value)
|
||||
else:
|
||||
@ -104,22 +153,58 @@ class JinjaEnv(object):
|
||||
|
||||
@self.template_filter('urlsplit')
|
||||
def get_urlsplit(url):
|
||||
"""Splits the supplied URL
|
||||
|
||||
:param str url: The url to be split
|
||||
:return: The split url
|
||||
:rtype: urllib.parse.SplitResult
|
||||
"""
|
||||
split = urlsplit(url)
|
||||
return split
|
||||
|
||||
@self.template_filter()
|
||||
def tojson(obj):
|
||||
"""Converts the supplied object/array/any to a JSON string if it can be JSONified
|
||||
|
||||
:param any obj: The value to be converted to a JSON string
|
||||
:return: The JSON string representation of the supplied value
|
||||
:rtype: str
|
||||
"""
|
||||
return json.dumps(obj)
|
||||
|
||||
@self.template_filter()
|
||||
def tobool(bool_val):
|
||||
"""Converts a python boolean to a JS "true" or "false" string
|
||||
:param any obj: A value to be evaluated as a boolean
|
||||
:return: The string "true" or "false" to be inserted into JS
|
||||
"""
|
||||
|
||||
return 'true' if bool_val else 'false'
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class BaseInsertView(object):
|
||||
"""Base class of all template views used by Pywb"""
|
||||
|
||||
def __init__(self, jenv, insert_file, banner_view=None):
|
||||
"""Create a new BaseInsertView.
|
||||
|
||||
:param JinjaEnv jenv: The instance of pywb.rewrite.templateview.JinjaEnv to be used
|
||||
:param str insert_file: The name of the template file
|
||||
:param BaseInsertView banner_view: The banner_view property of pywb.apps.RewriterApp
|
||||
"""
|
||||
self.jenv = jenv
|
||||
self.insert_file = insert_file
|
||||
self.banner_view = banner_view
|
||||
|
||||
def render_to_string(self, env, **kwargs):
|
||||
"""Render this template.
|
||||
|
||||
:param dict env: The WSGI environment associated with the request causing this template to be rendered
|
||||
:param any kwargs: The keyword arguments to be supplied to the Jninja template render method
|
||||
:return: The rendered template
|
||||
:rtype: str
|
||||
"""
|
||||
template = None
|
||||
template_path = env.get(self.jenv.env_template_dir_key)
|
||||
|
||||
@ -149,6 +234,9 @@ class BaseInsertView(object):
|
||||
|
||||
# ============================================================================
|
||||
class HeadInsertView(BaseInsertView):
|
||||
"""The template view class associated with rendering the HTML inserted
|
||||
into the head of the pages replayed (WB Insert)."""
|
||||
|
||||
def create_insert_func(self, wb_url,
|
||||
wb_prefix,
|
||||
host_prefix,
|
||||
@ -158,19 +246,32 @@ class HeadInsertView(BaseInsertView):
|
||||
coll='',
|
||||
include_ts=True,
|
||||
**kwargs):
|
||||
"""Create the function used to render the header insert template for the current request.
|
||||
|
||||
:param rewrite.wburl.WbUrl wb_url: The WbUrl for the request this template is being rendered for
|
||||
:param str wb_prefix: The URL prefix pywb is serving the content using (e.g. http://localhost:8080/live/)
|
||||
:param str host_prefix: The host URL prefix pywb is running on (e.g. http://localhost:8080)
|
||||
:param str top_url: The full URL for this request (e.g. http://localhost:8080/live/http://example.com)
|
||||
:param dict env: The WSGI environment dictionary for this request
|
||||
:param bool is_framed: Is pywb or a specific collection running in framed mode
|
||||
:param str coll: The name of the collection this request is associated with
|
||||
:param bool include_ts: Should a timestamp be included in the rendered template
|
||||
:param kwargs: Additional keyword arguments to be supplied to the Jninja template render method
|
||||
:return: A function to be used to render the header insert for the request this template is being rendered for
|
||||
:rtype: callable
|
||||
"""
|
||||
params = kwargs
|
||||
params['host_prefix'] = host_prefix
|
||||
params['wb_prefix'] = wb_prefix
|
||||
params['wb_url'] = wb_url
|
||||
params['top_url'] = top_url
|
||||
params['coll'] = coll
|
||||
params['is_framed'] = 'true' if is_framed else 'false'
|
||||
params['is_framed'] = is_framed
|
||||
|
||||
def make_head_insert(rule, cdx):
|
||||
params['wombat_ts'] = cdx['timestamp'] if include_ts else ''
|
||||
params['wombat_sec'] = timestamp_to_sec(cdx['timestamp'])
|
||||
params['is_live'] = 'true' if cdx.get('is_live') else 'false'
|
||||
params['is_live'] = cdx.get('is_live')
|
||||
|
||||
if self.banner_view:
|
||||
banner_html = self.banner_view.render_to_string(env, cdx=cdx, **params)
|
||||
@ -183,6 +284,8 @@ class HeadInsertView(BaseInsertView):
|
||||
|
||||
# ============================================================================
|
||||
class TopFrameView(BaseInsertView):
|
||||
"""The template view class associated with rendering the replay iframe"""
|
||||
|
||||
def get_top_frame(self, wb_url,
|
||||
wb_prefix,
|
||||
host_prefix,
|
||||
@ -191,6 +294,18 @@ class TopFrameView(BaseInsertView):
|
||||
replay_mod,
|
||||
coll='',
|
||||
extra_params=None):
|
||||
"""
|
||||
:param rewrite.wburl.WbUrl wb_url: The WbUrl for the request this template is being rendered for
|
||||
:param str wb_prefix: The URL prefix pywb is serving the content using (e.g. http://localhost:8080/live/)
|
||||
:param str host_prefix: The host URL prefix pywb is running on (e.g. http://localhost:8080)
|
||||
:param dict env: The WSGI environment dictionary for the request this template is being rendered for
|
||||
:param str frame_mod: The modifier to be used for framing (e.g. if_)
|
||||
:param str replay_mod: The modifier to be used in the URL of the page being replayed (e.g. mp_)
|
||||
:param str coll: The name of the collection this template is being rendered for
|
||||
:param dict extra_params: Additional parameters to be supplied to the Jninja template render method
|
||||
:return: The frame insert string
|
||||
:rtype: str
|
||||
"""
|
||||
|
||||
embed_url = wb_url.to_str(mod=replay_mod)
|
||||
|
||||
@ -227,7 +342,15 @@ class TopFrameView(BaseInsertView):
|
||||
|
||||
# ============================================================================
|
||||
class PkgResResolver(Resolver):
|
||||
"""Class for resolving pywb package resources when install via pypi or setup.py"""
|
||||
|
||||
def get_pkg_path(self, item):
|
||||
"""Get the package path for the
|
||||
|
||||
:param str item: A resources full package path
|
||||
:return: The netloc and path from the items package path
|
||||
:rtype: tuple[str, str]
|
||||
"""
|
||||
if not isinstance(item, str):
|
||||
return None
|
||||
|
||||
|
@ -3,8 +3,8 @@
|
||||
var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
|
||||
var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
|
||||
var srcsetSplit = /\s*(\S*\s+[\d.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/;
|
||||
// the preserver instance for this worker
|
||||
var preserver = null;
|
||||
// the autofetcher instance for this worker
|
||||
var autofetcher = null;
|
||||
|
||||
function noop() {}
|
||||
|
||||
@ -41,31 +41,25 @@ self.onmessage = function (event) {
|
||||
var data = event.data;
|
||||
switch (data.type) {
|
||||
case 'values':
|
||||
preserver.preserveMediaSrcset(data);
|
||||
autofetcher.autofetchMediaSrcset(data);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
function pMap(p) {
|
||||
// mapping function to ensure each fetch promises catch has a no op cb
|
||||
return p.catch(noop);
|
||||
}
|
||||
|
||||
function Preserver(prefix, mod) {
|
||||
if (!(this instanceof Preserver)) {
|
||||
return new Preserver(prefix, mod);
|
||||
function AutoFetcher(init) {
|
||||
if (!(this instanceof AutoFetcher)) {
|
||||
return new AutoFetcher(init);
|
||||
}
|
||||
this.prefix = prefix;
|
||||
this.mod = mod;
|
||||
this.prefixMod = prefix + mod;
|
||||
this.proxyMode = init.proxyMode;
|
||||
this.prefix = init.prefix;
|
||||
this.mod = init.mod;
|
||||
this.prefixMod = init.prefix + init.mod;
|
||||
// relative url, WorkerLocation is set by owning document
|
||||
this.relative = prefix.split(location.origin)[1];
|
||||
this.relative = init.prefix.split(location.origin)[1];
|
||||
// schemeless url
|
||||
this.schemeless = '/' + this.relative;
|
||||
// local cache of URLs fetched, to reduce server load
|
||||
this.seen = {};
|
||||
// counter used to know when to clear seen (count > 2500)
|
||||
this.seenCount = 0;
|
||||
// array of promises returned by fetch(URL)
|
||||
this.fetches = [];
|
||||
// array of URL to be fetched
|
||||
@ -76,7 +70,7 @@ function Preserver(prefix, mod) {
|
||||
this.fetchDone = this.fetchDone.bind(this);
|
||||
}
|
||||
|
||||
Preserver.prototype.fixupURL = function (url) {
|
||||
AutoFetcher.prototype.fixupURL = function (url) {
|
||||
// attempt to fix up the url and do our best to ensure we can get dat 200 OK!
|
||||
if (url.indexOf(this.prefixMod) === 0) {
|
||||
return url;
|
||||
@ -93,57 +87,54 @@ Preserver.prototype.fixupURL = function (url) {
|
||||
return url;
|
||||
};
|
||||
|
||||
Preserver.prototype.safeFetch = function (url) {
|
||||
AutoFetcher.prototype.safeFetch = function (url) {
|
||||
var fixedURL = this.fixupURL(url);
|
||||
// check to see if we have seen this url before in order
|
||||
// to lessen the load against the server content is preserved from
|
||||
// to lessen the load against the server content is fetched from
|
||||
if (this.seen[url] != null) return;
|
||||
this.seen[url] = true;
|
||||
if (this.queuing) {
|
||||
// we are currently waiting for a batch of fetches to complete
|
||||
return this.queue.push(fixedURL);
|
||||
}
|
||||
// queue this urls fetch
|
||||
this.fetches.push(fetch(fixedURL));
|
||||
// fetch this url
|
||||
this.fetches.push(fetch(url));
|
||||
};
|
||||
|
||||
Preserver.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) {
|
||||
AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) {
|
||||
// Same function as style_replacer in wombat.rewrite_style, n2 is our URL
|
||||
this.safeFetch(n2);
|
||||
return n1 + n2 + n3;
|
||||
};
|
||||
|
||||
Preserver.prototype.fetchDone = function () {
|
||||
// clear our fetches array in place
|
||||
// https://www.ecma-international.org/ecma-262/9.0/index.html#sec-properties-of-array-instances-length
|
||||
this.fetches.length = 0;
|
||||
AutoFetcher.prototype.fetchDone = function () {
|
||||
// indicate we no longer need to Q
|
||||
this.queuing = false;
|
||||
if (this.queue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
this.drainQ();
|
||||
} else if (this.seenCount > 2500) {
|
||||
// we seen 2500 URLs so lets free some memory as at this point
|
||||
// we will probably see some more. GC it!
|
||||
this.seen = {};
|
||||
this.seenCount = 0;
|
||||
}
|
||||
};
|
||||
|
||||
Preserver.prototype.fetchAll = function () {
|
||||
AutoFetcher.prototype.fetchAll = function () {
|
||||
// if we are queuing or have no fetches this is a no op
|
||||
if (this.queuing) return;
|
||||
if (this.fetches.length === 0) return;
|
||||
// we are about to fetch queue anything that comes our way
|
||||
this.queuing = true;
|
||||
// initiate fetches by turning the initial fetch promises
|
||||
// into rejctionless promises and "await" all
|
||||
Promise.all(this.fetches.map(pMap))
|
||||
/// initiate fetches by turning the initial fetch promises
|
||||
// into rejctionless promises and "await" all clearing
|
||||
// our fetches array in place
|
||||
var runningFetchers = [];
|
||||
while (this.fetches.length > 0) {
|
||||
runningFetchers.push(this.fetches.shift().catch(noop))
|
||||
}
|
||||
Promise.all(runningFetchers)
|
||||
.then(this.fetchDone)
|
||||
.catch(this.fetchDone);
|
||||
};
|
||||
|
||||
Preserver.prototype.drainQ = function () {
|
||||
AutoFetcher.prototype.drainQ = function () {
|
||||
// clear our Q in place and fill our fetches array
|
||||
while (this.queue.length > 0) {
|
||||
this.fetches.push(fetch(this.queue.shift()));
|
||||
@ -152,17 +143,18 @@ Preserver.prototype.drainQ = function () {
|
||||
this.fetchAll();
|
||||
};
|
||||
|
||||
Preserver.prototype.extractMedia = function (mediaRules) {
|
||||
AutoFetcher.prototype.extractMedia = function (mediaRules) {
|
||||
// this is a broken down rewrite_style
|
||||
if (mediaRules == null) return;
|
||||
for (var i = 0; i < mediaRules.length; i++) {
|
||||
var rule = mediaRules[i];
|
||||
rule.replace(STYLE_REGEX, this.urlExtractor);
|
||||
rule.replace(IMPORT_REGEX, this.urlExtractor);
|
||||
if (mediaRules == null || mediaRules.values === null) return;
|
||||
var rules = mediaRules.values;
|
||||
for (var i = 0; i < rules.length; i++) {
|
||||
var rule = rules[i];
|
||||
rule.replace(STYLE_REGEX, this.urlExtractor)
|
||||
.replace(IMPORT_REGEX, this.urlExtractor);
|
||||
}
|
||||
};
|
||||
|
||||
Preserver.prototype.extractSrcset = function (srcsets) {
|
||||
AutoFetcher.prototype.extractSrcset = function (srcsets) {
|
||||
if (srcsets == null || srcsets.values == null) return;
|
||||
var srcsetValues = srcsets.values;
|
||||
// was srcsets from rewrite_srcset and if so no need to split
|
||||
@ -175,19 +167,21 @@ Preserver.prototype.extractSrcset = function (srcsets) {
|
||||
this.safeFetch(srcset.split(' ')[0]);
|
||||
} else {
|
||||
// was from extract from local doc so we need to duplicate work
|
||||
var values = srcset.split(srcsetSplit).filter(Boolean);
|
||||
var values = srcset.split(srcsetSplit);
|
||||
for (var j = 0; j < values.length; j++) {
|
||||
var value = values[j].trim();
|
||||
if (value.length > 0) {
|
||||
this.safeFetch(value.split(' ')[0]);
|
||||
if (Boolean(values[j])) {
|
||||
var value = values[j].trim();
|
||||
if (value.length > 0) {
|
||||
this.safeFetch(value.split(' ')[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Preserver.prototype.preserveMediaSrcset = function (data) {
|
||||
// we got a message and now we preserve!
|
||||
AutoFetcher.prototype.autofetchMediaSrcset = function (data) {
|
||||
// we got a message and now we autofetch!
|
||||
// these calls turn into no ops if they have no work
|
||||
this.extractMedia(data.media);
|
||||
this.extractSrcset(data.srcset);
|
||||
@ -197,9 +191,12 @@ Preserver.prototype.preserveMediaSrcset = function (data) {
|
||||
// initialize ourselves from the query params :)
|
||||
try {
|
||||
var loc = new self.URL(location);
|
||||
preserver = new Preserver(loc.searchParams.get('prefix'), loc.searchParams.get('mod'));
|
||||
autofetcher = new AutoFetcher(JSON.parse(loc.searchParams.get('init')));
|
||||
} catch (e) {
|
||||
// likely we are in an older version of safari
|
||||
var search = decodeURIComponent(location.search.split('?')[1]).split('&');
|
||||
preserver = new Preserver(search[0].substr(search[0].indexOf('=') + 1), search[1].substr(search[1].indexOf('=') + 1));
|
||||
var init = JSON.parse(search[0].substr(search[0].indexOf('=') + 1));
|
||||
init.prefix = decodeURIComponent(init.prefix);
|
||||
init.baseURI = decodeURIComponent(init.baseURI);
|
||||
autofetcher = new AutoFetcher(init);
|
||||
}
|
192
pywb/static/autoFetchWorkerProxyMode.js
Normal file
192
pywb/static/autoFetchWorkerProxyMode.js
Normal file
@ -0,0 +1,192 @@
|
||||
'use strict';
|
||||
// thanks wombat
|
||||
var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
|
||||
var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
|
||||
var srcsetSplit = /\s*(\S*\s+[\d.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/;
|
||||
// the autofetcher instance for this worker
|
||||
var autofetcher = null;
|
||||
|
||||
function noop() {}
|
||||
|
||||
if (typeof self.Promise === 'undefined') {
|
||||
// not kewl we must polyfill Promise
|
||||
self.Promise = function (executor) {
|
||||
executor(noop, noop);
|
||||
};
|
||||
self.Promise.prototype.then = function (cb) {
|
||||
if (cb) cb();
|
||||
return this;
|
||||
};
|
||||
self.Promise.prototype.catch = function () {
|
||||
return this;
|
||||
};
|
||||
self.Promise.all = function (values) {
|
||||
return new Promise(noop);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
if (typeof self.fetch === 'undefined') {
|
||||
// not kewl we must polyfill fetch.
|
||||
self.fetch = function (url) {
|
||||
return new Promise(function (resolve) {
|
||||
var xhr = new XMLHttpRequest();
|
||||
xhr.open('GET', url);
|
||||
xhr.send();
|
||||
resolve();
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
self.onmessage = function (event) {
|
||||
var data = event.data;
|
||||
switch (data.type) {
|
||||
case 'values':
|
||||
autofetcher.autofetchMediaSrcset(data);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
function AutoFetcher() {
|
||||
if (!(this instanceof AutoFetcher)) {
|
||||
return new AutoFetcher();
|
||||
}
|
||||
// local cache of URLs fetched, to reduce server load
|
||||
this.seen = {};
|
||||
// array of promises returned by fetch(URL)
|
||||
this.fetches = [];
|
||||
// array of URL to be fetched
|
||||
this.queue = [];
|
||||
// should we queue a URL or not
|
||||
this.queuing = false;
|
||||
// a URL to resolve relative URLs found in the cssText of CSSMedia rules.
|
||||
this.currentResolver = null;
|
||||
this.urlExtractor = this.urlExtractor.bind(this);
|
||||
this.fetchDone = this.fetchDone.bind(this);
|
||||
}
|
||||
|
||||
AutoFetcher.prototype.safeFetch = function (url) {
|
||||
// ensure we do not request data urls
|
||||
if (url.indexOf('data:') === 0) return;
|
||||
// check to see if we have seen this url before in order
|
||||
// to lessen the load against the server content is autofetchd from
|
||||
if (this.seen[url] != null) return;
|
||||
this.seen[url] = true;
|
||||
if (this.queuing) {
|
||||
// we are currently waiting for a batch of fetches to complete
|
||||
return this.queue.push(url);
|
||||
}
|
||||
// fetch this url
|
||||
this.fetches.push(fetch(url));
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.safeResolve = function (url, resolver) {
|
||||
// Guard against the exception thrown by the URL constructor if the URL or resolver is bad
|
||||
// if resolver is undefined/null then this function passes url through
|
||||
var resolvedURL = url;
|
||||
if (resolver) {
|
||||
try {
|
||||
resolvedURL = (new URL(url, resolver)).href
|
||||
} catch (e) {
|
||||
resolvedURL = url;
|
||||
}
|
||||
}
|
||||
return resolvedURL;
|
||||
};
|
||||
|
||||
|
||||
AutoFetcher.prototype.urlExtractor = function (match, n1, n2, n3, offset, string) {
|
||||
// Same function as style_replacer in wombat.rewrite_style, n2 is our URL
|
||||
// this.currentResolver is set to the URL which the browser would normally
|
||||
// resolve relative urls with (URL of the stylesheet) in an exceptionless manner
|
||||
// (resolvedURL will be undefined if an error occurred)
|
||||
var resolvedURL = this.safeResolve(n2, this.currentResolver);
|
||||
if (resolvedURL) {
|
||||
this.safeFetch(resolvedURL);
|
||||
}
|
||||
return n1 + n2 + n3;
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchDone = function () {
|
||||
// indicate we no longer need to Q
|
||||
this.queuing = false;
|
||||
if (this.queue.length > 0) {
|
||||
// we have a Q of some length drain it
|
||||
this.drainQ();
|
||||
}
|
||||
};
|
||||
|
||||
AutoFetcher.prototype.fetchAll = function () {
    // No-op while a batch is already pending or when there is nothing to fetch
    if (this.queuing || this.fetches.length === 0) {
        return;
    }
    // from here on, newly seen URLs are queued instead of fetched directly
    this.queuing = true;
    // drain this.fetches in place, turning each fetch promise into a
    // rejection-proof one so a single failure cannot abort the whole batch
    var inFlight = [];
    while (this.fetches.length > 0) {
        inFlight.push(this.fetches.shift().catch(noop));
    }
    // "await" the batch; fetchDone fires either way
    Promise.all(inFlight)
        .then(this.fetchDone)
        .catch(this.fetchDone);
};
|
||||
|
||||
AutoFetcher.prototype.drainQ = function () {
    // Empty the queue in place, turning every deferred URL into a pending
    // fetch, then kick off the next batch.
    var q = this.queue;
    while (q.length > 0) {
        this.fetches.push(fetch(q.shift()));
    }
    this.fetchAll();
};
|
||||
|
||||
AutoFetcher.prototype.extractMedia = function (mediaRules) {
    // Broken-down version of wombat's rewrite_style: run the url()/@import
    // regexes over each media rule's cssText, fetching every URL they capture.
    if (mediaRules == null) return;
    var rule;
    for (var idx = 0; idx < mediaRules.length; idx++) {
        rule = mediaRules[idx];
        // stash the stylesheet's URL on `this` so urlExtractor can resolve
        // relative URLs without allocating a closure per iteration (it may
        // construct a `URL` object twice per iteration as it is)
        this.currentResolver = rule.resolve;
        rule.cssText
            .replace(STYLE_REGEX, this.urlExtractor)
            .replace(IMPORT_REGEX, this.urlExtractor);
    }
};
|
||||
|
||||
AutoFetcher.prototype.extractSrcset = function (srcsets) {
    // The auto-fetch worker in proxy mode sends us the raw value of an
    // element's srcset attribute together with a URL for resolving relative
    // entries, so the rewrite_srcset splitting logic is recreated here.
    if (srcsets == null) return;
    var total = srcsets.length;
    var entry, candidate, parts, j;
    for (var i = 0; i < total; i++) {
        entry = srcsets[i];
        parts = entry.srcset.split(srcsetSplit);
        for (j = 0; j < parts.length; j++) {
            // the split regex has capture groups, so `parts` can contain
            // undefined/empty slots — skip them before calling .trim()
            if (!parts[j]) continue;
            candidate = parts[j].trim();
            if (candidate.length > 0) {
                // first whitespace-delimited token is the URL (rest is the
                // descriptor); resolve it exceptionlessly — undefined/falsy
                // means the URL was bad and we do not fetch it
                var resolvedURL = this.safeResolve(candidate.split(' ')[0], entry.resolve);
                if (resolvedURL) {
                    this.safeFetch(resolvedURL);
                }
            }
        }
    }
};
|
||||
|
||||
AutoFetcher.prototype.autofetchMediaSrcset = function (data) {
    // Message entry point: mine any media rules and srcset values we were
    // sent for URLs (each extractor is a no-op when its field is absent),
    // then launch the resulting batch of fetches.
    this.extractMedia(data.media);
    this.extractSrcset(data.srcset);
    this.fetchAll();
};
|
||||
|
||||
autofetcher = new AutoFetcher();
|
@ -78,9 +78,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
var wb_setAttribute = $wbwindow.Element.prototype.setAttribute;
|
||||
var wb_getAttribute = $wbwindow.Element.prototype.getAttribute;
|
||||
var wb_funToString = Function.prototype.toString;
|
||||
var WBPreserWorker;
|
||||
var WBAutoFetchWorker;
|
||||
var wbSheetMediaQChecker;
|
||||
var wbUsePresWorker = $wbwindow.Worker != null && wbinfo.is_live;
|
||||
var wbUseAAWorker = $wbwindow.Worker != null && wbinfo.is_live;
|
||||
|
||||
var wb_info;
|
||||
|
||||
@ -131,6 +131,11 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
'TRACK': {'src': 'oe_'},
|
||||
};
|
||||
|
||||
// pulled up rewrite_style and rewrite_srcset regex's as they are considered globals (uppercase)
|
||||
var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
|
||||
var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
|
||||
var SRCSET_REGEX = /\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/;
|
||||
|
||||
function rwModForElement(elem, attrName) {
|
||||
// this function was created to help add in retrial of element attribute rewrite modifiers
|
||||
if (!elem) {
|
||||
@ -1329,85 +1334,91 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
|
||||
//============================================
|
||||
function initPreserveWorker() {
|
||||
if (!wbUsePresWorker) {
|
||||
function initAutoFetchWorker() {
|
||||
if (!wbUseAAWorker) {
|
||||
return;
|
||||
}
|
||||
|
||||
var Preserver = (function(Worker) {
|
||||
function PWorker(prefix, mod) {
|
||||
if (!(this instanceof PWorker)) {
|
||||
return new PWorker(prefix, mod);
|
||||
}
|
||||
if ($wbwindow === $wbwindow.__WB_replay_top) {
|
||||
// we are top and can will own this worker
|
||||
// setup URL for the kewl case
|
||||
var isTop = $wbwindow === $wbwindow.__WB_replay_top;
|
||||
|
||||
function AutoFetchWorker(prefix, mod) {
|
||||
if (!(this instanceof AutoFetchWorker)) {
|
||||
return new AutoFetchWorker(prefix, mod);
|
||||
}
|
||||
this.checkIntervalCB = this.checkIntervalCB.bind(this);
|
||||
if (isTop) {
|
||||
// we are top and can will own this worker
|
||||
// setup URL for the kewl case
|
||||
// Normal replay and preservation mode pworker setup, its all one origin so YAY!
|
||||
var workerURL = wbinfo.static_prefix +
|
||||
'wombatPreservationWorker.js?prefix=' +
|
||||
encodeURIComponent(prefix) + '&mod=' +
|
||||
encodeURIComponent(mod);
|
||||
this.worker = new Worker(workerURL);
|
||||
} else {
|
||||
this.worker = null;
|
||||
'autoFetchWorker.js?init='+
|
||||
encodeURIComponent(JSON.stringify({ 'mod': mod, 'prefix': prefix }));
|
||||
this.worker = new $wbwindow.Worker(workerURL);
|
||||
} else {
|
||||
// add only the portions of the worker interface we use since we are not top and if in proxy mode start check polling
|
||||
this.worker = {
|
||||
"postMessage": function (msg) {
|
||||
if (!msg.wb_type) {
|
||||
msg = { 'wb_type': 'aaworker', 'msg': msg };
|
||||
}
|
||||
$wbwindow.__WB_replay_top.__orig_postMessage(msg, '*');
|
||||
},
|
||||
"terminate": function () {}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
AutoFetchWorker.prototype.checkIntervalCB = function () {
|
||||
this.extractFromLocalDoc();
|
||||
};
|
||||
|
||||
AutoFetchWorker.prototype.deferredSheetExtraction = function (sheet) {
|
||||
var rules = sheet.cssRules || sheet.rules;
|
||||
// if no rules this a no op
|
||||
if (!rules || rules.length === 0) return;
|
||||
var self = this;
|
||||
function extract() {
|
||||
// loop through each rule of the stylesheet
|
||||
var media = [];
|
||||
for (var j = 0; j < rules.length; ++j) {
|
||||
var rule = rules[j];
|
||||
if (rule.type === CSSRule.MEDIA_RULE) {
|
||||
// we are a media rule so get its text
|
||||
media.push(rule.cssText);
|
||||
}
|
||||
}
|
||||
if (media.length > 0) {
|
||||
// we have some media rules to preserve
|
||||
self.preserveMedia(media);
|
||||
}
|
||||
}
|
||||
// defer things until next time the Promise.resolve Qs are cleared
|
||||
$wbwindow.Promise.resolve().then(extract);
|
||||
};
|
||||
|
||||
PWorker.prototype.deferredSheetExtraction = function(sheet) {
|
||||
var rules = sheet.cssRules || sheet.rules;
|
||||
// if no rules this a no op
|
||||
if (!rules || rules.length === 0) return;
|
||||
function extract() {
|
||||
// loop through each rule of the stylesheet
|
||||
var media = [];
|
||||
for (var j = 0; j < rules.length; ++j) {
|
||||
var rule = rules[j];
|
||||
if (rule instanceof CSSMediaRule) {
|
||||
// we are a media rule so get its text
|
||||
media.push(rule.cssText);
|
||||
}
|
||||
}
|
||||
if (media.length > 0) {
|
||||
// we have some media rules to preserve
|
||||
WBPreserWorker.preserveMedia(media);
|
||||
}
|
||||
}
|
||||
// defer things until next time the Promise.resolve Qs are cleared
|
||||
$wbwindow.Promise.resolve().then(extract);
|
||||
};
|
||||
AutoFetchWorker.prototype.terminate = function () {
|
||||
// terminate the worker, a no op when not replay top
|
||||
this.worker.terminate();
|
||||
};
|
||||
|
||||
PWorker.prototype.terminate = function() {
|
||||
// terminate the worker, a no op when not replay top
|
||||
if ($wbwindow === $wbwindow.__WB_replay_top) {
|
||||
this.worker.terminate();
|
||||
}
|
||||
};
|
||||
AutoFetchWorker.prototype.postMessage = function (msg) {
|
||||
this.worker.postMessage(msg);
|
||||
};
|
||||
|
||||
PWorker.prototype.postMessage = function(msg) {
|
||||
if ($wbwindow === $wbwindow.__WB_replay_top) {
|
||||
// we are actually replay top so send directly to worker
|
||||
this.worker.postMessage(msg);
|
||||
} else {
|
||||
// send message to replay top
|
||||
$wbwindow.__WB_replay_top.__orig_postMessage({
|
||||
'wb_type': 'pworker', 'msg': msg,
|
||||
}, '*');
|
||||
}
|
||||
};
|
||||
AutoFetchWorker.prototype.preserveSrcset = function (srcset) {
|
||||
// send values from rewrite_srcset to the worker
|
||||
this.postMessage({
|
||||
'type': 'values',
|
||||
'srcset': {'values': srcset, 'presplit': true},
|
||||
});
|
||||
};
|
||||
|
||||
PWorker.prototype.preserveSrcset = function(srcset) {
|
||||
// send values from rewrite_srcset to the worker
|
||||
this.postMessage({
|
||||
'type': 'values',
|
||||
'srcset': {'values': srcset, 'presplit': true},
|
||||
});
|
||||
};
|
||||
AutoFetchWorker.prototype.preserveMedia = function (media) {
|
||||
// send CSSMediaRule values to the worker
|
||||
this.postMessage({'type': 'values', 'media': media})
|
||||
};
|
||||
|
||||
PWorker.prototype.preserveMedia = function(media) {
|
||||
// send CSSMediaRule values to the worker
|
||||
this.postMessage({'type': 'values', 'media': media})
|
||||
};
|
||||
|
||||
PWorker.prototype.extractFromLocalDoc = function() {
|
||||
AutoFetchWorker.prototype.extractFromLocalDoc = function () {
|
||||
// get the values to be preserved from the documents stylesheets
|
||||
// and all elements with a srcset
|
||||
var media = [];
|
||||
@ -1415,20 +1426,19 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
var sheets = $wbwindow.document.styleSheets;
|
||||
var i = 0;
|
||||
for (; i < sheets.length; ++i) {
|
||||
var sheet = sheets[i];
|
||||
var rules = sheet.cssRules;
|
||||
var rules = sheets[i].cssRules;
|
||||
for (var j = 0; j < rules.length; ++j) {
|
||||
var rule = rules[j];
|
||||
if (rule instanceof CSSMediaRule) {
|
||||
if (rule.type === CSSRule.MEDIA_RULE) {
|
||||
media.push(rule.cssText);
|
||||
}
|
||||
}
|
||||
}
|
||||
var srcsetElems = $wbwindow.document.querySelectorAll('*[srcset]');
|
||||
var srcsetElems = $wbwindow.document.querySelectorAll('img[srcset]');
|
||||
for (i = 0; i < srcsetElems.length; i++) {
|
||||
var srcsetElem = srcsetElems[i];
|
||||
if (wb_getAttribute) {
|
||||
srcset.push(wb_getAttribute.call(srcsetElem,'srcset'));
|
||||
srcset.push(wb_getAttribute.call(srcsetElem, 'srcset'));
|
||||
} else {
|
||||
srcset.push(srcsetElem.getAttribute('srcset'));
|
||||
}
|
||||
@ -1440,18 +1450,15 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
});
|
||||
};
|
||||
|
||||
return PWorker;
|
||||
})($wbwindow.Worker);
|
||||
WBAutoFetchWorker = new AutoFetchWorker(wb_abs_prefix, wbinfo.mod);
|
||||
|
||||
WBPreserWorker = new Preserver(wb_abs_prefix, wbinfo.mod);
|
||||
|
||||
wbSheetMediaQChecker = function checkStyle () {
|
||||
wbSheetMediaQChecker = function checkStyle() {
|
||||
// used only for link[rel='stylesheet'] so we remove our listener
|
||||
this.removeEventListener('load', wbSheetMediaQChecker);
|
||||
// check no op condition
|
||||
if (this.sheet == null) return;
|
||||
// defer extraction to be nice :)
|
||||
WBPreserWorker.deferredSheetExtraction(this.sheet);
|
||||
WBAutoFetchWorker.deferredSheetExtraction(this.sheet);
|
||||
};
|
||||
}
|
||||
|
||||
@ -1612,10 +1619,6 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
//============================================
|
||||
function rewrite_style(value)
|
||||
{
|
||||
var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
|
||||
|
||||
var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
|
||||
|
||||
function style_replacer(match, n1, n2, n3, offset, string) {
|
||||
return n1 + rewrite_url(n2) + n3;
|
||||
}
|
||||
@ -1645,14 +1648,14 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
|
||||
// Filter removes non-truthy values like null, undefined, and ""
|
||||
var values = value.split(/\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/).filter(Boolean);
|
||||
var values = value.split(SRCSET_REGEX).filter(Boolean);
|
||||
|
||||
for (var i = 0; i < values.length; i++) {
|
||||
values[i] = rewrite_url(values[i].trim());
|
||||
}
|
||||
if (wbUsePresWorker) {
|
||||
if (wbUseAAWorker) {
|
||||
// send post split values to preservation worker
|
||||
WBPreserWorker.preserveSrcset(values);
|
||||
WBAutoFetchWorker.preserveSrcset(values);
|
||||
}
|
||||
return values.join(", ");
|
||||
}
|
||||
@ -1756,16 +1759,16 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
if (elem.textContent !== new_content) {
|
||||
elem.textContent = new_content;
|
||||
changed = true;
|
||||
if (wbUsePresWorker && elem.sheet != null) {
|
||||
if (wbUseAAWorker && elem.sheet != null) {
|
||||
// we have a stylesheet so lets be nice to UI thread
|
||||
// and defer extraction
|
||||
WBPreserWorker.deferredSheetExtraction(elem.sheet);
|
||||
WBAutoFetchWorker.deferredSheetExtraction(elem.sheet);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'LINK':
|
||||
changed = rewrite_attr(elem, 'href');
|
||||
if (wbUsePresWorker && elem.rel === 'stylesheet') {
|
||||
if (wbUseAAWorker && elem.rel === 'stylesheet') {
|
||||
// we can only check link[rel='stylesheet'] when it loads
|
||||
elem.addEventListener('load', wbSheetMediaQChecker);
|
||||
}
|
||||
@ -2194,9 +2197,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
}
|
||||
orig_setter.call(this, res);
|
||||
if (wbUsePresWorker && this.tagName === 'STYLE' && this.sheet != null) {
|
||||
if (wbUseAAWorker && this.tagName === 'STYLE' && this.sheet != null) {
|
||||
// got preserve all the things
|
||||
WBPreserWorker.deferredSheetExtraction(this.sheet);
|
||||
WBAutoFetchWorker.deferredSheetExtraction(this.sheet);
|
||||
}
|
||||
};
|
||||
|
||||
@ -3602,140 +3605,138 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
init_wombat_loc($wbwindow);
|
||||
|
||||
// archival mode: init url-rewriting intercepts
|
||||
if (!wb_is_proxy) {
|
||||
init_wombat_top($wbwindow);
|
||||
init_wombat_top($wbwindow);
|
||||
|
||||
// updated wb_unrewrite_rx for imgur.com
|
||||
var wb_origin = $wbwindow.__WB_replay_top.location.origin;
|
||||
var wb_host = $wbwindow.__WB_replay_top.location.host;
|
||||
var wb_proto = $wbwindow.__WB_replay_top.location.protocol;
|
||||
if (wb_replay_prefix && wb_replay_prefix.indexOf(wb_origin) == 0) {
|
||||
wb_rel_prefix = wb_replay_prefix.substring(wb_origin.length);
|
||||
} else {
|
||||
wb_rel_prefix = wb_replay_prefix;
|
||||
}
|
||||
|
||||
// make the protocol and host optional now
|
||||
var rx = "((" + wb_proto + ")?\/\/" + wb_host + ")?" + wb_rel_prefix + "[^/]+/";
|
||||
wb_unrewrite_rx = new RegExp(rx, "g");
|
||||
|
||||
// History
|
||||
init_history_overrides();
|
||||
|
||||
// Doc Title
|
||||
init_doc_title_override();
|
||||
|
||||
// postMessage
|
||||
// OPT skip
|
||||
if (!wb_opts.skip_postmessage) {
|
||||
init_postmessage_override($wbwindow);
|
||||
init_messageevent_override($wbwindow);
|
||||
}
|
||||
|
||||
initMouseEventOverride($wbwindow);
|
||||
|
||||
init_hash_change();
|
||||
|
||||
// write
|
||||
init_write_override();
|
||||
|
||||
// eval
|
||||
//init_eval_override();
|
||||
|
||||
// Ajax
|
||||
init_ajax_rewrite();
|
||||
|
||||
// Fetch
|
||||
init_fetch_rewrite();
|
||||
init_request_override();
|
||||
|
||||
// Audio
|
||||
init_audio_override();
|
||||
|
||||
// FontFace
|
||||
initFontFaceOverride($wbwindow);
|
||||
|
||||
// Worker override (experimental)
|
||||
initPreserveWorker();
|
||||
init_web_worker_override();
|
||||
init_service_worker_override();
|
||||
initSharedWorkerOverride();
|
||||
|
||||
|
||||
// innerHTML can be overriden on prototype!
|
||||
override_html_assign($wbwindow.HTMLElement, "innerHTML", true);
|
||||
override_html_assign($wbwindow.HTMLElement, "outerHTML", true);
|
||||
override_html_assign($wbwindow.HTMLIFrameElement, "srcdoc", true);
|
||||
override_html_assign($wbwindow.HTMLStyleElement, "textContent");
|
||||
|
||||
// Document.URL override
|
||||
override_prop_extract($wbwindow.Document.prototype, "URL");
|
||||
override_prop_extract($wbwindow.Document.prototype, "documentURI");
|
||||
|
||||
// Node.baseURI override
|
||||
override_prop_extract($wbwindow.Node.prototype, "baseURI");
|
||||
|
||||
// Attr nodeValue and value
|
||||
override_attr_props();
|
||||
|
||||
// init insertAdjacentHTML() override
|
||||
init_insertAdjacentHTML_override();
|
||||
initInsertAdjacentElementOverride();
|
||||
|
||||
|
||||
// iframe.contentWindow and iframe.contentDocument overrides to
|
||||
// ensure wombat is inited on the iframe $wbwindow!
|
||||
override_iframe_content_access("contentWindow");
|
||||
override_iframe_content_access("contentDocument");
|
||||
|
||||
// override funcs to convert first arg proxy->obj
|
||||
override_func_first_arg_proxy_to_obj($wbwindow.MutationObserver, "observe");
|
||||
override_func_first_arg_proxy_to_obj($wbwindow.Node, "compareDocumentPosition");
|
||||
override_func_first_arg_proxy_to_obj($wbwindow.Node, "contains");
|
||||
override_func_first_arg_proxy_to_obj($wbwindow.Document, "createTreeWalker");
|
||||
|
||||
override_func_this_proxy_to_obj($wbwindow, "getComputedStyle", $wbwindow);
|
||||
//override_func_this_proxy_to_obj($wbwindow.EventTarget, "addEventListener");
|
||||
//override_func_this_proxy_to_obj($wbwindow.EventTarget, "removeEventListener");
|
||||
|
||||
override_apply_func($wbwindow);
|
||||
initTimeoutIntervalOverrides($wbwindow, "setTimeout");
|
||||
initTimeoutIntervalOverrides($wbwindow, "setInterval");
|
||||
|
||||
override_frames_access($wbwindow);
|
||||
|
||||
// setAttribute
|
||||
if (!wb_opts.skip_setAttribute) {
|
||||
init_setAttribute_override();
|
||||
init_getAttribute_override();
|
||||
}
|
||||
init_svg_image_overrides();
|
||||
|
||||
// override href and src attrs
|
||||
init_attr_overrides();
|
||||
|
||||
// Cookies
|
||||
init_cookies_override();
|
||||
|
||||
// ensure namespace urls are NOT rewritten
|
||||
init_createElementNS_fix();
|
||||
|
||||
// Image
|
||||
//init_image_override();
|
||||
|
||||
// DOM
|
||||
// OPT skip
|
||||
if (!wb_opts.skip_dom) {
|
||||
init_dom_override();
|
||||
}
|
||||
|
||||
// registerProtocolHandler override
|
||||
init_registerPH_override();
|
||||
|
||||
//sendBeacon override
|
||||
init_beacon_override();
|
||||
// updated wb_unrewrite_rx for imgur.com
|
||||
var wb_origin = $wbwindow.__WB_replay_top.location.origin;
|
||||
var wb_host = $wbwindow.__WB_replay_top.location.host;
|
||||
var wb_proto = $wbwindow.__WB_replay_top.location.protocol;
|
||||
if (wb_replay_prefix && wb_replay_prefix.indexOf(wb_origin) == 0) {
|
||||
wb_rel_prefix = wb_replay_prefix.substring(wb_origin.length);
|
||||
} else {
|
||||
wb_rel_prefix = wb_replay_prefix;
|
||||
}
|
||||
|
||||
// make the protocol and host optional now
|
||||
var rx = "((" + wb_proto + ")?\/\/" + wb_host + ")?" + wb_rel_prefix + "[^/]+/";
|
||||
wb_unrewrite_rx = new RegExp(rx, "g");
|
||||
|
||||
// History
|
||||
init_history_overrides();
|
||||
|
||||
// Doc Title
|
||||
init_doc_title_override();
|
||||
|
||||
// postMessage
|
||||
// OPT skip
|
||||
if (!wb_opts.skip_postmessage) {
|
||||
init_postmessage_override($wbwindow);
|
||||
init_messageevent_override($wbwindow);
|
||||
}
|
||||
|
||||
initMouseEventOverride($wbwindow);
|
||||
|
||||
init_hash_change();
|
||||
|
||||
// write
|
||||
init_write_override();
|
||||
|
||||
// eval
|
||||
//init_eval_override();
|
||||
|
||||
// Ajax
|
||||
init_ajax_rewrite();
|
||||
|
||||
// Fetch
|
||||
init_fetch_rewrite();
|
||||
init_request_override();
|
||||
|
||||
// Audio
|
||||
init_audio_override();
|
||||
|
||||
// FontFace
|
||||
initFontFaceOverride($wbwindow);
|
||||
|
||||
// Worker override (experimental)
|
||||
initAutoFetchWorker();
|
||||
init_web_worker_override();
|
||||
init_service_worker_override();
|
||||
initSharedWorkerOverride();
|
||||
|
||||
|
||||
// innerHTML can be overriden on prototype!
|
||||
override_html_assign($wbwindow.HTMLElement, "innerHTML", true);
|
||||
override_html_assign($wbwindow.HTMLElement, "outerHTML", true);
|
||||
override_html_assign($wbwindow.HTMLIFrameElement, "srcdoc", true);
|
||||
override_html_assign($wbwindow.HTMLStyleElement, "textContent");
|
||||
|
||||
// Document.URL override
|
||||
override_prop_extract($wbwindow.Document.prototype, "URL");
|
||||
override_prop_extract($wbwindow.Document.prototype, "documentURI");
|
||||
|
||||
// Node.baseURI override
|
||||
override_prop_extract($wbwindow.Node.prototype, "baseURI");
|
||||
|
||||
// Attr nodeValue and value
|
||||
override_attr_props();
|
||||
|
||||
// init insertAdjacentHTML() override
|
||||
init_insertAdjacentHTML_override();
|
||||
initInsertAdjacentElementOverride();
|
||||
|
||||
|
||||
// iframe.contentWindow and iframe.contentDocument overrides to
|
||||
// ensure wombat is inited on the iframe $wbwindow!
|
||||
override_iframe_content_access("contentWindow");
|
||||
override_iframe_content_access("contentDocument");
|
||||
|
||||
// override funcs to convert first arg proxy->obj
|
||||
override_func_first_arg_proxy_to_obj($wbwindow.MutationObserver, "observe");
|
||||
override_func_first_arg_proxy_to_obj($wbwindow.Node, "compareDocumentPosition");
|
||||
override_func_first_arg_proxy_to_obj($wbwindow.Node, "contains");
|
||||
override_func_first_arg_proxy_to_obj($wbwindow.Document, "createTreeWalker");
|
||||
|
||||
|
||||
override_func_this_proxy_to_obj($wbwindow, "getComputedStyle", $wbwindow);
|
||||
//override_func_this_proxy_to_obj($wbwindow.EventTarget, "addEventListener");
|
||||
//override_func_this_proxy_to_obj($wbwindow.EventTarget, "removeEventListener");
|
||||
|
||||
override_apply_func($wbwindow);
|
||||
initTimeoutIntervalOverrides($wbwindow, "setTimeout");
|
||||
initTimeoutIntervalOverrides($wbwindow, "setInterval");
|
||||
|
||||
override_frames_access($wbwindow);
|
||||
|
||||
// setAttribute
|
||||
if (!wb_opts.skip_setAttribute) {
|
||||
init_setAttribute_override();
|
||||
init_getAttribute_override();
|
||||
}
|
||||
init_svg_image_overrides();
|
||||
|
||||
// override href and src attrs
|
||||
init_attr_overrides();
|
||||
|
||||
// Cookies
|
||||
init_cookies_override();
|
||||
|
||||
// ensure namespace urls are NOT rewritten
|
||||
init_createElementNS_fix();
|
||||
|
||||
// Image
|
||||
//init_image_override();
|
||||
|
||||
// DOM
|
||||
// OPT skip
|
||||
if (!wb_opts.skip_dom) {
|
||||
init_dom_override();
|
||||
}
|
||||
|
||||
// registerProtocolHandler override
|
||||
init_registerPH_override();
|
||||
|
||||
//sendBeacon override
|
||||
init_beacon_override();
|
||||
// other overrides
|
||||
// proxy mode: only using these overrides
|
||||
|
||||
@ -3765,13 +3766,13 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
init_document_obj_proxy($wbwindow.document);
|
||||
|
||||
// expose functions
|
||||
var obj = {}
|
||||
var obj = {};
|
||||
obj.extract_orig = extract_orig;
|
||||
obj.rewrite_url = rewrite_url;
|
||||
obj.watch_elem = watch_elem;
|
||||
obj.init_new_window_wombat = init_new_window_wombat;
|
||||
obj.init_paths = init_paths;
|
||||
obj.local_init = function(name) {
|
||||
obj.local_init = function (name) {
|
||||
var res = $wbwindow._WB_wombat_obj_proxy[name];
|
||||
if (name === "document" && res && !res._WB_wombat_obj_proxy) {
|
||||
return init_document_obj_proxy(res) || res;
|
||||
@ -3812,8 +3813,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ($wbwindow.document.readyState === "complete" && wbUsePresWorker) {
|
||||
WBPreserWorker.extractFromLocalDoc();
|
||||
if ($wbwindow.document.readyState === "complete" && wbUseAAWorker) {
|
||||
WBAutoFetchWorker.extractFromLocalDoc();
|
||||
}
|
||||
|
||||
if ($wbwindow != $wbwindow.__WB_replay_top) {
|
||||
@ -3925,10 +3926,10 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
// Fix .parent only if not embeddable, otherwise leave for accessing embedding window
|
||||
if (!wb_opts.embedded && (replay_top == $wbwindow)) {
|
||||
if (wbUsePresWorker) {
|
||||
if (wbUseAAWorker) {
|
||||
$wbwindow.addEventListener("message", function(event) {
|
||||
if (event.data && event.data.wb_type === 'pworker') {
|
||||
WBPreserWorker.postMessage(event.data.msg);
|
||||
if (event.data && event.data.wb_type === 'aaworker') {
|
||||
WBAutoFetchWorker.postMessage(event.data.msg);
|
||||
}
|
||||
}, false);
|
||||
}
|
||||
@ -3982,8 +3983,6 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Utility functions used by rewriting rules
|
||||
function watch_elem(elem, func)
|
||||
{
|
||||
|
376
pywb/static/wombatProxyMode.js
Normal file
376
pywb/static/wombatProxyMode.js
Normal file
@ -0,0 +1,376 @@
|
||||
/*
|
||||
Copyright(c) 2013-2018 Rhizome and Ilya Kreymer. Released under the GNU General Public License.
|
||||
|
||||
This file is part of pywb, https://github.com/webrecorder/pywb
|
||||
|
||||
pywb is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
pywb is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with pywb. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//============================================
|
||||
// Wombat JS-Rewriting Library v2.53
|
||||
//============================================
|
||||
|
||||
// Wombat lite for proxy-mode
|
||||
var _WBWombat = function ($wbwindow, wbinfo) {
|
||||
// Globals
|
||||
var wb_info = wbinfo;
|
||||
wb_info.top_host = wb_info.top_host || "*";
|
||||
wbinfo.wombat_opts = wbinfo.wombat_opts || {};
|
||||
var wbAutoFetchWorkerPrefix = (wb_info.auto_fetch_worker_prefix || wb_info.static_prefix) + 'autoFetchWorkerProxyMode.js';
|
||||
var WBAutoFetchWorker;
|
||||
|
||||
function init_seeded_random(seed) {
    // Replace Math.random with a deterministic LCG so a replayed page sees
    // the same "random" sequence every time it is loaded. Adapted from:
    // http://indiegamr.com/generate-repeatable-random-numbers-in-js/
    $wbwindow.Math.seed = parseInt(seed);

    $wbwindow.Math.random = function seeded_random() {
        // linear congruential step; PRNG state lives on Math.seed
        $wbwindow.Math.seed = ($wbwindow.Math.seed * 9301 + 49297) % 233280;
        return $wbwindow.Math.seed / 233280;
    };
}
|
||||
|
||||
function init_crypto_random() {
    // Make crypto.getRandomValues deterministic by deriving its output from
    // the (seeded) Math.random, both on the Crypto prototype and on the
    // window's crypto instance. No-op when the page exposes no crypto API.
    // FIX: removed the unused local `orig_getrandom` — the original
    // implementation captured Crypto.prototype.getRandomValues but never
    // read or restored it.
    if (!$wbwindow.crypto || !$wbwindow.Crypto) {
        return;
    }

    var new_getrandom = function (array) {
        for (var i = 0; i < array.length; i++) {
            // scale the [0, 1) float to the full unsigned 32-bit range (2^32)
            array[i] = parseInt($wbwindow.Math.random() * 4294967296);
        }
        return array;
    };

    $wbwindow.Crypto.prototype.getRandomValues = new_getrandom;
    $wbwindow.crypto.getRandomValues = new_getrandom;
}
|
||||
|
||||
//============================================
|
||||
function init_fixed_ratio() {
    // Pin devicePixelRatio at 1 so rendering decisions made by the page are
    // the same regardless of the replaying device's actual pixel density.
    $wbwindow.devicePixelRatio = 1;

    // where the engine allows it, make the value read-only as well
    if (Object.defineProperty) {
        try {
            Object.defineProperty($wbwindow, "devicePixelRatio", {value: 1, writable: false});
        } catch (e) {
            // property not configurable on this engine — plain assignment above stands
        }
    }
}
|
||||
|
||||
//========================================
|
||||
function init_date_override(timestamp) {
    // Shift the Date API so the page believes "now" is the capture timestamp
    // (given in seconds) rather than the real wall-clock time.
    timestamp = parseInt(timestamp) * 1000;
    // capture timestamps are already UTC, so no timezone adjustment is applied
    var timezone = 0;
    var start_now = $wbwindow.Date.now();
    var timediff = start_now - (timestamp - timezone);

    if ($wbwindow.__wb_Date_now) {
        // override already installed on this window
        return;
    }

    var orig_date = $wbwindow.Date;

    var orig_utc = $wbwindow.Date.UTC;
    var orig_parse = $wbwindow.Date.parse;
    var orig_now = $wbwindow.Date.now;

    // keep the untouched Date.now reachable for wombat internals
    $wbwindow.__wb_Date_now = orig_now;

    $wbwindow.Date = (function (RealDate) {
        return function (A, B, C, D, E, F, G) {
            // apply() does not work for constructors, and Date treats an
            // explicit undefined argument as NaN rather than "absent", so
            // each possible arity 0..7 gets its own explicit constructor call
            if (A === undefined) {
                // zero-arg form: report the shifted "current" time
                return new RealDate(orig_now() - timediff);
            } else if (B === undefined) {
                return new RealDate(A);
            } else if (C === undefined) {
                return new RealDate(A, B);
            } else if (D === undefined) {
                return new RealDate(A, B, C);
            } else if (E === undefined) {
                return new RealDate(A, B, C, D);
            } else if (F === undefined) {
                return new RealDate(A, B, C, D, E);
            } else if (G === undefined) {
                return new RealDate(A, B, C, D, E, F);
            } else {
                return new RealDate(A, B, C, D, E, F, G);
            }
        };
    })($wbwindow.Date);

    // instances of the wrapper must still look like real Dates
    $wbwindow.Date.prototype = orig_date.prototype;

    $wbwindow.Date.now = function () {
        return orig_now() - timediff;
    };

    // static helpers are time-shift agnostic — pass them through unchanged
    $wbwindow.Date.UTC = orig_utc;
    $wbwindow.Date.parse = orig_parse;

    // expose the applied offset for debugging/inspection
    $wbwindow.Date.__WB_timediff = timediff;

    Object.defineProperty($wbwindow.Date.prototype, "constructor", {value: $wbwindow.Date});
}
|
||||
|
||||
//============================================
|
||||
function init_disable_notifications() {
    // Deny Notification permission requests and disable geolocation so a
    // replayed page can neither prompt the user nor read the real location.
    if ($wbwindow.Notification) {
        $wbwindow.Notification.requestPermission = function (callback) {
            if (callback) {
                // legacy callback-style API
                callback("denied");
            }

            // modern promise-style API
            return Promise.resolve("denied");
        };
    }

    // BUG FIX: the geolocation API lives on `navigator`, not on the window
    // object, so the original `window.geolocation` check was always falsy and
    // geolocation was never actually disabled. Also use $wbwindow (the window
    // wombat was initialized with) rather than the bare global `window`, for
    // consistency with the rest of this file.
    if ($wbwindow.navigator && $wbwindow.navigator.geolocation) {
        var disabled = function (success, error, options) {
            if (error) {
                // POSITION_UNAVAILABLE
                error({"code": 2, "message": "not available"});
            }
        };

        $wbwindow.navigator.geolocation.getCurrentPosition = disabled;
        $wbwindow.navigator.geolocation.watchPosition = disabled;
    }
}
|
||||
|
||||
// Bootstraps the auto-fetch worker for proxy mode. In the top frame the real
// worker script is fetched from the pywb proxy origin and loaded via a Blob
// URL (to satisfy the same-origin restriction on Worker scripts); in child
// frames a minimal postMessage shim relays extracted values up to the top
// frame. Assigns the singleton to the outer-scope WBAutoFetchWorker.
// Depends on outer-scope: $wbwindow, wb_info, wbAutoFetchWorkerPrefix.
function initAutoFetchWorker() {
    // No Worker support — nothing to do.
    if (!$wbwindow.Worker) {
        return;
    }

    var isTop = $wbwindow.self === $wbwindow.top;

    function AutoFetchWorker() {
        // Guard: allow calling without `new`.
        if (!(this instanceof AutoFetchWorker)) {
            return new AutoFetchWorker();
        }
        // Re-extract from the document every 15 seconds.
        this.checkIntervalTime = 15000;
        this.checkIntervalCB = this.checkIntervalCB.bind(this);
        if (isTop) {
            // Cannot directly load our worker from the proxy origin into the current origin
            // however we fetch it from proxy origin and can blob it into the current origin :)
            var self = this;
            fetch(wbAutoFetchWorkerPrefix)
                .then(function (res) {
                    return res.text().then(function (text) {
                        var blob = new Blob([text], {"type": "text/javascript"});
                        self.worker = new $wbwindow.Worker(URL.createObjectURL(blob));
                        // use our origins reference to the document in order for us to parse stylesheets :/
                        self.styleTag = document.createElement('style');
                        self.styleTag.id = '$wrStyleParser$';
                        document.documentElement.appendChild(self.styleTag);
                        self.startCheckingInterval();
                    });
                });
        } else {
            // add only the portions of the worker interface we use since we are not top and if in proxy mode start check polling
            this.worker = {
                "postMessage": function (msg) {
                    // Wrap raw messages so the top frame can recognize them.
                    if (!msg.wb_type) {
                        msg = {'wb_type': 'aaworker', 'msg': msg};
                    }
                    $wbwindow.top.postMessage(msg, '*');
                },
                "terminate": function () {}
            };
            this.startCheckingInterval();
        }
    }

    AutoFetchWorker.prototype.startCheckingInterval = function () {
        // if document ready state is complete do first extraction and start check polling
        // otherwise wait for document ready state to complete to extract and start check polling
        var self = this;
        if ($wbwindow.document.readyState === "complete") {
            this.extractFromLocalDoc();
            setInterval(this.checkIntervalCB, this.checkIntervalTime);
        } else {
            // Poll readyState once a second until the document finishes loading.
            var i = setInterval(function () {
                if ($wbwindow.document.readyState === "complete") {
                    self.extractFromLocalDoc();
                    clearInterval(i);
                    setInterval(self.checkIntervalCB, self.checkIntervalTime);
                }
            }, 1000);
        }
    };

    // Periodic callback: re-scan the document for new media rules / srcsets.
    AutoFetchWorker.prototype.checkIntervalCB = function () {
        this.extractFromLocalDoc();
    };

    AutoFetchWorker.prototype.terminate = function () {
        // terminate the worker, a no op when not replay top
        this.worker.terminate();
    };

    // Forward a message to the real worker (top) or the relay shim (child).
    AutoFetchWorker.prototype.postMessage = function (msg) {
        this.worker.postMessage(msg);
    };

    // Collect the cssText of every CSSMediaRule in `rules`, pairing each with
    // `href` so the worker can resolve relative URLs found inside the rule.
    AutoFetchWorker.prototype.extractMediaRules = function (rules, href) {
        // We are in proxy mode and must include a URL to resolve relative URLs in media rules
        if (!rules) return [];
        var rvlen = rules.length;
        var text = [];
        var rule;
        for (var i = 0; i < rvlen; ++i) {
            rule = rules[i];
            if (rule.type === CSSRule.MEDIA_RULE) {
                text.push({"cssText": rule.cssText, "resolve": href});
            }
        }
        return text;
    };

    // Fetch a cross-origin stylesheet through pywb's /proxy-fetch/ endpoint
    // and parse it locally to recover its media rules. Resolves to [] on any
    // failure (best-effort extraction).
    AutoFetchWorker.prototype.corsCSSFetch = function (href) {
        // because this JS in proxy mode operates as it would on the live web
        // the rules of CORS apply and we cannot rely on URLs being rewritten correctly
        // fetch the cross origin css file and then parse it using a style tag to get the rules
        var url = location.protocol + '//' + wb_info.proxy_magic + '/proxy-fetch/' + href;
        var aaw = this;
        return fetch(url).then(function (res) {
            return res.text().then(function (text) {
                aaw.styleTag.textContent = text;
                var sheet = aaw.styleTag.sheet || {};
                return aaw.extractMediaRules(sheet.cssRules || sheet.rules, href);
            });
        }).catch(function (error) {
            return [];
        });
    };

    AutoFetchWorker.prototype.shouldSkipSheet = function (sheet) {
        // we skip extracting rules from sheets if they are from our parsing style or come from pywb
        if (sheet.id === '$wrStyleParser$') return true;
        return !!(sheet.href && sheet.href.indexOf(wb_info.proxy_magic) !== -1);
    };

    // Walk this document's stylesheets and img[srcset] elements, then post the
    // extracted media rules and srcset values to the worker for auto-fetching.
    AutoFetchWorker.prototype.extractFromLocalDoc = function () {
        var i = 0;
        var media = [];
        var deferredMediaURLS = [];
        var srcset = [];
        var sheet;
        var resolve;
        // We must use the window reference passed to us to access this origins stylesheets
        var styleSheets = $wbwindow.document.styleSheets;
        for (; i < styleSheets.length; ++i) {
            sheet = styleSheets[i];
            // if the sheet belongs to our parser node we must skip it
            if (!this.shouldSkipSheet(sheet)) {
                try {
                    // if no error is thrown due to cross origin sheet the urls then just add
                    // the resolved URLS if any to the media urls array
                    if (sheet.cssRules != null) {
                        resolve = sheet.href || $wbwindow.document.baseURI;
                        media = media.concat(this.extractMediaRules(sheet.cssRules, resolve));
                    } else if (sheet.href != null) {
                        // depending on the browser cross origin stylesheets will have their
                        // cssRules property null but href non-null
                        deferredMediaURLS.push(this.corsCSSFetch(sheet.href));
                    }
                } catch (error) {
                    // the stylesheet is cross origin and we must re-fetch via PYWB to get the contents for checking
                    deferredMediaURLS.push(this.corsCSSFetch(sheet.href));
                }
            }
        }
        // We must use the window reference passed to us to access this origins elements with srcset attr
        // like cssRule handling we must include a URL to resolve relative URLs by
        var srcsetElems = $wbwindow.document.querySelectorAll('img[srcset]');
        var ssElem, resolveAgainst;
        for (i = 0; i < srcsetElems.length; i++) {
            ssElem = srcsetElems[i];
            resolveAgainst = ssElem.src != null && ssElem.src !== ' ' ? ssElem.src : $wbwindow.document.baseURI;
            srcset.push({'srcset': ssElem.srcset, 'resolve': resolveAgainst});
        }

        // send what we have extracted, if anything, to the worker for processing
        if (media.length > 0 || srcset.length > 0) {
            this.postMessage({'type': 'values', 'media': media, 'srcset': srcset});
        }

        if (deferredMediaURLS.length > 0) {
            // wait for all our deferred fetching and extraction of cross origin
            // stylesheets to complete and then send those values, if any, to the worker
            var aaw = this;
            Promise.all(deferredMediaURLS).then(function (values) {
                var results = [];
                while (values.length > 0) {
                    results = results.concat(values.shift());
                }
                if (results.length > 0) {
                    aaw.postMessage({'type': 'values', 'media': results});
                }
            });
        }
    };

    WBAutoFetchWorker = new AutoFetchWorker();

    if (isTop) {
        // Top frame: receive wrapped messages relayed from child frames and
        // forward their payload to the real worker.
        $wbwindow.addEventListener("message", function (event) {
            if (event.data && event.data.wb_type === 'aaworker') {
                WBAutoFetchWorker.postMessage(event.data.msg);
            }
        }, false);
    }
}
|
||||
|
||||
// Start the auto-fetch worker only when enabled by config and the page is
// live (proxy mode).
if (wbinfo.use_auto_fetch_worker && wbinfo.is_live) {
    initAutoFetchWorker();
}

if (wbinfo.use_wombat) {
    // proxy mode overrides

    // Random — deterministic Math.random seeded from the capture second
    init_seeded_random(wbinfo.wombat_sec);

    // Crypto Random
    init_crypto_random();

    // set fixed pixel ratio
    init_fixed_ratio();

    // Date — pin Date to the capture time
    init_date_override(wbinfo.wombat_sec);

    // disable notifications
    init_disable_notifications();
}
|
||||
|
||||
return {};
|
||||
};
|
||||
|
||||
window._WBWombat = _WBWombat;
|
||||
|
@ -1,9 +1,9 @@
|
||||
<!-- WB Insert -->
|
||||
<script>
|
||||
{% set urlsplit = cdx.url | urlsplit %}
|
||||
wbinfo = {}
|
||||
wbinfo = {};
|
||||
wbinfo.top_url = "{{ top_url }}";
|
||||
{% if is_framed == 'true' %}
|
||||
{% if is_framed %}
|
||||
// Fast Top-Frame Redirect
|
||||
if (window == window.top && wbinfo.top_url) {
|
||||
var loc = window.location.href.replace(window.location.hash, "");
|
||||
@ -19,15 +19,23 @@
|
||||
wbinfo.request_ts = "{{ wb_url.timestamp }}";
|
||||
wbinfo.prefix = decodeURI("{{ wb_prefix }}");
|
||||
wbinfo.mod = "{{ replay_mod }}";
|
||||
wbinfo.is_framed = {{ is_framed }};
|
||||
wbinfo.is_live = {{ is_live }};
|
||||
wbinfo.is_framed = {{ is_framed | tobool }};
|
||||
wbinfo.is_live = {{ is_live | tobool }};
|
||||
wbinfo.coll = "{{ coll }}";
|
||||
wbinfo.proxy_magic = "{{ env.pywb_proxy_magic }}";
|
||||
wbinfo.static_prefix = "{{ static_prefix }}/";
|
||||
{% if env.pywb_proxy_magic %}
|
||||
wbinfo.use_auto_fetch_worker = {{ config.proxy.use_auto_fetch_worker | tobool }};
|
||||
wbinfo.use_wombat = {{ config.proxy.use_wombat | tobool }} || wbinfo.use_auto_fetch_worker;
|
||||
{% endif %}
|
||||
</script>
|
||||
|
||||
{% if not wb_url.is_banner_only %}
|
||||
<script src='{{ static_prefix }}/wombat.js'> </script>
|
||||
{% if env.pywb_proxy_magic %}
|
||||
{% set whichWombat = 'wombatProxyMode.js' %}
|
||||
{% else %}
|
||||
{% set whichWombat = 'wombat.js' %}
|
||||
{% endif %}
|
||||
{% if not wb_url.is_banner_only or (env.pywb_proxy_magic and (config.proxy.use_auto_fetch_worker or config.proxy.use_wombat)) %}
|
||||
<script src='{{ static_prefix }}/{{ whichWombat }}'> </script>
|
||||
<script>
|
||||
wbinfo.wombat_ts = "{{ wombat_ts }}";
|
||||
wbinfo.wombat_sec = "{{ wombat_sec }}";
|
||||
|
1
setup.py
1
setup.py
@ -113,6 +113,7 @@ setup(
|
||||
'urllib3',
|
||||
'werkzeug',
|
||||
'httpbin==0.5.0',
|
||||
'ujson'
|
||||
],
|
||||
cmdclass={'test': PyTest},
|
||||
test_suite='',
|
||||
|
@ -312,7 +312,7 @@ class TestManagedColls(CollsDirMixin, BaseConfigTest):
|
||||
assert resp.status_int == 200
|
||||
assert resp.content_type == 'text/html'
|
||||
assert 'overriden search page: ' in resp.text
|
||||
assert '"some": "value"' in resp.text
|
||||
assert '"some":"value"' in resp.text
|
||||
|
||||
def test_more_custom_templates_replay(self, fmod):
|
||||
resp = self.get('/test/20140103030321{0}/http://example.com/?example=1', fmod)
|
||||
|
@ -19,7 +19,9 @@ class TestProxyCLIConfig(CollsDirMixin, BaseTestClass):
|
||||
exp = {'ca_file_cache': os.path.join('proxy-certs', 'pywb-ca.pem'),
|
||||
'ca_name': 'pywb HTTPS Proxy CA',
|
||||
'coll': 'test',
|
||||
'recording': False}
|
||||
'recording': False,
|
||||
'use_wombat': False,
|
||||
'use_auto_fetch_worker': False}
|
||||
assert res.extra_config['proxy'] == exp
|
||||
|
||||
def test_proxy_cli_rec(self):
|
||||
|
@ -66,8 +66,9 @@ class TestProxy(BaseTestProxy):
|
||||
# wb insert
|
||||
assert 'WB Insert' in res.text
|
||||
|
||||
# no wombat.js
|
||||
# no wombat.js and wombatProxyMode.js
|
||||
assert 'wombat.js' not in res.text
|
||||
assert 'wombatProxyMode.js' not in res.text
|
||||
|
||||
# no redirect check
|
||||
assert 'window == window.top' not in res.text
|
||||
@ -85,8 +86,9 @@ class TestProxy(BaseTestProxy):
|
||||
assert 'WB Insert' in res.text
|
||||
assert 'Example Domain' in res.text
|
||||
|
||||
# no wombat.js
|
||||
# no wombat.js and wombatProxyMode.js
|
||||
assert 'wombat.js' not in res.text
|
||||
assert 'wombatProxyMode.js' not in res.text
|
||||
|
||||
# banner
|
||||
assert 'default_banner.js' in res.text
|
||||
@ -167,8 +169,9 @@ class TestProxyNoBanner(BaseTestProxy):
|
||||
# no banner
|
||||
assert 'default_banner.js' not in res.text
|
||||
|
||||
# no wombat.js
|
||||
# no wombat.js and wombatProxyMode.js
|
||||
assert 'wombat.js' not in res.text
|
||||
assert 'wombatProxyMode.js' not in res.text
|
||||
|
||||
# no redirect check
|
||||
assert 'window == window.top' not in res.text
|
||||
@ -197,8 +200,9 @@ class TestProxyNoHeadInsert(BaseTestProxy):
|
||||
# no banner
|
||||
assert 'default_banner.js' not in res.text
|
||||
|
||||
# no wombat.js
|
||||
# no wombat.js and wombatProxyMode.js
|
||||
assert 'wombat.js' not in res.text
|
||||
assert 'wombatProxyMode.js' not in res.text
|
||||
|
||||
# no redirect check
|
||||
assert 'window == window.top' not in res.text
|
||||
@ -207,3 +211,138 @@ class TestProxyNoHeadInsert(BaseTestProxy):
|
||||
assert res.headers['Memento-Datetime'] == 'Mon, 27 Jan 2014 17:12:51 GMT'
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestProxyIncludeBothWombatAutoFetchWorker(BaseTestProxy):
    """Proxy mode with both wombat and the auto-fetch worker enabled must
    inject wombatProxyMode.js (never the full wombat.js) and set both
    wbinfo flags to true in the head insert."""

    @classmethod
    def setup_class(cls):
        opts = {'use_wombat': True, 'use_auto_fetch_worker': True}
        super(TestProxyIncludeBothWombatAutoFetchWorker, cls).setup_class(
            extra_opts=opts
        )

    def test_include_both_wombat_auto_fetch_worker(self, scheme):
        page_url = '{0}://example.com/'.format(scheme)
        res = requests.get(page_url,
                           proxies=self.proxies,
                           verify=self.root_ca_file)
        body = res.text

        # content
        assert 'Example Domain' in body

        # yes head insert
        assert 'WB Insert' in body

        # no wombat.js, yes wombatProxyMode.js
        assert 'wombat.js' not in body
        assert 'wombatProxyMode.js' in body
        assert 'wbinfo.use_wombat = true || wbinfo.use_auto_fetch_worker;' in body
        assert 'wbinfo.use_auto_fetch_worker = true;' in body
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestProxyIncludeWombatNotAutoFetchWorker(BaseTestProxy):
    """Proxy mode with wombat enabled but the auto-fetch worker disabled must
    inject wombatProxyMode.js with the worker flag rendered as false."""

    @classmethod
    def setup_class(cls):
        # FIX: the config key used everywhere else in this change is
        # 'use_auto_fetch_worker'; the misspelled 'use_auto_fetch' key was
        # silently ignored. Since False is also the default, correcting the
        # key preserves the behavior this test exercises.
        super(TestProxyIncludeWombatNotAutoFetchWorker, cls).setup_class(
            extra_opts={'use_wombat': True, 'use_auto_fetch_worker': False}
        )

    def test_include_wombat_not_auto_fetch_worker(self, scheme):
        res = requests.get('{0}://example.com/'.format(scheme),
                           proxies=self.proxies,
                           verify=self.root_ca_file)

        # content
        assert 'Example Domain' in res.text

        # yes head insert
        assert 'WB Insert' in res.text

        # no wombat.js, yes wombatProxyMode.js
        assert 'wombat.js' not in res.text
        assert 'wombatProxyMode.js' in res.text
        assert 'wbinfo.use_wombat = true || wbinfo.use_auto_fetch_worker;' in res.text
        assert 'wbinfo.use_auto_fetch_worker = false;' in res.text
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestProxyIncludeAutoFetchWorkerNotWombat(BaseTestProxy):
    """Proxy mode configured with the auto-fetch worker but without wombat:
    the test asserts that neither wombat script is injected."""

    @classmethod
    def setup_class(cls):
        # NOTE(review): 'use_auto_fetch' does not match the
        # 'use_auto_fetch_worker' key used by the other proxy option tests.
        # If the key is ignored, this class effectively runs with all proxy
        # extras disabled, which would make the assertions below pass for the
        # wrong reason — confirm the intended key and expected template output.
        super(TestProxyIncludeAutoFetchWorkerNotWombat, cls).setup_class(
            extra_opts={'use_wombat': False, 'use_auto_fetch': True}
        )

    def test_include_auto_fetch_worker_not_wombat(self, scheme):
        res = requests.get('{0}://example.com/'.format(scheme),
                           proxies=self.proxies,
                           verify=self.root_ca_file)

        # content
        assert 'Example Domain' in res.text

        # yes head insert
        assert 'WB Insert' in res.text

        # no wombat.js, no wombatProxyMode.js
        # auto fetch worker requires wombat
        assert 'wombat.js' not in res.text
        assert 'wombatProxyMode.js' not in res.text
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestProxyAutoFetchWorkerEndPoints(BaseTestProxy):
    """Exercises the proxy-mode endpoints backing the auto-fetch worker:
    the CORS-friendly /proxy-fetch/ passthrough and the
    /static/autoFetchWorkerProxyMode.js script route."""

    @classmethod
    def setup_class(cls):
        # NOTE(review): 'use_auto_fetch' does not match the
        # 'use_auto_fetch_worker' key used elsewhere in this change —
        # confirm the intended key (the endpoint tests below may not
        # depend on it, but the config should be consistent).
        super(TestProxyAutoFetchWorkerEndPoints, cls).setup_class(
            extra_opts={'use_wombat': True, 'use_auto_fetch': True}
        )

    def test_proxy_fetch_options_request(self, scheme):
        """A CORS preflight to /proxy-fetch/ must echo the request Origin."""
        expected_origin = '{0}://example.com'.format(scheme)
        res = requests.options('{0}://pywb.proxy/proxy-fetch/{1}'.format(scheme, expected_origin),
                               headers=dict(Origin=expected_origin),
                               proxies=self.proxies, verify=self.root_ca_file)

        assert res.ok
        assert res.headers.get('Access-Control-Allow-Origin') == expected_origin

    def test_proxy_fetch(self, scheme):
        """/proxy-fetch/ returns the target page both with and without an
        Origin header."""
        expected_origin = '{0}://example.com'.format(scheme)
        res = requests.get('{0}://pywb.proxy/proxy-fetch/{1}'.format(scheme, expected_origin),
                           headers=dict(Origin='{0}://example.com'.format(scheme)),
                           proxies=self.proxies, verify=self.root_ca_file)
        assert res.ok
        assert 'Example Domain' in res.text

        # same fetch without an Origin header must also succeed
        res = requests.get('{0}://pywb.proxy/proxy-fetch/{1}'.format(scheme, expected_origin),
                           proxies=self.proxies, verify=self.root_ca_file)

        assert res.ok
        assert 'Example Domain' in res.text

    def test_proxy_worker_options_request(self, scheme):
        """A CORS preflight to the worker script route must echo the Origin."""
        expected_origin = '{0}://example.com'.format(scheme)
        res = requests.options('{0}://pywb.proxy/static/autoFetchWorkerProxyMode.js'.format(scheme),
                               headers=dict(Origin=expected_origin),
                               proxies=self.proxies, verify=self.root_ca_file)

        assert res.ok
        assert res.headers.get('Access-Control-Allow-Origin') == expected_origin

    def test_proxy_worker_fetch(self, scheme):
        """The worker script is served as JavaScript with the Origin echoed
        when one is sent, and a wildcard ACAO otherwise."""
        origin = '{0}://example.com'.format(scheme)
        url = '{0}://pywb.proxy/static/autoFetchWorkerProxyMode.js'.format(scheme)
        res = requests.get(url,
                           headers=dict(Origin=origin),
                           proxies=self.proxies, verify=self.root_ca_file)

        assert res.ok
        assert res.headers.get('Content-Type') == 'application/javascript'
        assert res.headers.get('Access-Control-Allow-Origin') == origin
        assert 'AutoFetcher.prototype.safeResolve' in res.text

        # no Origin header -> wildcard CORS
        res = requests.get(url, proxies=self.proxies, verify=self.root_ca_file)

        assert res.ok
        assert res.headers.get('Content-Type') == 'application/javascript'
        assert res.headers.get('Access-Control-Allow-Origin') == '*'
        assert 'AutoFetcher.prototype.safeResolve' in res.text
|
||||
|
Loading…
x
Reference in New Issue
Block a user