2017-03-12 12:21:54 -07:00
|
|
|
from gevent.monkey import patch_all; patch_all()
|
2015-03-23 09:08:09 -07:00
|
|
|
from argparse import ArgumentParser
|
|
|
|
|
2017-03-12 12:21:54 -07:00
|
|
|
import logging
|
2015-03-23 09:08:09 -07:00
|
|
|
|
|
|
|
|
2017-03-12 12:21:54 -07:00
|
|
|
#=============================================================================
|
|
|
|
def webagg(args=None):
    """Console entry point: start the pywb Web Aggregator server.

    :param args: argument list forwarded to the CLI's argument parser
        (``None`` is passed through unchanged).
    """
    cli = WarcServerCli(args=args,
                        default_port=8070,
                        desc='pywb Web Aggregator Server')
    cli.run()
|
2015-03-22 23:03:39 -07:00
|
|
|
|
|
|
|
|
2017-03-12 12:21:54 -07:00
|
|
|
#=============================================================================
|
2015-03-22 21:50:56 -07:00
|
|
|
def wayback(args=None):
    """Console entry point: start the pywb Wayback Machine server.

    :param args: argument list forwarded to the CLI's argument parser
        (``None`` is passed through unchanged).
    """
    cli = WaybackCli(args=args,
                     default_port=8080,
                     desc='pywb Wayback Machine Server')
    cli.run()
|
2016-12-09 16:46:33 -08:00
|
|
|
|
|
|
|
|
2017-02-17 18:04:07 -08:00
|
|
|
#=============================================================================
|
2017-03-12 12:21:54 -07:00
|
|
|
def live_rewrite_server(args=None):
    """Console entry point: start the pywb Live Rewrite Proxy server.

    :param args: argument list forwarded to the CLI's argument parser
        (``None`` is passed through unchanged).
    """
    cli = LiveCli(args=args,
                  default_port=8090,
                  desc='pywb Live Rewrite Proxy Server')
    cli.run()
|
2017-02-17 18:04:07 -08:00
|
|
|
|
|
|
|
|
2015-04-03 10:13:27 -07:00
|
|
|
#=============================================================================
|
|
|
|
class BaseCli(object):
    """Common command-line front end shared by the server commands.

    Constructing an instance parses the arguments, configures logging,
    builds the application through :meth:`load` and optionally wraps it
    (proxy mode, profiler).  :meth:`run` then serves ``self.application``.
    """

    def __init__(self, args=None, default_port=8080, desc=''):
        """Parse ``args`` and prepare ``self.application``.

        :param args: argument list given to ``ArgumentParser.parse_args``.
        :param default_port: value used for ``--port`` when not supplied.
        :param desc: description shown by the argument parser; also stored
            on ``self.desc``.
        """
        parser = ArgumentParser(description=desc)
        parser.add_argument('-p', '--port', type=int, default=default_port)
        # NOTE(review): --threads is parsed but not read anywhere in this
        # class; confirm whether it is still needed.
        parser.add_argument('-t', '--threads', type=int, default=4)
        parser.add_argument('--debug', action='store_true')
        parser.add_argument('--profile', action='store_true')

        parser.add_argument('--live', action='store_true',
                            help='Add live-web handler at /live')

        parser.add_argument('--proxy',
                            help='Enable HTTP/S Proxy on specified collection')

        self.desc = desc
        self.extra_config = {}

        # Give subclasses the chance to register their own options before
        # the command line is parsed.
        self._extend_parser(parser)

        self.r = parser.parse_args(args)

        logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
                            level=logging.DEBUG if self.r.debug else logging.INFO)

        # load() fills self.extra_config; subclasses return the application.
        self.application = self.load()

        if self.r.proxy:
            self.application = self.application.init_proxy(self.r.proxy)

        if self.r.profile:
            try:
                # Current location (Werkzeug >= 0.15); the contrib package
                # was removed in Werkzeug 1.0.
                from werkzeug.middleware.profiler import ProfilerMiddleware
            except ImportError:
                # Older Werkzeug releases only ship the contrib module.
                from werkzeug.contrib.profiler import ProfilerMiddleware

            self.application = ProfilerMiddleware(self.application)

    def _extend_parser(self, parser):  #pragma: no cover
        """Hook for subclasses to add arguments to ``parser``; no-op here."""
        pass

    def load(self):
        """Translate the parsed flags into ``self.extra_config`` entries.

        Returns ``None``; subclasses call this and return the application.
        """
        if self.r.live:
            self.extra_config['collections'] = {'live':
                                                {'index': '$live',
                                                 'use_js_obj_proxy': True}}

        if self.r.debug:
            self.extra_config['debug'] = True

    def run(self):
        """Serve the application (delegates to :meth:`run_gevent`)."""
        self.run_gevent()

    def run_gevent(self):
        """Serve ``self.application`` with gevent's WSGIServer until stopped."""
        from gevent.pywsgi import WSGIServer
        logging.info('Starting Gevent Server on %s', self.r.port)
        WSGIServer(('', self.r.port), self.application).serve_forever()
|
|
|
|
|
2015-04-03 10:13:27 -07:00
|
|
|
|
|
|
|
#=============================================================================
|
|
|
|
class ReplayCli(BaseCli):
    """CLI base for replay servers.

    Adds the auto-index, "all" collection and root-directory options and,
    when requested, starts an auto-indexer before serving.
    """

    def _extend_parser(self, parser):
        """Register the replay-specific command-line options."""
        parser.add_argument('-a', '--autoindex', action='store_true')
        parser.add_argument('--auto-interval', type=int, default=30)

        parser.add_argument('--all-coll', help='Set "all" collection')

        parser.add_argument(
            '-d', '--directory',
            help='Specify root archive dir (default is current working directory)')

    def load(self):
        """Extend the base config with ``all_coll`` and switch to ``--directory``."""
        super(ReplayCli, self).load()

        all_coll = self.r.all_coll
        if all_coll:
            self.extra_config['all_coll'] = all_coll

        import os
        root_dir = self.r.directory
        if root_dir: #pragma: no cover
            # Make the requested root archive dir the working directory.
            os.chdir(root_dir)

    def run(self):
        """Start the auto-indexer if ``--autoindex`` was given, then serve.

        Exits the process with status 2 when the indexer's root path is not
        an existing directory.
        """
        if self.r.autoindex:
            from pywb.manager.autoindex import AutoIndexer
            import os
            import sys

            indexer = AutoIndexer(interval=self.r.auto_interval)

            if not os.path.isdir(indexer.root_path):
                logging.error(
                    'No managed directory "{0}" for auto-indexing'.format(
                        indexer.root_path))
                sys.exit(2)

            logging.info(
                'Auto-Indexing Enabled on "{0}", checking every {1} secs'.format(
                    indexer.root_path, self.r.auto_interval))
            indexer.start()

        super(ReplayCli, self).run()
|
|
|
|
|
2017-02-17 18:04:07 -08:00
|
|
|
|
2016-12-09 16:46:33 -08:00
|
|
|
#=============================================================================
|
2017-05-23 19:08:29 -07:00
|
|
|
class WarcServerCli(BaseCli):
    """CLI whose application is a ``WarcServer``."""

    def load(self):
        """Apply the base config flags and return a ``WarcServer`` built from them."""
        from pywb.warcserver.warcserver import WarcServer

        super(WarcServerCli, self).load()
        app = WarcServer(custom_config=self.extra_config)
        return app
|
2015-04-03 10:13:27 -07:00
|
|
|
|
|
|
|
|
2017-02-17 18:04:07 -08:00
|
|
|
#=============================================================================
|
2017-03-12 12:21:54 -07:00
|
|
|
class WaybackCli(ReplayCli):
    """Replay CLI whose application is a ``FrontEndApp``."""

    def load(self):
        """Apply the inherited config flags and return a ``FrontEndApp`` built from them."""
        from pywb.apps.frontendapp import FrontEndApp

        super(WaybackCli, self).load()
        app = FrontEndApp(custom_config=self.extra_config)
        return app
|
2017-02-17 18:04:07 -08:00
|
|
|
|
|
|
|
|
2017-03-10 14:15:02 -08:00
|
|
|
#=============================================================================
|
2017-03-12 12:21:54 -07:00
|
|
|
class LiveCli(BaseCli):
    """CLI for the live rewrite server: always behaves as if ``--live`` was given."""

    def load(self):
        """Force the live flag on, apply the base config and return a ``FrontEndApp``.

        The app is created with ``config_file=None`` and the collected
        ``extra_config`` as its custom config.
        """
        from pywb.apps.frontendapp import FrontEndApp

        # Set before the base load() runs so it adds the 'live' collection.
        self.r.live = True

        super(LiveCli, self).load()
        app = FrontEndApp(config_file=None, custom_config=self.extra_config)
        return app
|
2017-03-10 14:15:02 -08:00
|
|
|
|
2017-02-17 18:04:07 -08:00
|
|
|
|
2015-04-03 10:13:27 -07:00
|
|
|
#=============================================================================
|
2015-03-22 21:50:56 -07:00
|
|
|
# Running this module as a script starts the wayback server.
if __name__ == "__main__":
    wayback()
|