1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-31 03:04:12 +02:00
pywb/pywb/apps/cli.py

154 lines
5.0 KiB
Python
Raw Normal View History

from gevent.monkey import patch_all; patch_all()
from argparse import ArgumentParser
import logging
#=============================================================================
def webagg(args=None):
    """Start the pywb Web Aggregator Server command-line application.

    Builds a ``WarcServerCli`` with a default port of 8070 and runs it.

    :param args: optional list of command-line arguments handed to the
        CLI class; ``None`` is passed through unchanged.
    """
    cli = WarcServerCli(args=args,
                        default_port=8070,
                        desc='pywb Web Aggregator Server')
    cli.run()
#=============================================================================
def wayback(args=None):
    """Start the pywb Wayback Machine Server command-line application.

    Builds a ``WaybackCli`` with a default port of 8080 and runs it.

    :param args: optional list of command-line arguments handed to the
        CLI class; ``None`` is passed through unchanged.
    """
    cli = WaybackCli(args=args,
                     default_port=8080,
                     desc='pywb Wayback Machine Server')
    cli.run()
#=============================================================================
def live_rewrite_server(args=None):
    """Start the pywb Live Rewrite Proxy Server command-line application.

    Builds a ``LiveCli`` with a default port of 8090 and runs it.

    :param args: optional list of command-line arguments handed to the
        CLI class; ``None`` is passed through unchanged.
    """
    cli = LiveCli(args=args,
                  default_port=8090,
                  desc='pywb Live Rewrite Proxy Server')
    cli.run()
#=============================================================================
class BaseCli(object):
    """Shared command-line front end for the pywb server entry points.

    The constructor parses the command line, configures logging, asks the
    subclass for a WSGI application via :meth:`load`, and optionally wraps
    that application (proxy mode, profiler).  :meth:`run` then serves it.

    Attributes set by ``__init__``:

    * ``desc`` -- the description string passed in.
    * ``extra_config`` -- dict of config overrides filled in by ``load()``.
    * ``r`` -- the ``argparse`` namespace with the parsed options.
    * ``application`` -- whatever ``load()`` returned, possibly wrapped.
    """

    def __init__(self, args=None, default_port=8080, desc=''):
        """Parse arguments and build the application.

        :param args: argument list given to ``ArgumentParser.parse_args``;
            with ``None`` argparse falls back to ``sys.argv``.
        :param default_port: value of ``-p/--port`` when not supplied.
        :param desc: description text for the argument parser.
        """
        parser = ArgumentParser(description=desc)
        parser.add_argument('-p', '--port', type=int, default=default_port)
        # NOTE(review): --threads is parsed but not read by any code in
        # this class; confirm whether another runner consumes it.
        parser.add_argument('-t', '--threads', type=int, default=4)
        parser.add_argument('--debug', action='store_true')
        parser.add_argument('--profile', action='store_true')
        parser.add_argument('--live', action='store_true', help='Add live-web handler at /live')
        parser.add_argument('--proxy', help='Enable HTTP/S Proxy on specified collection')

        self.desc = desc
        self.extra_config = {}

        # Subclasses register their own options before parsing happens.
        self._extend_parser(parser)

        self.r = parser.parse_args(args)

        logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
                            level=logging.DEBUG if self.r.debug else logging.INFO)

        self.application = self.load()

        if self.r.proxy:
            # The application is replaced by the result of init_proxy().
            self.application = self.application.init_proxy(self.r.proxy)

        if self.r.profile:
            # ProfilerMiddleware moved to werkzeug.middleware.profiler in
            # Werkzeug 0.15 and werkzeug.contrib was removed in 1.0, so try
            # the current location first and keep the old one as fallback.
            try:
                from werkzeug.middleware.profiler import ProfilerMiddleware
            except ImportError:
                from werkzeug.contrib.profiler import ProfilerMiddleware
            self.application = ProfilerMiddleware(self.application)

    def _extend_parser(self, parser):  #pragma: no cover
        """Hook for subclasses to add options to ``parser``; no-op here."""
        pass

    def load(self):
        """Populate ``extra_config`` from the parsed options.

        Adds a ``'live'`` collection entry when ``--live`` is set and a
        ``'debug'`` flag when ``--debug`` is set.  This base implementation
        returns ``None``; subclasses call it and then return the actual
        application object.
        """
        if self.r.live:
            self.extra_config['collections'] = {'live':
                                                {'index': '$live',
                                                 'use_js_obj_proxy': True}}

        if self.r.debug:
            self.extra_config['debug'] = True

    def run(self):
        """Serve the application; delegates to :meth:`run_gevent`."""
        self.run_gevent()

    def run_gevent(self):
        """Serve ``self.application`` with gevent's WSGI server.

        Binds to all interfaces (empty host string) on the parsed port and
        calls ``serve_forever()``.
        """
        from gevent.pywsgi import WSGIServer
        logging.info('Starting Gevent Server on ' + str(self.r.port))
        WSGIServer(('', self.r.port), self.application).serve_forever()
#=============================================================================
class ReplayCli(BaseCli):
    """CLI base class adding archive-directory and auto-index options.

    Extends the base parser with ``--autoindex``, ``--auto-interval``,
    ``--all-coll`` and ``--directory``, and starts an ``AutoIndexer``
    before serving when auto-indexing is requested.
    """

    def _extend_parser(self, parser):
        """Register the replay-specific command-line options."""
        parser.add_argument('-a', '--autoindex', action='store_true')
        parser.add_argument('--auto-interval', type=int, default=30)
        parser.add_argument('--all-coll', help='Set "all" collection')
        parser.add_argument(
            '-d', '--directory',
            help='Specify root archive dir (default is current working directory)')

    def load(self):
        """Extend the base config and switch to the requested directory.

        Copies ``--all-coll`` into ``extra_config['all_coll']`` when given
        and changes the working directory to ``--directory`` when given.
        Returns ``None``; concrete subclasses return the application.
        """
        super(ReplayCli, self).load()

        all_coll = self.r.all_coll
        if all_coll:
            self.extra_config['all_coll'] = all_coll

        import os
        root_dir = self.r.directory
        if root_dir:  #pragma: no cover
            os.chdir(root_dir)

    def run(self):
        """Optionally start auto-indexing, then serve the application.

        When ``--autoindex`` is set, exits the process with status 2 if the
        indexer's ``root_path`` is not an existing directory.
        """
        if self.r.autoindex:
            import os
            import sys
            from pywb.manager.autoindex import AutoIndexer

            interval = self.r.auto_interval
            indexer = AutoIndexer(interval=interval)
            root_path = indexer.root_path

            if not os.path.isdir(root_path):
                logging.error(
                    'No managed directory "{0}" for auto-indexing'.format(root_path))
                sys.exit(2)

            logging.info(
                'Auto-Indexing Enabled on "{0}", checking every {1} secs'.format(
                    root_path, interval))
            indexer.start()

        super(ReplayCli, self).run()
#=============================================================================
class WarcServerCli(BaseCli):
    """CLI whose application is a ``WarcServer``."""

    def load(self):
        """Build the base config, then return a ``WarcServer`` using it."""
        from pywb.warcserver.warcserver import WarcServer

        super(WarcServerCli, self).load()
        custom_config = self.extra_config
        return WarcServer(custom_config=custom_config)
#=============================================================================
class WaybackCli(ReplayCli):
    """Replay CLI whose application is a ``FrontEndApp``."""

    def load(self):
        """Build the replay config, then return a ``FrontEndApp`` using it."""
        from pywb.apps.frontendapp import FrontEndApp

        super(WaybackCli, self).load()
        custom_config = self.extra_config
        return FrontEndApp(custom_config=custom_config)
#=============================================================================
class LiveCli(BaseCli):
    """CLI for the live rewrite server; always enables the live option."""

    def load(self):
        """Force ``live`` on, build the config and return a ``FrontEndApp``.

        The application is created with ``config_file=None`` and the
        collected ``extra_config`` as its custom config.
        """
        from pywb.apps.frontendapp import FrontEndApp

        # Set before the parent load() so it sees the live flag as enabled.
        self.r.live = True
        super(LiveCli, self).load()

        custom_config = self.extra_config
        return FrontEndApp(config_file=None, custom_config=custom_config)
#=============================================================================
if __name__ == '__main__':
    # Executing this module directly starts the Wayback server CLI.
    wayback()