From 4a85869427647c30cedc6001c24920bd0006dcd3 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 3 Apr 2015 10:13:27 -0700 Subject: [PATCH] cli refactor: use classes in cli to allow custom options get rid of custom init for live_rewrite_handler, just use create_wb_router() with custom config for consistent init --- pywb/apps/cli.py | 158 ++++++++++++++++++---------- pywb/apps/live_rewrite_server.py | 33 +----- pywb/webapp/live_rewrite_handler.py | 11 -- tests/test_live_proxy.py | 13 ++- tests/test_live_rewriter.py | 8 +- 5 files changed, 116 insertions(+), 107 deletions(-) diff --git a/pywb/apps/cli.py b/pywb/apps/cli.py index 092b8a38..05a9d7db 100644 --- a/pywb/apps/cli.py +++ b/pywb/apps/cli.py @@ -1,84 +1,126 @@ import os import logging from argparse import ArgumentParser +from pywb.framework.wsgi_wrappers import init_app +from pywb.webapp.pywb_init import create_cdx_server_app, create_wb_router #================================================================= def cdx_server(args=None): #pragma: no cover - def load(): - import pywb.apps.cdx_server as mod - return mod - - cli(appload=load, - args=args, - default_port=8090, - desc='pywb CDX Index Server') + CdxCli(args=args, + default_port=8080, + desc='pywb CDX Index Server').run() #================================================================= def live_rewrite_server(args=None): #pragma: no cover - def load(): - import pywb.apps.live_rewrite_server as mod - return mod - - cli(appload=load, - args=args, - default_port=8090, - desc='pywb Live Rewrite Proxy Server') + LiveCli(args=args, + default_port=8090, + desc='pywb Live Rewrite Proxy Server').run() #================================================================= def wayback(args=None): - def load(): - import pywb.apps.wayback as mod - return mod - - cli(appload=load, - args=args, - default_port=8080, - desc='pywb Wayback Web Archive Replay') + WaybackCli(args=args, + default_port=8080, + desc='pywb Wayback Web Archive Replay').run() -#================================================================= -def cli(appload, args=None, default_port=8080, desc=''): - parser = ArgumentParser(desc) - parser.add_argument('-p', '--port', type=int, default=default_port) - parser.add_argument('-t', '--threads', type=int, default=4) - parser.add_argument('-a', '--autoindex', action='store_true') +#============================================================================= +class BaseCli(object): + def __init__(self, args=None, default_port=8080, desc=''): + parser = ArgumentParser(desc) + parser.add_argument('-p', '--port', type=int, default=default_port) + parser.add_argument('-t', '--threads', type=int, default=4) - help_dir='Specify root archive dir (default is current working directory)' - parser.add_argument('-d', '--directory', help=help_dir) + self.desc = desc - r = parser.parse_args(args) - if r.directory: #pragma: no cover - os.chdir(r.directory) + self._extend_parser(parser) - # Load App - #from pywb.apps.wayback import application - application = appload().application + self.r = parser.parse_args(args) - if r.autoindex: - from pywb.manager.manager import CollectionsManager - m = CollectionsManager('', must_exist=False) - if not os.path.isdir(m.colls_dir): - msg = 'No managed directory "{0}" for auto-indexing' - logging.error(msg.format(m.colls_dir)) - import sys - sys.exit(2) - else: - msg = 'Auto-Indexing Enabled on "{0}"' - logging.info(msg.format(m.colls_dir)) - m.autoindex(do_loop=False) + self.application = self.load() - try: - from waitress import serve - serve(application, port=r.port, threads=r.threads) - except ImportError: # pragma: no cover - # Shouldn't ever happen as installing waitress, but just in case.. - from pywb.framework.wsgi_wrappers import start_wsgi_ref_server - start_wsgi_ref_server(application, desc, port=r.port) + def _extend_parser(self, parser): #pragma: no cover + pass + + def load(self): #pragma: no cover + pass + + def run(self): + try: + from waitress import serve + print(self.desc) + serve(self.application, port=self.r.port, threads=self.r.threads) + except ImportError: # pragma: no cover + # Shouldn't ever happen as installing waitress, but just in case.. + from pywb.framework.wsgi_wrappers import start_wsgi_ref_server + start_wsgi_ref_server(self.application, self.desc, port=self.r.port) -#================================================================= +#============================================================================= +class LiveCli(BaseCli): + def _extend_parser(self, parser): + parser.add_argument('-x', '--proxy', + help='Specify host:port to use as HTTP/S proxy') + + parser.add_argument('-f', '--framed', action='store_true', + help='Replay using framed wrapping mode') + + def load(self): + config = dict(proxyhostport=self.r.proxy, + framed_replay=self.r.framed, + enable_auto_colls=False, + collections=dict(rewrite='$liveweb')) + + return init_app(create_wb_router, load_yaml=False, config=config) + + +#============================================================================= +class ReplayCli(BaseCli): + def _extend_parser(self, parser): + parser.add_argument('-a', '--autoindex', action='store_true') + + help_dir='Specify root archive dir (default is current working directory)' + parser.add_argument('-d', '--directory', help=help_dir) + + + def load(self): + if self.r.directory: #pragma: no cover + os.chdir(self.r.directory) + + def run(self): + if self.r.autoindex: + from pywb.manager.manager import CollectionsManager + m = CollectionsManager('', must_exist=False) + if not os.path.isdir(m.colls_dir): + msg = 'No managed directory "{0}" for auto-indexing' + logging.error(msg.format(m.colls_dir)) + import sys + sys.exit(2) + else: + msg = 'Auto-Indexing Enabled on "{0}"' + logging.info(msg.format(m.colls_dir)) + m.autoindex(do_loop=False) + + super(ReplayCli, self).run() + +#============================================================================= +class CdxCli(ReplayCli): #pragma: no cover + def load(self): + super(CdxCli, self).load() + return init_app(create_cdx_server_app, + load_yaml=True) + + +#============================================================================= +class WaybackCli(ReplayCli): + def load(self): + super(WaybackCli, self).load() + return init_app(create_wb_router, + load_yaml=True) + + +#============================================================================= if __name__ == "__main__": wayback() diff --git a/pywb/apps/live_rewrite_server.py b/pywb/apps/live_rewrite_server.py index 4ac94f01..5d4a6285 100644 --- a/pywb/apps/live_rewrite_server.py +++ b/pywb/apps/live_rewrite_server.py @@ -1,34 +1,7 @@ -from pywb.framework.wsgi_wrappers import init_app - -from pywb.webapp.live_rewrite_handler import create_live_rewriter_app - -from argparse import ArgumentParser - +from cli import LiveCli #================================================================= -# init rewrite server app +# init default live rewrite server app #================================================================= -def create_app(): - parser = ArgumentParser(description='Live Rewrite Server') - - parser.add_argument('-x', '--proxy', - action='store', - help='Specify host:port to use as HTTP/S proxy') - - parser.add_argument('-f', '--framed', - action='store_true', - help='Replay using framed wrapping mode') - - result, unknown = parser.parse_known_args() - - config = dict(proxyhostport=result.proxy, - framed_replay=result.framed) - - app = init_app(create_live_rewriter_app, load_yaml=False, - config=config) - - return app - - -application = create_app() +application = LiveCli([]).application diff --git a/pywb/webapp/live_rewrite_handler.py b/pywb/webapp/live_rewrite_handler.py index a232cb63..f6fec497 100644 --- a/pywb/webapp/live_rewrite_handler.py +++ b/pywb/webapp/live_rewrite_handler.py @@ -1,6 +1,4 @@ -from pywb.framework.basehandlers import WbUrlHandler from pywb.framework.wbrequestresponse import WbResponse -from pywb.framework.archivalrouter import ArchivalRouter, Route from pywb.framework.cache import create_cache from pywb.rewrite.rewrite_live import LiveRewriter @@ -281,12 +279,3 @@ class YoutubeDLWrapper(object): self.htmlparser.locatestarttagend = self.orig_tagregex return info - - -#================================================================= -def create_live_rewriter_app(config={}): - routes = [Route('rewrite', RewriteHandler(config)), - Route('static/__pywb', StaticHandler('pywb/static/')) - ] - - return ArchivalRouter(routes, hostpaths=['http://localhost:8080']) diff --git a/tests/test_live_proxy.py b/tests/test_live_proxy.py index 700dc7e2..7bd86706 100644 --- a/tests/test_live_proxy.py +++ b/tests/test_live_proxy.py @@ -3,7 +3,8 @@ from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler from server_thread import ServerThreadRunner -from pywb.webapp.live_rewrite_handler import create_live_rewriter_app, RewriteHandler +from pywb.webapp.live_rewrite_handler import RewriteHandler +from pywb.webapp.pywb_init import create_wb_router from pywb.framework.wsgi_wrappers import init_app import webtest @@ -61,9 +62,13 @@ class TestProxyLiveRewriter: self.server = ServerThreadRunner(make_httpd) - self.app = init_app(create_live_rewriter_app, load_yaml=False, - config=dict(framed_replay=True, - proxyhostport=self.server.proxy_dict)) + config = dict(collections=dict(rewrite='$liveweb'), + framed_replay=True, + proxyhostport=self.server.proxy_dict) + + self.app = init_app(create_wb_router, + load_yaml=False, + config=config) def create_cache(): return self.cache diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index 73b9559b..494fc336 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -1,11 +1,11 @@ -from pywb.webapp.live_rewrite_handler import create_live_rewriter_app, RewriteHandler +from pywb.webapp.live_rewrite_handler import RewriteHandler +from pywb.apps.cli import LiveCli from pywb.framework.wsgi_wrappers import init_app import webtest class TestLiveRewriter: def setup(self): - self.app = init_app(create_live_rewriter_app, load_yaml=False, - config=dict(framed_replay=True)) + self.app = LiveCli(['-f']).application self.testapp = webtest.TestApp(self.app) def test_live_rewrite_1(self): @@ -28,7 +28,7 @@ class TestLiveRewriter: resp = self.testapp.get('/rewrite/tf_/http://example.com/') assert resp.status_int == 200 assert '