diff --git a/pywb/apps/live_rewrite_server.py b/pywb/apps/live_rewrite_server.py index 9b29e42b..8d3544f3 100644 --- a/pywb/apps/live_rewrite_server.py +++ b/pywb/apps/live_rewrite_server.py @@ -2,15 +2,36 @@ from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server from pywb.webapp.live_rewrite_handler import create_live_rewriter_app +from argparse import ArgumentParser + + #================================================================= -# init cdx server app +# init rewrite server app #================================================================= -application = init_app(create_live_rewriter_app, load_yaml=False) +def create_app(): + parser = ArgumentParser(description='Live Rewrite Server') + + parser.add_argument('-x', '--proxy', + action='store', + help='Specify host:port to use as HTTP/S proxy') + + result, unknown = parser.parse_known_args() + + config=dict(proxyhostport=result.proxy, framed_replay=True) + + app = init_app(create_live_rewriter_app, load_yaml=False, + config=config) + + return app + + +application = create_app() def main(): # pragma: no cover start_wsgi_server(application, 'Live Rewriter App', default_port=8090) + if __name__ == "__main__": main() diff --git a/pywb/framework/test/test_wsgi_wrapper.py b/pywb/framework/test/test_wsgi_wrapper.py index f3d65135..e46cded5 100644 --- a/pywb/framework/test/test_wsgi_wrapper.py +++ b/pywb/framework/test/test_wsgi_wrapper.py @@ -22,7 +22,7 @@ class TestCustomErrApp: def initer(app_class): - def init(): + def init(config=None): return app_class() return init diff --git a/pywb/framework/wsgi_wrappers.py b/pywb/framework/wsgi_wrappers.py index 837a7c74..3729a660 100644 --- a/pywb/framework/wsgi_wrappers.py +++ b/pywb/framework/wsgi_wrappers.py @@ -112,7 +112,7 @@ DEFAULT_CONFIG_FILE = 'config.yaml' #================================================================= -def init_app(init_func, load_yaml=True, config_file=None): +def init_app(init_func, load_yaml=True, config_file=None, config={}): logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s', level=logging.DEBUG) logging.debug('') @@ -129,9 +129,7 @@ def init_app(init_func, load_yaml=True, config_file=None): config = load_yaml_config(config_file) - wb_router = init_func(config) - else: - wb_router = init_func() + wb_router = init_func(config) except: msg = '*** pywb app init FAILED config from "%s"!\n' logging.exception(msg, init_func.__name__) @@ -146,17 +144,8 @@ def init_app(init_func, load_yaml=True, config_file=None): #================================================================= def start_wsgi_server(the_app, name, default_port=None): # pragma: no cover from wsgiref.simple_server import make_server - from optparse import OptionParser - opt = OptionParser('%prog [OPTIONS]') - opt.add_option('-p', '--port', type='int', default=None) - - options, args = opt.parse_args() - - port = options.port - - if not port: - port = the_app.port + port = the_app.port if not port: if default_port: diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index de137ae3..fbda24f4 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -5,6 +5,7 @@ Fetch a url from live web and apply rewriting rules import requests import datetime import mimetypes +import logging from urlparse import urlsplit @@ -19,24 +20,11 @@ from pywb.rewrite.rewrite_content import RewriteContent #================================================================= class LiveRewriter(object): - PROXY_HEADER_LIST = [('HTTP_USER_AGENT', 'User-Agent'), - ('HTTP_ACCEPT', 'Accept'), - ('HTTP_ACCEPT_LANGUAGE', 'Accept-Language'), - ('HTTP_ACCEPT_CHARSET', 'Accept-Charset'), - ('HTTP_ACCEPT_ENCODING', 'Accept-Encoding'), - ('HTTP_RANGE', 'Range'), - ('HTTP_CACHE_CONTROL', 'Cache-Control'), - ('HTTP_X_REQUESTED_WITH', 'X-Requested-With'), - ('HTTP_X_CSRF_TOKEN', 'X-CSRF-Token'), - ('HTTP_PE_TOKEN', 'PE-Token'), - ('HTTP_COOKIE', 'Cookie'), - ('CONTENT_TYPE', 'Content-Type'), - ('CONTENT_LENGTH', 'Content-Length'), - ('REL_REFERER', 'Referer'), - ] - - def __init__(self, defmod=''): + def __init__(self, defmod='', default_proxy=None): self.rewriter = RewriteContent(defmod=defmod) + self.default_proxy = default_proxy + if self.default_proxy: + logging.debug('Live Rewrite via proxy ' + self.default_proxy) def fetch_local_file(self, uri): fh = open(uri) @@ -89,6 +77,10 @@ class LiveRewriter(object): method = 'GET' data = None + if not proxies and self.default_proxy: + proxies = {'http': self.default_proxy, + 'https': self.default_proxy} + if env is not None: method = env['REQUEST_METHOD'].upper() input_ = env['wsgi.input'] diff --git a/pywb/webapp/live_rewrite_handler.py b/pywb/webapp/live_rewrite_handler.py index a69cf8e9..6b1d69e3 100644 --- a/pywb/webapp/live_rewrite_handler.py +++ b/pywb/webapp/live_rewrite_handler.py @@ -9,7 +9,7 @@ from replay_views import RewriteLiveView #================================================================= class RewriteHandler(WbUrlHandler): - def __init__(self, config=dict(framed_replay=True)): + def __init__(self, config): self.rewrite_view = RewriteLiveView(config) def __call__(self, wbrequest): @@ -17,8 +17,8 @@ class RewriteHandler(WbUrlHandler): #================================================================= -def create_live_rewriter_app(): - routes = [Route('rewrite', RewriteHandler()), +def create_live_rewriter_app(config={}): + routes = [Route('rewrite', RewriteHandler(config)), Route('static/default', StaticHandler('pywb/static/')) ] diff --git a/pywb/webapp/replay_views.py b/pywb/webapp/replay_views.py index 8cc14b7d..9cc0aa6a 100644 --- a/pywb/webapp/replay_views.py +++ b/pywb/webapp/replay_views.py @@ -88,7 +88,9 @@ class RewriteLiveView(BaseContentView): def __init__(self, config): super(RewriteLiveView, self).__init__(config) - self.rewriter = LiveRewriter(defmod=self._mp_mod) + default_proxy = config.get('proxyhostport') + self.rewriter = LiveRewriter(defmod=self._mp_mod, + default_proxy=default_proxy) def render_content(self, wbrequest, *args): head_insert_func = self.head_insert_view.create_insert_func(wbrequest) diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index b2a6dada..ca79c828 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -4,7 +4,8 @@ import webtest class TestLiveRewriter: def setup(self): - self.app = init_app(create_live_rewriter_app, load_yaml=False) + self.app = init_app(create_live_rewriter_app, load_yaml=False, + config=dict(framed_replay=True)) self.testapp = webtest.TestApp(self.app) def test_live_rewrite_1(self):