From 79eb3be44f17bc5bfccee171c6aac843e3b0d736 Mon Sep 17 00:00:00 2001 From: Kenji Nagahashi Date: Thu, 20 Feb 2014 09:58:08 +0000 Subject: [PATCH] rewrite wsgi_cdxserver with werkzeug use pkg_resources instead of pkgutil because pkgutil breaks with auto-reload. add --port command line option. --- pywb/cdx/cdxdomainspecific.py | 4 +- pywb/cdx/wsgi_cdxserver.py | 116 ++++++++++++++++++++-------------- setup.py | 2 + 3 files changed, 72 insertions(+), 50 deletions(-) diff --git a/pywb/cdx/cdxdomainspecific.py b/pywb/cdx/cdxdomainspecific.py index 2c733c8d..4d8d9b87 100644 --- a/pywb/cdx/cdxdomainspecific.py +++ b/pywb/cdx/cdxdomainspecific.py @@ -1,14 +1,14 @@ import yaml import re import logging -import pkgutil +import pkg_resources from canonicalize import unsurt, UrlCanonicalizer #================================================================= def load_domain_specific_cdx_rules(filename, surt_ordered): - fh = pkgutil.get_data(__package__, filename) + fh = pkg_resources.resource_string(__name__, filename) config = yaml.load(fh) # Load Canonicalizer Rules diff --git a/pywb/cdx/wsgi_cdxserver.py b/pywb/cdx/wsgi_cdxserver.py index b6ccc61c..609928a0 100644 --- a/pywb/cdx/wsgi_cdxserver.py +++ b/pywb/cdx/wsgi_cdxserver.py @@ -1,10 +1,11 @@ -from cdxserver import create_cdx_server, extract_params_from_wsgi_env +from werkzeug.wrappers import BaseRequest, BaseResponse +from cdxserver import create_cdx_server from pywb import get_test_dir import logging import os import yaml -import pkgutil +import pkg_resources #================================================================= CONFIG_FILE = 'config.yaml' @@ -13,66 +14,85 @@ RULES_FILE = 'rules.yaml' DEFAULT_PORT = 8080 -config = None -if __package__: - try: - config = pkgutil.get_data(__package__, CONFIG_FILE) - config = yaml.load(config) - except: - pass - - #================================================================= -def main(paths=None): +class CDXQueryRequest(BaseRequest): + def __init__(self, environ): + super(CDXQueryRequest, self).__init__(environ) + + @property + def output(self): + return self.args.get('output', 'text') + @property + def filter(self): + return self.args.getlist('filter', []) + @property + def params(self): + return dict(t if t[0] == 'filter' else (t[0], t[1][0]) + for t in self.args.iterlists()) + +class WSGICDXServer(object): + def __init__(self, paths, rules_file): + self.cdxserver = create_cdx_server(paths, rules_file) + + def __call__(self, environ, start_response): + request = CDXQueryRequest(environ) + try: + logging.debug('request.args=%s', request.params) + result = self.cdxserver.load_cdx(**request.params) + + # TODO: select response type by "output" parameter + response = PlainTextResponse(result) + return response(environ, start_response) + except Exception as exc: + logging.error('load_cdx failed', exc_info=1) + # TODO: error response should be different for each response + # type + start_response('400 Error', [('Content-Type', 'text/plain')]) + return [str(exc)] + +class PlainTextResponse(BaseResponse): + def __init__(self, cdxitr, status=200, content_type='text/plain'): + super(PlainTextResponse, self).__init__( + response=cdxitr, + status=status, content_type=content_type) + +# class JsonResponse(Response): +# pass +# class MementoResponse(Response): +# pass + +def create_app(paths=None): logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s', level=logging.DEBUG) if not paths: - if config: - paths = config - else: - paths = get_test_dir() + 'cdx/' - - cdxserver = create_cdx_server(paths, RULES_FILE) - - def application(env, start_response): - try: - params = extract_params_from_wsgi_env(env) - response = cdxserver.load_cdx(**params) - start_response('200 OK', [('Content-Type', 'text/plain')]) - - response = list(response) - - except Exception as exc: - import traceback - err_details = traceback.format_exc(exc) - start_response('400 Error', [('Content-Type', 'text/plain')]) - response = [str(exc)] - print err_details - - return response - - return application + paths = config or get_test_dir() + 'cdx/' + return WSGICDXServer(paths, RULES_FILE) if __name__ == "__main__": - from wsgiref.simple_server import make_server + from optparse import OptionParser + from werkzeug.serving import run_simple - app = main() + opt = OptionParser('%prog [OPTIONS]') + opt.add_option('-p', '--port', type='int', default=None) - port = DEFAULT_PORT - if config: - port = config.get('port', DEFAULT_PORT) + options, args = opt.parse_args() - httpd = make_server('', port, app) + configdata = pkg_resources.resource_string(__name__, CONFIG_FILE) + config = yaml.load(configdata) - logging.debug('Starting CDX Server on port ' + str(port)) + port = options.port + if port is None: + port = (config and config.get('port')) or DEFAULT_PORT + app = create_app() + + logging.debug('Starting CDX Server on port %s', port) try: - httpd.serve_forever() - except KeyboardInterrupt: + run_simple('0.0.0.0', port, app, use_reloader=True, use_debugger=True) + except KeyboardInterrupt as ex: pass - logging.debug('Stopping CDX Server') else: - application = main() + application = create_app() diff --git a/setup.py b/setup.py index e8d92e16..70dba6d6 100755 --- a/setup.py +++ b/setup.py @@ -34,6 +34,8 @@ setup( 'pyyaml', 'WebTest', 'pytest', + 'werkzeug>=0.9.4', + 'setuptools==0.9.7', ], # tests_require=['WebTest', 'pytest'], zip_safe=False