mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite wsgi_cdxserver with werkzeug
Use pkg_resources instead of pkgutil, because pkgutil breaks with auto-reload. Add a --port command-line option.
This commit is contained in:
parent
d0229b6b2d
commit
79eb3be44f
@ -1,14 +1,14 @@
|
||||
import yaml
|
||||
import re
|
||||
import logging
|
||||
import pkgutil
|
||||
import pkg_resources
|
||||
|
||||
from canonicalize import unsurt, UrlCanonicalizer
|
||||
|
||||
|
||||
#=================================================================
|
||||
def load_domain_specific_cdx_rules(filename, surt_ordered):
|
||||
fh = pkgutil.get_data(__package__, filename)
|
||||
fh = pkg_resources.resource_string(__name__, filename)
|
||||
config = yaml.load(fh)
|
||||
|
||||
# Load Canonicalizer Rules
|
||||
|
@ -1,10 +1,11 @@
|
||||
from cdxserver import create_cdx_server, extract_params_from_wsgi_env
|
||||
from werkzeug.wrappers import BaseRequest, BaseResponse
|
||||
from cdxserver import create_cdx_server
|
||||
from pywb import get_test_dir
|
||||
|
||||
import logging
|
||||
import os
|
||||
import yaml
|
||||
import pkgutil
|
||||
import pkg_resources
|
||||
|
||||
#=================================================================
|
||||
CONFIG_FILE = 'config.yaml'
|
||||
@ -13,66 +14,85 @@ RULES_FILE = 'rules.yaml'
|
||||
|
||||
DEFAULT_PORT = 8080
|
||||
|
||||
config = None
|
||||
if __package__:
|
||||
try:
|
||||
config = pkgutil.get_data(__package__, CONFIG_FILE)
|
||||
config = yaml.load(config)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
#=================================================================
|
||||
def main(paths=None):
|
||||
class CDXQueryRequest(BaseRequest):
    """Request wrapper that exposes CDX query parameters read from ``self.args``."""

    def __init__(self, environ):
        super(CDXQueryRequest, self).__init__(environ)

    @property
    def output(self):
        """Value of the ``output`` argument, or ``'text'`` when it is absent."""
        return self.args.get('output', 'text')

    @property
    def filter(self):
        """List of every ``filter`` argument value (empty when none given)."""
        # The second positional argument of MultiDict.getlist is a *type*
        # converter, not a default value.  Passing [] there makes werkzeug
        # try to call the list on every value.  A missing key already
        # yields an empty list, so no default is needed.
        return self.args.getlist('filter')

    @property
    def params(self):
        """Request arguments as a plain dict.

        ``filter`` keeps its full list of values; every other key keeps
        only its first value.
        """
        return dict((key, values if key == 'filter' else values[0])
                    for key, values in self.args.iterlists())
|
||||
|
||||
class WSGICDXServer(object):
    """WSGI callable that answers each request with a CDX server query."""

    def __init__(self, paths, rules_file):
        # Build the backing CDX server once; it is reused for every request.
        self.cdxserver = create_cdx_server(paths, rules_file)

    def __call__(self, environ, start_response):
        """Handle one WSGI request.

        The request arguments are passed as keyword arguments to
        ``load_cdx`` and the result is returned as a plain-text response.
        Any exception is logged and reported as a ``400 Error`` whose body
        is the exception message.
        """
        request = CDXQueryRequest(environ)
        try:
            query = request.params
            logging.debug('request.args=%s', query)
            cdx_iter = self.cdxserver.load_cdx(**query)

            # TODO: select response type by "output" parameter
            return PlainTextResponse(cdx_iter)(environ, start_response)
        except Exception as err:
            # Logs at ERROR level with the current traceback attached.
            logging.exception('load_cdx failed')
            # TODO: error response should be different for each response
            # type
            start_response('400 Error', [('Content-Type', 'text/plain')])
            return [str(err)]
|
||||
|
||||
class PlainTextResponse(BaseResponse):
    """Response whose body is the given CDX iterable, ``text/plain`` by default."""

    def __init__(self, cdxitr, status=200, content_type='text/plain'):
        # Forward everything to the base response; the iterable becomes
        # the response body.
        init_kwargs = dict(response=cdxitr,
                           status=status,
                           content_type=content_type)
        super(PlainTextResponse, self).__init__(**init_kwargs)
|
||||
|
||||
# class JsonResponse(Response):
|
||||
# pass
|
||||
# class MementoResponse(Response):
|
||||
# pass
|
||||
|
||||
def create_app(paths=None):
|
||||
logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
|
||||
level=logging.DEBUG)
|
||||
|
||||
if not paths:
|
||||
if config:
|
||||
paths = config
|
||||
else:
|
||||
paths = get_test_dir() + 'cdx/'
|
||||
|
||||
cdxserver = create_cdx_server(paths, RULES_FILE)
|
||||
|
||||
def application(env, start_response):
|
||||
try:
|
||||
params = extract_params_from_wsgi_env(env)
|
||||
response = cdxserver.load_cdx(**params)
|
||||
start_response('200 OK', [('Content-Type', 'text/plain')])
|
||||
|
||||
response = list(response)
|
||||
|
||||
except Exception as exc:
|
||||
import traceback
|
||||
err_details = traceback.format_exc(exc)
|
||||
start_response('400 Error', [('Content-Type', 'text/plain')])
|
||||
response = [str(exc)]
|
||||
print err_details
|
||||
|
||||
return response
|
||||
|
||||
return application
|
||||
paths = config or get_test_dir() + 'cdx/'
|
||||
|
||||
return WSGICDXServer(paths, RULES_FILE)
|
||||
|
||||
if __name__ == "__main__":
|
||||
from wsgiref.simple_server import make_server
|
||||
from optparse import OptionParser
|
||||
from werkzeug.serving import run_simple
|
||||
|
||||
app = main()
|
||||
opt = OptionParser('%prog [OPTIONS]')
|
||||
opt.add_option('-p', '--port', type='int', default=None)
|
||||
|
||||
port = DEFAULT_PORT
|
||||
if config:
|
||||
port = config.get('port', DEFAULT_PORT)
|
||||
options, args = opt.parse_args()
|
||||
|
||||
httpd = make_server('', port, app)
|
||||
configdata = pkg_resources.resource_string(__name__, CONFIG_FILE)
|
||||
config = yaml.load(configdata)
|
||||
|
||||
logging.debug('Starting CDX Server on port ' + str(port))
|
||||
port = options.port
|
||||
if port is None:
|
||||
port = (config and config.get('port')) or DEFAULT_PORT
|
||||
|
||||
app = create_app()
|
||||
|
||||
logging.debug('Starting CDX Server on port %s', port)
|
||||
try:
|
||||
httpd.serve_forever()
|
||||
except KeyboardInterrupt:
|
||||
run_simple('0.0.0.0', port, app, use_reloader=True, use_debugger=True)
|
||||
except KeyboardInterrupt as ex:
|
||||
pass
|
||||
|
||||
logging.debug('Stopping CDX Server')
|
||||
else:
|
||||
application = main()
|
||||
application = create_app()
|
||||
|
Loading…
x
Reference in New Issue
Block a user