1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

- cdx handler refactoring: factor out CDXHandler and init to

separate cdx_handler module
- Make wsgi app a class, add port as an optional field in wsgi app
and router. (not required to be specified)
This commit is contained in:
Ilya Kreymer 2014-03-03 10:35:57 -08:00
parent 0bf651c2e3
commit 2d4ae62fbe
9 changed files with 102 additions and 64 deletions

View File

@ -1,27 +1,14 @@
from pywb.cdx.cdxserver import create_cdx_server
from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
from pywb.framework.archivalrouter import ArchivalRouter, Route
from pywb.core.handlers import CDXHandler from pywb.core.cdx_handler import create_cdx_server_app
DEFAULT_RULES = 'pywb/rules.yaml' #=================================================================
# init cdx server app
#=================================================================
# cdx-server only config # cdx-server only config
DEFAULT_CONFIG = 'pywb/cdx/config.yaml' DEFAULT_CONFIG = 'pywb/cdx/config.yaml'
#=================================================================
# create simple cdx server under '/cdx' using config file
# TODO: support multiple collections like full wayback?
def create_cdx_server_app(config):
cdx_server = create_cdx_server(config, DEFAULT_RULES)
routes = [Route('cdx', CDXHandler(cdx_server))]
return ArchivalRouter(routes)
#=================================================================
# init pywb app
#=================================================================
application = init_app(create_cdx_server_app, application = init_app(create_cdx_server_app,
load_yaml=True, load_yaml=True,
config_file=DEFAULT_CONFIG) config_file=DEFAULT_CONFIG)

43
pywb/core/cdx_handler.py Normal file
View File

@ -0,0 +1,43 @@
from pywb.cdx.query import CDXQuery
from pywb.cdx.cdxserver import create_cdx_server
from pywb.framework.archivalrouter import ArchivalRouter, Route
from pywb.framework.basehandlers import BaseHandler
from views import TextCapturesView
#=================================================================
class CDXHandler(BaseHandler):
    """
    Request handler for the cdx server api.

    Extracts the cdx query params from the wsgi environ of the
    incoming request, loads the matching cdx lines from the
    index reader and renders them through the configured view
    (a TextCapturesView unless another view is supplied).
    """

    def __init__(self, index_reader, view=None):
        # any falsy view (not only None) is replaced by the text default
        if not view:
            view = TextCapturesView()

        self.view = view
        self.index_reader = index_reader

    def __call__(self, wbrequest):
        # all query params are handed to the cdx server as keyword args
        query_params = CDXQuery.extract_params_from_wsgi_env(wbrequest.env)
        captures = self.index_reader.load_cdx(**query_params)
        return self.view.render_response(wbrequest, captures)

    def __str__(self):
        reader_desc = str(self.index_reader)
        return 'CDX Handler: ' + reader_desc
#=================================================================
DEFAULT_RULES = 'pywb/rules.yaml'
#=================================================================
def create_cdx_server_app(config):
    """
    Build the router for a cdx-server-only setup, to be wrapped
    in a wsgi app.

    The cdx server is created from the given config and
    DEFAULT_RULES, and exposed through a single route, 'cdx'.
    The 'port' entry of the config is read with config.get()
    and passed on to the router as-is.

    TODO: more complex example with multiple collections?
    """
    index = create_cdx_server(config, DEFAULT_RULES)
    listen_port = config.get('port')

    cdx_route = Route('cdx', CDXHandler(index))
    return ArchivalRouter([cdx_route], port=listen_port)

View File

@ -1,9 +1,7 @@
import urlparse
import pkgutil import pkgutil
import mimetypes import mimetypes
import time import time
from pywb.cdx.query import CDXQuery
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
from pywb.framework.wbrequestresponse import WbResponse from pywb.framework.wbrequestresponse import WbResponse
from pywb.framework.wbexceptions import WbException, NotFoundException from pywb.framework.wbexceptions import WbException, NotFoundException
@ -58,24 +56,6 @@ class WBHandler(WbUrlHandler):
return 'WBHandler: ' + str(self.index_reader) + ', ' + str(self.replay) return 'WBHandler: ' + str(self.index_reader) + ', ' + str(self.replay)
#=================================================================
# CDX-Server Handler -- pass all params to cdx server
#=================================================================
class CDXHandler(BaseHandler):
def __init__(self, index_reader, view = None):
self.index_reader = index_reader
self.view = view if view else TextCapturesView()
def __call__(self, wbrequest):
params = CDXQuery.extract_params_from_wsgi_env(wbrequest.env)
cdx_lines = self.index_reader.load_cdx(**params)
return self.view.render_response(wbrequest, cdx_lines)
def __str__(self):
return 'Index Reader: ' + str(self.index_reader)
#================================================================= #=================================================================
# Static Content Handler # Static Content Handler
#================================================================= #=================================================================

View File

@ -11,7 +11,8 @@ from views import J2TemplateView, J2HtmlCapturesView
from replay_views import ReplayView from replay_views import ReplayView
from handlers import WBHandler from handlers import WBHandler
from handlers import CDXHandler, StaticHandler from handlers import StaticHandler
from cdx_handler import CDXHandler
from handlers import DebugEchoHandler, DebugEchoEnvHandler from handlers import DebugEchoHandler, DebugEchoEnvHandler
@ -115,6 +116,8 @@ def create_wb_router(passed_config = {}):
hostpaths = config.get('hostpaths') hostpaths = config.get('hostpaths')
port = config.get('port')
# collections based on cdx source # collections based on cdx source
collections = config.get('collections') collections = config.get('collections')
@ -169,6 +172,7 @@ def create_wb_router(passed_config = {}):
# This will help catch occasionally missed rewrites that fall-through to the host # This will help catch occasionally missed rewrites that fall-through to the host
# (See archivalrouter.ReferRedirect) # (See archivalrouter.ReferRedirect)
hostpaths = hostpaths, hostpaths = hostpaths,
port = port,
abs_path = config.get('absolute_paths', True), abs_path = config.get('absolute_paths', True),

View File

@ -9,11 +9,18 @@ from wbrequestresponse import WbRequest, WbResponse
# ArchivalRouter -- route WB requests in archival mode # ArchivalRouter -- route WB requests in archival mode
#================================================================= #=================================================================
class ArchivalRouter(object): class ArchivalRouter(object):
def __init__(self, routes, hostpaths=None, abs_path=True, def __init__(self, routes,
home_view=None, error_view=None): hostpaths=None,
port=None,
abs_path=True,
home_view=None,
error_view=None):
self.routes = routes self.routes = routes
# optional port setting may be ignored by wsgi container
self.port = port
if hostpaths: if hostpaths:
self.fallback = ReferRedirect(hostpaths) self.fallback = ReferRedirect(hostpaths)
else: else:

View File

@ -8,21 +8,31 @@ import urlparse
# http proxy mode support is very simple so far: # http proxy mode support is very simple so far:
# only latest capture is available currently # only latest capture is available currently
#================================================================= #=================================================================
class ProxyArchivalRouter: class ProxyArchivalRouter(ArchivalRouter):
def __init__(self, routes, hostpaths=None, abs_path=True, def __init__(self, routes,
home_view=None, error_view=None): hostpaths=None,
port=None,
abs_path=True,
home_view=None,
error_view=None):
(super(ProxyArchivalRouter, self).
__init__(routes,
hostpaths=hostpaths,
port=port,
abs_path=abs_path,
home_view=home_view,
error_view=error_view))
self.archival = ArchivalRouter(routes, hostpaths, abs_path,
home_view, error_view)
self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view) self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view)
self.error_view = error_view #self.error_view = error_view
def __call__(self, env): def __call__(self, env):
response = self.archival(env) response = self.proxy(env)
if response: if response:
return response return response
response = self.proxy(env) response = super(ProxyArchivalRouter, self).__call__(env)
if response: if response:
return response return response

View File

@ -5,17 +5,18 @@ class NotFoundException(WbException):
def status(self): def status(self):
return '404 Not Found' return '404 Not Found'
# Exceptions that affect a specific capture and result in a retry # Exceptions that affect a specific capture and result in a retry
class CaptureException(WbException): class CaptureException(WbException):
def status(self): def status(self):
return '500 Internal Server Error' return '500 Internal Server Error'
class InternalRedirect(WbException): class InternalRedirect(WbException):
def __init__(self, location, status = '302 Internal Redirect'): def __init__(self, location, status='302 Internal Redirect'):
WbException.__init__(self, 'Redirecting -> ' + location) WbException.__init__(self, 'Redirecting -> ' + location)
self.status = status self.status = status
self.httpHeaders = [('Location', location)] self.httpHeaders = [('Location', location)]
def status(self): def status(self):
return self.status return self.status

View File

@ -10,6 +10,8 @@ import importlib
import logging import logging
DEFAULT_PORT = 8080
#================================================================= #=================================================================
# adapted from wsgiref.request_uri, but doesn't include domain name # adapted from wsgiref.request_uri, but doesn't include domain name
# and allows all characters which are allowed in the path segment # and allows all characters which are allowed in the path segment
@ -18,6 +20,7 @@ import logging
# http://stackoverflow.com/questions/4669692/ # http://stackoverflow.com/questions/4669692/
# valid-characters-for-directory-part-of-a-url-for-short-links # valid-characters-for-directory-part-of-a-url-for-short-links
def rel_request_uri(environ, include_query=1): def rel_request_uri(environ, include_query=1):
""" """
Return the requested path, optionally including the query string Return the requested path, optionally including the query string
@ -40,14 +43,21 @@ def rel_request_uri(environ, include_query=1):
#================================================================= #=================================================================
def create_wb_app(wb_router): class WSGIApp(object):
def __init__(self, wb_router):
self.wb_router = wb_router
self.port = DEFAULT_PORT
if hasattr(wb_router, 'port'):
self.port = wb_router.port
# Top-level wsgi application # Top-level wsgi application
def application(env, start_response): def __call__(self, env, start_response):
if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'): if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
env['REL_REQUEST_URI'] = rel_request_uri(env) env['REL_REQUEST_URI'] = rel_request_uri(env)
else: else:
env['REL_REQUEST_URI'] = env['REQUEST_URI'] env['REL_REQUEST_URI'] = env['REQUEST_URI']
wb_router = self.wb_router
response = None response = None
try: try:
@ -68,8 +78,6 @@ def create_wb_app(wb_router):
return response(env, start_response) return response(env, start_response)
return application
#================================================================= #=================================================================
def handle_exception(env, error_view, exc, print_trace): def handle_exception(env, error_view, exc, print_trace):
@ -126,13 +134,10 @@ def init_app(init_func, load_yaml=True, config_file=None):
msg = '*** pywb app inited with config from "%s"!\n' msg = '*** pywb app inited with config from "%s"!\n'
logging.info(msg, init_func.__name__) logging.info(msg, init_func.__name__)
return create_wb_app(wb_router) return WSGIApp(wb_router)
#================================================================= #=================================================================
DEFAULT_PORT = 8080
def start_wsgi_server(the_app): def start_wsgi_server(the_app):
from wsgiref.simple_server import make_server from wsgiref.simple_server import make_server
from optparse import OptionParser from optparse import OptionParser
@ -144,12 +149,10 @@ def start_wsgi_server(the_app):
port = options.port port = options.port
if port is None: port = the_app.port
try:
config = load_default_config() if not port:
port = config.get('port', DEFAULT_PORT) port = DEFAULT_PORT
except:
port = DEFAULT_PORT
logging.debug('Starting CDX Server on port %s', port) logging.debug('Starting CDX Server on port %s', port)

View File

@ -90,6 +90,9 @@ enable_http_proxy: true
# enable cdx server api for querying cdx directly (experimental) # enable cdx server api for querying cdx directly (experimental)
enable_cdx_api: true enable_cdx_api: true
# test different port
port: 9000
# optional reporter callback func # optional reporter callback func
# if set, called with request and cdx object # if set, called with request and cdx object
reporter: !!python/object/new:tests.fixture.PrintReporter [] reporter: !!python/object/new:tests.fixture.PrintReporter []