mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
- cdx handler refactoring: factor out CDXHandler and init to
separate cdx_handler module - Make wsgi app a class, add port as an optional field in wsgi app and router. (not required to be specified)
This commit is contained in:
parent
0bf651c2e3
commit
2d4ae62fbe
@ -1,27 +1,14 @@
|
|||||||
from pywb.cdx.cdxserver import create_cdx_server
|
|
||||||
|
|
||||||
from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
|
from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
|
||||||
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
|
||||||
|
|
||||||
from pywb.core.handlers import CDXHandler
|
from pywb.core.cdx_handler import create_cdx_server_app
|
||||||
|
|
||||||
DEFAULT_RULES = 'pywb/rules.yaml'
|
#=================================================================
|
||||||
|
# init cdx server app
|
||||||
|
#=================================================================
|
||||||
|
|
||||||
# cdx-server only config
|
# cdx-server only config
|
||||||
DEFAULT_CONFIG = 'pywb/cdx/config.yaml'
|
DEFAULT_CONFIG = 'pywb/cdx/config.yaml'
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
# create simple cdx server under '/cdx' using config file
|
|
||||||
# TODO: support multiple collections like full wayback?
|
|
||||||
|
|
||||||
def create_cdx_server_app(config):
|
|
||||||
cdx_server = create_cdx_server(config, DEFAULT_RULES)
|
|
||||||
routes = [Route('cdx', CDXHandler(cdx_server))]
|
|
||||||
return ArchivalRouter(routes)
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
# init pywb app
|
|
||||||
#=================================================================
|
|
||||||
application = init_app(create_cdx_server_app,
|
application = init_app(create_cdx_server_app,
|
||||||
load_yaml=True,
|
load_yaml=True,
|
||||||
config_file=DEFAULT_CONFIG)
|
config_file=DEFAULT_CONFIG)
|
||||||
|
43
pywb/core/cdx_handler.py
Normal file
43
pywb/core/cdx_handler.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
from pywb.cdx.query import CDXQuery
|
||||||
|
from pywb.cdx.cdxserver import create_cdx_server
|
||||||
|
|
||||||
|
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
||||||
|
from pywb.framework.basehandlers import BaseHandler
|
||||||
|
|
||||||
|
from views import TextCapturesView
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class CDXHandler(BaseHandler):
|
||||||
|
"""
|
||||||
|
Handler which passes wsgi request to cdx server and
|
||||||
|
returns a text-based cdx response
|
||||||
|
"""
|
||||||
|
def __init__(self, index_reader, view=None):
|
||||||
|
self.index_reader = index_reader
|
||||||
|
self.view = view if view else TextCapturesView()
|
||||||
|
|
||||||
|
def __call__(self, wbrequest):
|
||||||
|
params = CDXQuery.extract_params_from_wsgi_env(wbrequest.env)
|
||||||
|
cdx_lines = self.index_reader.load_cdx(**params)
|
||||||
|
|
||||||
|
return self.view.render_response(wbrequest, cdx_lines)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return 'CDX Handler: ' + str(self.index_reader)
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
DEFAULT_RULES = 'pywb/rules.yaml'
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def create_cdx_server_app(config):
|
||||||
|
"""
|
||||||
|
Create a cdx server config to be wrapped in a wsgi app
|
||||||
|
Currently using single access point '/cdx'
|
||||||
|
TODO: more complex example with multiple collections?
|
||||||
|
"""
|
||||||
|
cdx_server = create_cdx_server(config, DEFAULT_RULES)
|
||||||
|
port = config.get('port')
|
||||||
|
routes = [Route('cdx', CDXHandler(cdx_server))]
|
||||||
|
return ArchivalRouter(routes, port=port)
|
@ -1,9 +1,7 @@
|
|||||||
import urlparse
|
|
||||||
import pkgutil
|
import pkgutil
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from pywb.cdx.query import CDXQuery
|
|
||||||
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
||||||
from pywb.framework.wbrequestresponse import WbResponse
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
from pywb.framework.wbexceptions import WbException, NotFoundException
|
from pywb.framework.wbexceptions import WbException, NotFoundException
|
||||||
@ -58,24 +56,6 @@ class WBHandler(WbUrlHandler):
|
|||||||
return 'WBHandler: ' + str(self.index_reader) + ', ' + str(self.replay)
|
return 'WBHandler: ' + str(self.index_reader) + ', ' + str(self.replay)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
# CDX-Server Handler -- pass all params to cdx server
|
|
||||||
#=================================================================
|
|
||||||
class CDXHandler(BaseHandler):
|
|
||||||
def __init__(self, index_reader, view = None):
|
|
||||||
self.index_reader = index_reader
|
|
||||||
self.view = view if view else TextCapturesView()
|
|
||||||
|
|
||||||
def __call__(self, wbrequest):
|
|
||||||
params = CDXQuery.extract_params_from_wsgi_env(wbrequest.env)
|
|
||||||
cdx_lines = self.index_reader.load_cdx(**params)
|
|
||||||
|
|
||||||
return self.view.render_response(wbrequest, cdx_lines)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return 'Index Reader: ' + str(self.index_reader)
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Static Content Handler
|
# Static Content Handler
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
@ -11,7 +11,8 @@ from views import J2TemplateView, J2HtmlCapturesView
|
|||||||
from replay_views import ReplayView
|
from replay_views import ReplayView
|
||||||
|
|
||||||
from handlers import WBHandler
|
from handlers import WBHandler
|
||||||
from handlers import CDXHandler, StaticHandler
|
from handlers import StaticHandler
|
||||||
|
from cdx_handler import CDXHandler
|
||||||
from handlers import DebugEchoHandler, DebugEchoEnvHandler
|
from handlers import DebugEchoHandler, DebugEchoEnvHandler
|
||||||
|
|
||||||
|
|
||||||
@ -115,6 +116,8 @@ def create_wb_router(passed_config = {}):
|
|||||||
|
|
||||||
hostpaths = config.get('hostpaths')
|
hostpaths = config.get('hostpaths')
|
||||||
|
|
||||||
|
port = config.get('port')
|
||||||
|
|
||||||
# collections based on cdx source
|
# collections based on cdx source
|
||||||
collections = config.get('collections')
|
collections = config.get('collections')
|
||||||
|
|
||||||
@ -169,6 +172,7 @@ def create_wb_router(passed_config = {}):
|
|||||||
# This will help catch occasionally missed rewrites that fall-through to the host
|
# This will help catch occasionally missed rewrites that fall-through to the host
|
||||||
# (See archivalrouter.ReferRedirect)
|
# (See archivalrouter.ReferRedirect)
|
||||||
hostpaths = hostpaths,
|
hostpaths = hostpaths,
|
||||||
|
port = port,
|
||||||
|
|
||||||
abs_path = config.get('absolute_paths', True),
|
abs_path = config.get('absolute_paths', True),
|
||||||
|
|
||||||
|
@ -9,11 +9,18 @@ from wbrequestresponse import WbRequest, WbResponse
|
|||||||
# ArchivalRouter -- route WB requests in archival mode
|
# ArchivalRouter -- route WB requests in archival mode
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ArchivalRouter(object):
|
class ArchivalRouter(object):
|
||||||
def __init__(self, routes, hostpaths=None, abs_path=True,
|
def __init__(self, routes,
|
||||||
home_view=None, error_view=None):
|
hostpaths=None,
|
||||||
|
port=None,
|
||||||
|
abs_path=True,
|
||||||
|
home_view=None,
|
||||||
|
error_view=None):
|
||||||
|
|
||||||
self.routes = routes
|
self.routes = routes
|
||||||
|
|
||||||
|
# optional port setting may be ignored by wsgi container
|
||||||
|
self.port = port
|
||||||
|
|
||||||
if hostpaths:
|
if hostpaths:
|
||||||
self.fallback = ReferRedirect(hostpaths)
|
self.fallback = ReferRedirect(hostpaths)
|
||||||
else:
|
else:
|
||||||
|
@ -8,21 +8,31 @@ import urlparse
|
|||||||
# http proxy mode support is very simple so far:
|
# http proxy mode support is very simple so far:
|
||||||
# only latest capture is available currently
|
# only latest capture is available currently
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ProxyArchivalRouter:
|
class ProxyArchivalRouter(ArchivalRouter):
|
||||||
def __init__(self, routes, hostpaths=None, abs_path=True,
|
def __init__(self, routes,
|
||||||
home_view=None, error_view=None):
|
hostpaths=None,
|
||||||
|
port=None,
|
||||||
|
abs_path=True,
|
||||||
|
home_view=None,
|
||||||
|
error_view=None):
|
||||||
|
|
||||||
|
(super(ProxyArchivalRouter, self).
|
||||||
|
__init__(routes,
|
||||||
|
hostpaths=hostpaths,
|
||||||
|
port=port,
|
||||||
|
abs_path=abs_path,
|
||||||
|
home_view=home_view,
|
||||||
|
error_view=error_view))
|
||||||
|
|
||||||
self.archival = ArchivalRouter(routes, hostpaths, abs_path,
|
|
||||||
home_view, error_view)
|
|
||||||
self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view)
|
self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view)
|
||||||
self.error_view = error_view
|
#self.error_view = error_view
|
||||||
|
|
||||||
def __call__(self, env):
|
def __call__(self, env):
|
||||||
response = self.archival(env)
|
response = self.proxy(env)
|
||||||
if response:
|
if response:
|
||||||
return response
|
return response
|
||||||
|
|
||||||
response = self.proxy(env)
|
response = super(ProxyArchivalRouter, self).__call__(env)
|
||||||
if response:
|
if response:
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@ -5,17 +5,18 @@ class NotFoundException(WbException):
|
|||||||
def status(self):
|
def status(self):
|
||||||
return '404 Not Found'
|
return '404 Not Found'
|
||||||
|
|
||||||
|
|
||||||
# Exceptions that affect a specific capture and result in a retry
|
# Exceptions that affect a specific capture and result in a retry
|
||||||
class CaptureException(WbException):
|
class CaptureException(WbException):
|
||||||
def status(self):
|
def status(self):
|
||||||
return '500 Internal Server Error'
|
return '500 Internal Server Error'
|
||||||
|
|
||||||
|
|
||||||
class InternalRedirect(WbException):
|
class InternalRedirect(WbException):
|
||||||
def __init__(self, location, status = '302 Internal Redirect'):
|
def __init__(self, location, status='302 Internal Redirect'):
|
||||||
WbException.__init__(self, 'Redirecting -> ' + location)
|
WbException.__init__(self, 'Redirecting -> ' + location)
|
||||||
self.status = status
|
self.status = status
|
||||||
self.httpHeaders = [('Location', location)]
|
self.httpHeaders = [('Location', location)]
|
||||||
|
|
||||||
def status(self):
|
def status(self):
|
||||||
return self.status
|
return self.status
|
||||||
|
|
||||||
|
@ -10,6 +10,8 @@ import importlib
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_PORT = 8080
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# adapted from wsgiref.request_uri, but doesn't include domain name
|
# adapted from wsgiref.request_uri, but doesn't include domain name
|
||||||
# and allows all characters which are allowed in the path segment
|
# and allows all characters which are allowed in the path segment
|
||||||
@ -18,6 +20,7 @@ import logging
|
|||||||
# http://stackoverflow.com/questions/4669692/
|
# http://stackoverflow.com/questions/4669692/
|
||||||
# valid-characters-for-directory-part-of-a-url-for-short-links
|
# valid-characters-for-directory-part-of-a-url-for-short-links
|
||||||
|
|
||||||
|
|
||||||
def rel_request_uri(environ, include_query=1):
|
def rel_request_uri(environ, include_query=1):
|
||||||
"""
|
"""
|
||||||
Return the requested path, optionally including the query string
|
Return the requested path, optionally including the query string
|
||||||
@ -40,14 +43,21 @@ def rel_request_uri(environ, include_query=1):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def create_wb_app(wb_router):
|
class WSGIApp(object):
|
||||||
|
def __init__(self, wb_router):
|
||||||
|
self.wb_router = wb_router
|
||||||
|
self.port = DEFAULT_PORT
|
||||||
|
if hasattr(wb_router, 'port'):
|
||||||
|
self.port = wb_router.port
|
||||||
|
|
||||||
# Top-level wsgi application
|
# Top-level wsgi application
|
||||||
def application(env, start_response):
|
def __call__(self, env, start_response):
|
||||||
if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
|
if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
|
||||||
env['REL_REQUEST_URI'] = rel_request_uri(env)
|
env['REL_REQUEST_URI'] = rel_request_uri(env)
|
||||||
else:
|
else:
|
||||||
env['REL_REQUEST_URI'] = env['REQUEST_URI']
|
env['REL_REQUEST_URI'] = env['REQUEST_URI']
|
||||||
|
|
||||||
|
wb_router = self.wb_router
|
||||||
response = None
|
response = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -68,8 +78,6 @@ def create_wb_app(wb_router):
|
|||||||
|
|
||||||
return response(env, start_response)
|
return response(env, start_response)
|
||||||
|
|
||||||
return application
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def handle_exception(env, error_view, exc, print_trace):
|
def handle_exception(env, error_view, exc, print_trace):
|
||||||
@ -126,13 +134,10 @@ def init_app(init_func, load_yaml=True, config_file=None):
|
|||||||
msg = '*** pywb app inited with config from "%s"!\n'
|
msg = '*** pywb app inited with config from "%s"!\n'
|
||||||
logging.info(msg, init_func.__name__)
|
logging.info(msg, init_func.__name__)
|
||||||
|
|
||||||
return create_wb_app(wb_router)
|
return WSGIApp(wb_router)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
DEFAULT_PORT = 8080
|
|
||||||
|
|
||||||
|
|
||||||
def start_wsgi_server(the_app):
|
def start_wsgi_server(the_app):
|
||||||
from wsgiref.simple_server import make_server
|
from wsgiref.simple_server import make_server
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
@ -144,12 +149,10 @@ def start_wsgi_server(the_app):
|
|||||||
|
|
||||||
port = options.port
|
port = options.port
|
||||||
|
|
||||||
if port is None:
|
port = the_app.port
|
||||||
try:
|
|
||||||
config = load_default_config()
|
if not port:
|
||||||
port = config.get('port', DEFAULT_PORT)
|
port = DEFAULT_PORT
|
||||||
except:
|
|
||||||
port = DEFAULT_PORT
|
|
||||||
|
|
||||||
logging.debug('Starting CDX Server on port %s', port)
|
logging.debug('Starting CDX Server on port %s', port)
|
||||||
|
|
||||||
|
@ -90,6 +90,9 @@ enable_http_proxy: true
|
|||||||
# enable cdx server api for querying cdx directly (experimental)
|
# enable cdx server api for querying cdx directly (experimental)
|
||||||
enable_cdx_api: true
|
enable_cdx_api: true
|
||||||
|
|
||||||
|
# test different port
|
||||||
|
port: 9000
|
||||||
|
|
||||||
# optional reporter callback func
|
# optional reporter callback func
|
||||||
# if set, called with request and cdx object
|
# if set, called with request and cdx object
|
||||||
reporter: !!python/object/new:tests.fixture.PrintReporter []
|
reporter: !!python/object/new:tests.fixture.PrintReporter []
|
||||||
|
Loading…
x
Reference in New Issue
Block a user