mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
- cdx handler refactoring: factor out CDXHandler and init to
separate cdx_handler module - Make wsgi app a class, add port as an optional field in wsgi app and router. (not required to be specified)
This commit is contained in:
parent
0bf651c2e3
commit
2d4ae62fbe
@ -1,27 +1,14 @@
|
||||
from pywb.cdx.cdxserver import create_cdx_server
|
||||
|
||||
from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
|
||||
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
||||
|
||||
from pywb.core.handlers import CDXHandler
|
||||
from pywb.core.cdx_handler import create_cdx_server_app
|
||||
|
||||
DEFAULT_RULES = 'pywb/rules.yaml'
|
||||
#=================================================================
|
||||
# init cdx server app
|
||||
#=================================================================
|
||||
|
||||
# cdx-server only config
|
||||
DEFAULT_CONFIG = 'pywb/cdx/config.yaml'
|
||||
|
||||
#=================================================================
|
||||
# create simple cdx server under '/cdx' using config file
|
||||
# TODO: support multiple collections like full wayback?
|
||||
|
||||
def create_cdx_server_app(config):
|
||||
cdx_server = create_cdx_server(config, DEFAULT_RULES)
|
||||
routes = [Route('cdx', CDXHandler(cdx_server))]
|
||||
return ArchivalRouter(routes)
|
||||
|
||||
#=================================================================
|
||||
# init pywb app
|
||||
#=================================================================
|
||||
application = init_app(create_cdx_server_app,
|
||||
load_yaml=True,
|
||||
config_file=DEFAULT_CONFIG)
|
||||
|
43
pywb/core/cdx_handler.py
Normal file
43
pywb/core/cdx_handler.py
Normal file
@ -0,0 +1,43 @@
|
||||
from pywb.cdx.query import CDXQuery
|
||||
from pywb.cdx.cdxserver import create_cdx_server
|
||||
|
||||
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
||||
from pywb.framework.basehandlers import BaseHandler
|
||||
|
||||
from views import TextCapturesView
|
||||
|
||||
|
||||
#=================================================================
|
||||
class CDXHandler(BaseHandler):
    """
    Request handler that forwards a wsgi request to a cdx index reader
    and renders the resulting cdx lines through a view.

    If no view is supplied (or a falsy one is passed), a new
    TextCapturesView is created and used.
    """

    def __init__(self, index_reader, view=None):
        # Fall back to the text-based view when the caller gives none.
        self.view = view or TextCapturesView()
        self.index_reader = index_reader

    def __call__(self, wbrequest):
        # Query params are pulled from the request's wsgi environ and
        # passed to the index reader as keyword arguments.
        query_params = CDXQuery.extract_params_from_wsgi_env(wbrequest.env)
        return self.view.render_response(
            wbrequest,
            self.index_reader.load_cdx(**query_params))

    def __str__(self):
        reader_desc = str(self.index_reader)
        return 'CDX Handler: ' + reader_desc
|
||||
|
||||
|
||||
#=================================================================
|
||||
DEFAULT_RULES = 'pywb/rules.yaml'
|
||||
|
||||
#=================================================================
|
||||
def create_cdx_server_app(config):
    """
    Build the router for a standalone cdx server, to be wrapped
    in a wsgi app.

    The cdx server is created from ``config`` together with
    DEFAULT_RULES and exposed through a single route, 'cdx'.
    The optional 'port' entry of ``config`` is passed on to the router
    (None when not present).

    TODO: more complex example with multiple collections?
    """
    server = create_cdx_server(config, DEFAULT_RULES)
    configured_port = config.get('port')

    cdx_route = Route('cdx', CDXHandler(server))
    return ArchivalRouter([cdx_route], port=configured_port)
|
@ -1,9 +1,7 @@
|
||||
import urlparse
|
||||
import pkgutil
|
||||
import mimetypes
|
||||
import time
|
||||
|
||||
from pywb.cdx.query import CDXQuery
|
||||
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
||||
from pywb.framework.wbrequestresponse import WbResponse
|
||||
from pywb.framework.wbexceptions import WbException, NotFoundException
|
||||
@ -58,24 +56,6 @@ class WBHandler(WbUrlHandler):
|
||||
return 'WBHandler: ' + str(self.index_reader) + ', ' + str(self.replay)
|
||||
|
||||
|
||||
#=================================================================
|
||||
# CDX-Server Handler -- pass all params to cdx server
|
||||
#=================================================================
|
||||
class CDXHandler(BaseHandler):
|
||||
def __init__(self, index_reader, view = None):
|
||||
self.index_reader = index_reader
|
||||
self.view = view if view else TextCapturesView()
|
||||
|
||||
def __call__(self, wbrequest):
|
||||
params = CDXQuery.extract_params_from_wsgi_env(wbrequest.env)
|
||||
cdx_lines = self.index_reader.load_cdx(**params)
|
||||
|
||||
return self.view.render_response(wbrequest, cdx_lines)
|
||||
|
||||
def __str__(self):
|
||||
return 'Index Reader: ' + str(self.index_reader)
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Static Content Handler
|
||||
#=================================================================
|
||||
|
@ -11,7 +11,8 @@ from views import J2TemplateView, J2HtmlCapturesView
|
||||
from replay_views import ReplayView
|
||||
|
||||
from handlers import WBHandler
|
||||
from handlers import CDXHandler, StaticHandler
|
||||
from handlers import StaticHandler
|
||||
from cdx_handler import CDXHandler
|
||||
from handlers import DebugEchoHandler, DebugEchoEnvHandler
|
||||
|
||||
|
||||
@ -115,6 +116,8 @@ def create_wb_router(passed_config = {}):
|
||||
|
||||
hostpaths = config.get('hostpaths')
|
||||
|
||||
port = config.get('port')
|
||||
|
||||
# collections based on cdx source
|
||||
collections = config.get('collections')
|
||||
|
||||
@ -169,6 +172,7 @@ def create_wb_router(passed_config = {}):
|
||||
# This will help catch occasionally missed rewrites that fall-through to the host
|
||||
# (See archivalrouter.ReferRedirect)
|
||||
hostpaths = hostpaths,
|
||||
port = port,
|
||||
|
||||
abs_path = config.get('absolute_paths', True),
|
||||
|
||||
|
@ -9,11 +9,18 @@ from wbrequestresponse import WbRequest, WbResponse
|
||||
# ArchivalRouter -- route WB requests in archival mode
|
||||
#=================================================================
|
||||
class ArchivalRouter(object):
|
||||
def __init__(self, routes, hostpaths=None, abs_path=True,
|
||||
home_view=None, error_view=None):
|
||||
def __init__(self, routes,
|
||||
hostpaths=None,
|
||||
port=None,
|
||||
abs_path=True,
|
||||
home_view=None,
|
||||
error_view=None):
|
||||
|
||||
self.routes = routes
|
||||
|
||||
# optional port setting may be ignored by wsgi container
|
||||
self.port = port
|
||||
|
||||
if hostpaths:
|
||||
self.fallback = ReferRedirect(hostpaths)
|
||||
else:
|
||||
|
@ -8,21 +8,31 @@ import urlparse
|
||||
# http proxy mode support is very simple so far:
|
||||
# only latest capture is available currently
|
||||
#=================================================================
|
||||
class ProxyArchivalRouter:
|
||||
def __init__(self, routes, hostpaths=None, abs_path=True,
|
||||
home_view=None, error_view=None):
|
||||
class ProxyArchivalRouter(ArchivalRouter):
|
||||
def __init__(self, routes,
|
||||
hostpaths=None,
|
||||
port=None,
|
||||
abs_path=True,
|
||||
home_view=None,
|
||||
error_view=None):
|
||||
|
||||
(super(ProxyArchivalRouter, self).
|
||||
__init__(routes,
|
||||
hostpaths=hostpaths,
|
||||
port=port,
|
||||
abs_path=abs_path,
|
||||
home_view=home_view,
|
||||
error_view=error_view))
|
||||
|
||||
self.archival = ArchivalRouter(routes, hostpaths, abs_path,
|
||||
home_view, error_view)
|
||||
self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view)
|
||||
self.error_view = error_view
|
||||
#self.error_view = error_view
|
||||
|
||||
def __call__(self, env):
|
||||
response = self.archival(env)
|
||||
response = self.proxy(env)
|
||||
if response:
|
||||
return response
|
||||
|
||||
response = self.proxy(env)
|
||||
response = super(ProxyArchivalRouter, self).__call__(env)
|
||||
if response:
|
||||
return response
|
||||
|
||||
|
@ -5,17 +5,18 @@ class NotFoundException(WbException):
|
||||
def status(self):
|
||||
return '404 Not Found'
|
||||
|
||||
|
||||
# Exceptions that affect a specific capture and result in a retry
|
||||
class CaptureException(WbException):
|
||||
def status(self):
|
||||
return '500 Internal Server Error'
|
||||
|
||||
|
||||
class InternalRedirect(WbException):
|
||||
def __init__(self, location, status = '302 Internal Redirect'):
|
||||
def __init__(self, location, status='302 Internal Redirect'):
|
||||
WbException.__init__(self, 'Redirecting -> ' + location)
|
||||
self.status = status
|
||||
self.httpHeaders = [('Location', location)]
|
||||
|
||||
def status(self):
|
||||
return self.status
|
||||
|
||||
|
@ -10,6 +10,8 @@ import importlib
|
||||
import logging
|
||||
|
||||
|
||||
DEFAULT_PORT = 8080
|
||||
|
||||
#=================================================================
|
||||
# adapted from wsgiref.request_uri, but doesn't include domain name
|
||||
# and allows all characters which are allowed in the path segment
|
||||
@ -18,6 +20,7 @@ import logging
|
||||
# http://stackoverflow.com/questions/4669692/
|
||||
# valid-characters-for-directory-part-of-a-url-for-short-links
|
||||
|
||||
|
||||
def rel_request_uri(environ, include_query=1):
|
||||
"""
|
||||
Return the requested path, optionally including the query string
|
||||
@ -40,14 +43,21 @@ def rel_request_uri(environ, include_query=1):
|
||||
|
||||
|
||||
#=================================================================
|
||||
def create_wb_app(wb_router):
|
||||
class WSGIApp(object):
|
||||
def __init__(self, wb_router):
|
||||
self.wb_router = wb_router
|
||||
self.port = DEFAULT_PORT
|
||||
if hasattr(wb_router, 'port'):
|
||||
self.port = wb_router.port
|
||||
|
||||
# Top-level wsgi application
|
||||
def application(env, start_response):
|
||||
def __call__(self, env, start_response):
|
||||
if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
|
||||
env['REL_REQUEST_URI'] = rel_request_uri(env)
|
||||
else:
|
||||
env['REL_REQUEST_URI'] = env['REQUEST_URI']
|
||||
|
||||
wb_router = self.wb_router
|
||||
response = None
|
||||
|
||||
try:
|
||||
@ -68,8 +78,6 @@ def create_wb_app(wb_router):
|
||||
|
||||
return response(env, start_response)
|
||||
|
||||
return application
|
||||
|
||||
|
||||
#=================================================================
|
||||
def handle_exception(env, error_view, exc, print_trace):
|
||||
@ -126,13 +134,10 @@ def init_app(init_func, load_yaml=True, config_file=None):
|
||||
msg = '*** pywb app inited with config from "%s"!\n'
|
||||
logging.info(msg, init_func.__name__)
|
||||
|
||||
return create_wb_app(wb_router)
|
||||
return WSGIApp(wb_router)
|
||||
|
||||
|
||||
#=================================================================
|
||||
DEFAULT_PORT = 8080
|
||||
|
||||
|
||||
def start_wsgi_server(the_app):
|
||||
from wsgiref.simple_server import make_server
|
||||
from optparse import OptionParser
|
||||
@ -144,12 +149,10 @@ def start_wsgi_server(the_app):
|
||||
|
||||
port = options.port
|
||||
|
||||
if port is None:
|
||||
try:
|
||||
config = load_default_config()
|
||||
port = config.get('port', DEFAULT_PORT)
|
||||
except:
|
||||
port = DEFAULT_PORT
|
||||
port = the_app.port
|
||||
|
||||
if not port:
|
||||
port = DEFAULT_PORT
|
||||
|
||||
logging.debug('Starting CDX Server on port %s', port)
|
||||
|
||||
|
@ -90,6 +90,9 @@ enable_http_proxy: true
|
||||
# enable cdx server api for querying cdx directly (experimental)
|
||||
enable_cdx_api: true
|
||||
|
||||
# test different port
|
||||
port: 9000
|
||||
|
||||
# optional reporter callback func
|
||||
# if set, called with request and cdx object
|
||||
reporter: !!python/object/new:tests.fixture.PrintReporter []
|
||||
|
Loading…
x
Reference in New Issue
Block a user