1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-26 07:49:24 +01:00
pywb/pywb/webapp/pywb_init.py

274 lines
8.1 KiB
Python

from pywb.utils.dsrules import DEFAULT_RULES_FILE
from pywb.framework.archivalrouter import ArchivalRouter, Route
from pywb.framework.proxy import ProxyArchivalRouter
from pywb.framework.wbrequestresponse import WbRequest
from pywb.framework.memento import MementoRequest
from pywb.framework.basehandlers import BaseHandler
from views import J2TemplateView, add_env_globals
from views import J2HtmlCapturesView, HeadInsertView
from live_rewrite_handler import RewriteHandler
from query_handler import QueryHandler
from handlers import WBHandler
from handlers import StaticHandler
from handlers import DebugEchoHandler, DebugEchoEnvHandler
from cdx_api_handler import CDXAPIHandler
import os
import logging
#=================================================================
DEFAULTS = {
'hostpaths': ['http://localhost:8080'],
'collections': {'pywb': './sample_archive/cdx/'},
'archive_paths': './sample_archive/warcs/',
'head_insert_html': 'ui/head_insert.html',
'query_html': 'ui/query.html',
'search_html': 'ui/search.html',
'home_html': 'ui/index.html',
'error_html': 'ui/error.html',
'proxy_select_html': 'ui/proxy_select.html',
'proxy_cert_download_html': 'ui/proxy_cert_download.html',
'template_globals': {'static_path': 'static/default'},
'static_routes': {'static/default': 'pywb/static/'},
'domain_specific_rules': DEFAULT_RULES_FILE,
'enable_memento': True
}
#=================================================================
class DictChain:
def __init__(self, *dicts):
self.dicts = dicts
def get(self, key, default_val=None):
for d in self.dicts:
val = d.get(key)
if val is not None:
return val
return default_val
#=================================================================
def create_wb_handler(query_handler, config):
wb_handler_class = config.get('wb_handler_class', WBHandler)
wb_handler = wb_handler_class(
query_handler,
config=config,
)
return wb_handler
#=================================================================
def create_live_handler(config):
wb_handler_class = config.get('wb_handler_class', RewriteHandler)
live_handler = wb_handler_class(config)
return live_handler
#=================================================================
def init_route_config(value, config):
if isinstance(value, str) or isinstance(value, list):
value = dict(index_paths=value)
route_config = DictChain(value, config)
return route_config
#=================================================================
def init_collection(route_config):
ds_rules_file = route_config.get('domain_specific_rules', None)
html_view = (J2HtmlCapturesView.
create_template(route_config.get('query_html'),
'Captures Page'))
server_cls = route_config.get('server_cls')
query_handler = QueryHandler.init_from_config(route_config,
ds_rules_file,
html_view,
server_cls)
return query_handler
#=================================================================
def add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler):
# if bool, use -cdx suffix, else use custom string
# as the suffix
if isinstance(cdx_api_suffix, bool):
name += '-cdx'
else:
name += str(cdx_api_suffix)
routes.append(Route(name, CDXAPIHandler(query_handler)))
#=================================================================
def create_cdx_server_app(passed_config):
"""
Create a cdx server api-only app
For each collection, create a /<coll>-cdx access point
which follows the cdx api
"""
config = DictChain(passed_config, DEFAULTS)
collections = config.get('collections')
routes = []
for name, value in collections.iteritems():
route_config = init_route_config(value, config)
query_handler = init_collection(route_config)
cdx_api_suffix = route_config.get('enable_cdx_api', True)
add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler)
return ArchivalRouter(routes)
#=================================================================
def create_wb_router(passed_config={}):
config = DictChain(passed_config, DEFAULTS)
routes = []
# TODO: examine this more
hostname = os.environ.get('PYWB_HOST_NAME')
if hostname:
hostpaths = [hostname]
else:
hostpaths = config.get('hostpaths')
port = config.get('port')
# collections based on cdx source
collections = config.get('collections')
if config.get('enable_memento', False):
request_class = MementoRequest
else:
request_class = WbRequest
# store live and replay handlers
handler_dict = {}
# setup template globals
template_globals = config.get('template_globals')
if template_globals:
add_env_globals(template_globals)
for name, value in collections.iteritems():
if isinstance(value, BaseHandler):
handler_dict[name] = value
routes.append(Route(name, value))
continue
route_config = init_route_config(value, config)
if route_config.get('index_paths') == '$liveweb':
live = create_live_handler(route_config)
handler_dict[name] = live
routes.append(Route(name, live))
continue
query_handler = init_collection(route_config)
wb_handler = create_wb_handler(
query_handler=query_handler,
config=route_config,
)
handler_dict[name] = wb_handler
logging.debug('Adding Collection: ' + name)
route_class = route_config.get('route_class', Route)
routes.append(route_class(name, wb_handler,
config=route_config,
request_class=request_class))
# cdx query handler
cdx_api_suffix = route_config.get('enable_cdx_api', False)
if cdx_api_suffix:
add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler)
if config.get('debug_echo_env', False):
routes.append(Route('echo_env', DebugEchoEnvHandler()))
if config.get('debug_echo_req', False):
routes.append(Route('echo_req', DebugEchoHandler()))
static_routes = config.get('static_routes')
for static_name, static_path in static_routes.iteritems():
routes.append(Route(static_name, StaticHandler(static_path)))
# resolve any cross handler references
for route in routes:
if hasattr(route.handler, 'resolve_refs'):
route.handler.resolve_refs(handler_dict)
# default to regular archival mode
router = ArchivalRouter
if config.get('enable_http_proxy', False):
router = ProxyArchivalRouter
view = J2TemplateView.create_template(
config.get('proxy_select_html'),
'Proxy Coll Selector')
if not 'proxy_options' in passed_config:
passed_config['proxy_options'] = {}
if view:
passed_config['proxy_options']['proxy_select_view'] = view
view = J2TemplateView.create_template(
config.get('proxy_cert_download_html'),
'Proxy Cert Download')
if view:
passed_config['proxy_options']['proxy_cert_download_view'] = view
# Finally, create wb router
return router(
routes,
# Specify hostnames that pywb will be running on
# This will help catch occasionally missed rewrites that
# fall-through to the host
# (See archivalrouter.ReferRedirect)
hostpaths=hostpaths,
port=port,
abs_path=config.get('absolute_paths', True),
home_view=J2TemplateView.create_template(config.get('home_html'),
'Home Page'),
error_view=J2TemplateView.create_template(config.get('error_html'),
'Error Page'),
config=config
)