mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
live handler: allow a live rewrite handler to be specified as one of the collections in pywb
by setting index_paths to '$liveweb'. When used, this creates a RewriteHandler instead of a WBHandler. 'proxyhostport' can also be specified to make the live rewrite go through a proxy. fallback: allow falling back to a different handler (usually live rewrite) by specifying 'redir_fallback' with the name of the handler. Instead of returning a 404, a not-found response will internally call the fallback handler to get a response.
This commit is contained in:
parent
b785cd6f08
commit
6da27789eb
@ -25,6 +25,8 @@ class LiveRewriter(object):
|
||||
self.default_proxy = default_proxy
|
||||
if self.default_proxy:
|
||||
logging.debug('Live Rewrite via proxy ' + self.default_proxy)
|
||||
else:
|
||||
logging.debug('Live Rewrite Direct (no proxy)')
|
||||
|
||||
def fetch_local_file(self, uri):
|
||||
fh = open(uri)
|
||||
@ -148,7 +150,8 @@ class LiveRewriter(object):
|
||||
'timestamp': timestamp,
|
||||
'original': url,
|
||||
'statuscode': status_headers.get_statuscode(),
|
||||
'mimetype': status_headers.get_header('Content-Type')
|
||||
'mimetype': status_headers.get_header('Content-Type'),
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
result = (self.rewriter.
|
||||
|
@ -25,7 +25,7 @@ class CDXAPIHandler(BaseHandler):
|
||||
return WbResponse.text_stream(cdx_iter)
|
||||
|
||||
def __str__(self):
|
||||
return 'CDX Handler: ' + str(self.index_handler)
|
||||
return 'CDX Index Handler'
|
||||
|
||||
@staticmethod
|
||||
def extract_params_from_wsgi_env(env):
|
||||
|
@ -14,7 +14,7 @@ from pywb.framework.wbrequestresponse import WbResponse
|
||||
#=================================================================
|
||||
class WBHandler(WbUrlHandler):
|
||||
def __init__(self, index_reader, replay,
|
||||
search_view=None, config=None):
|
||||
search_view=None, config=None, handler_dict=None):
|
||||
|
||||
self.index_reader = index_reader
|
||||
|
||||
@ -22,24 +22,45 @@ class WBHandler(WbUrlHandler):
|
||||
|
||||
self.search_view = search_view
|
||||
|
||||
self.fallback_handler = None
|
||||
|
||||
if handler_dict:
|
||||
fallback = config.get('redir_fallback')
|
||||
if fallback:
|
||||
self.fallback_handler = handler_dict.get(fallback)
|
||||
|
||||
def __call__(self, wbrequest):
|
||||
if wbrequest.wb_url_str == '/':
|
||||
return self.render_search_page(wbrequest)
|
||||
|
||||
with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
|
||||
response = self.index_reader.load_for_request(wbrequest)
|
||||
try:
|
||||
with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
|
||||
response = self.index_reader.load_for_request(wbrequest)
|
||||
except NotFoundException as nfe:
|
||||
return self.handle_not_found(wbrequest, nfe)
|
||||
|
||||
if isinstance(response, WbResponse):
|
||||
return response
|
||||
|
||||
cdx_lines = response[0]
|
||||
cdx_callback = response[1]
|
||||
cdx_lines, cdx_callback = response
|
||||
return self.handle_replay(wbrequest, cdx_lines, cdx_callback)
|
||||
|
||||
def handle_replay(self, wbrequest, cdx_lines, cdx_callback):
    """Invoke ``self.replay`` for the request and return its result.

    The call is wrapped in a ``PerfTimer`` labelled ``'replay'``, which
    is handed the ``X_PERF`` entry of the request environment (``None``
    when that key is absent).
    """
    perf_env = wbrequest.env.get('X_PERF')
    with PerfTimer(perf_env, 'replay'):
        replay_response = self.replay(wbrequest, cdx_lines, cdx_callback)
    return replay_response
|
||||
|
||||
def handle_not_found(self, wbrequest, nfe):
    """Decide what to do when the index lookup found nothing.

    :param wbrequest: the request whose lookup failed; its ``wb_url``
        is consulted via ``is_query()`` and ``is_identity``.
    :param nfe: the not-found exception caught by the caller.
    :returns: whatever ``self.fallback_handler(wbrequest)`` returns.
    :raises: ``nfe`` itself when no fallback handler is set, or when
        the url is a query or an identity request.
    """
    fallback = self.fallback_handler

    # Raise the exception we were given rather than using a bare
    # ``raise``: a bare ``raise`` only works while the caller is still
    # inside its ``except`` block, and would fail with an unrelated
    # error if this method were ever invoked from anywhere else.
    if not fallback:
        raise nfe

    wb_url = wbrequest.wb_url
    if wb_url.is_query() or wb_url.is_identity:
        raise nfe

    return fallback(wbrequest)
|
||||
|
||||
def render_search_page(self, wbrequest, **kwargs):
|
||||
if self.search_view:
|
||||
return self.search_view.render_response(wbrequest=wbrequest,
|
||||
|
@ -15,6 +15,9 @@ class RewriteHandler(WbUrlHandler):
|
||||
def __call__(self, wbrequest):
|
||||
return self.rewrite_view(wbrequest)
|
||||
|
||||
def __str__(self):
|
||||
return 'Live Web Rewrite Handler'
|
||||
|
||||
|
||||
#=================================================================
|
||||
def create_live_rewriter_app(config={}):
|
||||
|
@ -13,6 +13,7 @@ from views import J2TemplateView, add_env_globals
|
||||
from views import J2HtmlCapturesView, HeadInsertView
|
||||
|
||||
from replay_views import ReplayView
|
||||
from live_rewrite_handler import RewriteHandler
|
||||
|
||||
from query_handler import QueryHandler
|
||||
from handlers import WBHandler
|
||||
@ -61,7 +62,7 @@ class DictChain:
|
||||
|
||||
|
||||
#=================================================================
|
||||
def create_wb_handler(query_handler, config):
|
||||
def create_wb_handler(query_handler, config, handler_dict={}):
|
||||
|
||||
cookie_maker = config.get('cookie_maker')
|
||||
record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker)
|
||||
@ -88,29 +89,40 @@ def create_wb_handler(query_handler, config):
|
||||
replayer,
|
||||
search_view=search_view,
|
||||
config=config,
|
||||
handler_dict=handler_dict,
|
||||
)
|
||||
|
||||
return wb_handler
|
||||
|
||||
|
||||
#=================================================================
|
||||
def init_collection(value, config):
|
||||
def create_live_handler(config):
    """Return a new ``RewriteHandler`` constructed from *config*."""
    return RewriteHandler(config)
|
||||
|
||||
|
||||
#=================================================================
|
||||
def init_route_config(value, config):
|
||||
if isinstance(value, str):
|
||||
value = {'index_paths': value}
|
||||
value = dict(index_paths=value)
|
||||
|
||||
route_config = DictChain(value, config)
|
||||
return route_config
|
||||
|
||||
|
||||
#=================================================================
|
||||
def init_collection(route_config):
|
||||
ds_rules_file = route_config.get('domain_specific_rules', None)
|
||||
|
||||
html_view = (J2HtmlCapturesView.
|
||||
create_template(config.get('query_html'),
|
||||
create_template(route_config.get('query_html'),
|
||||
'Captures Page'))
|
||||
|
||||
query_handler = QueryHandler.init_from_config(route_config,
|
||||
ds_rules_file,
|
||||
html_view)
|
||||
|
||||
return route_config, query_handler
|
||||
return query_handler
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -139,8 +151,8 @@ def create_cdx_server_app(passed_config):
|
||||
routes = []
|
||||
|
||||
for name, value in collections.iteritems():
|
||||
result = init_collection(value, config)
|
||||
route_config, query_handler = result
|
||||
route_config = init_route_config(value, config)
|
||||
query_handler = init_collection(route_config)
|
||||
|
||||
cdx_api_suffix = route_config.get('enable_cdx_api', True)
|
||||
|
||||
@ -173,23 +185,33 @@ def create_wb_router(passed_config={}):
|
||||
else:
|
||||
request_class = WbRequest
|
||||
|
||||
#if config.get('use_lxml_parser', False):
|
||||
# use_lxml_parser()
|
||||
# store live and replay handlers
|
||||
handler_dict = {}
|
||||
|
||||
for name, value in collections.iteritems():
|
||||
|
||||
if isinstance(value, BaseHandler):
|
||||
handler_dict[name] = value
|
||||
routes.append(Route(name, value))
|
||||
continue
|
||||
|
||||
result = init_collection(value, config)
|
||||
route_config, query_handler = result
|
||||
route_config = init_route_config(value, config)
|
||||
|
||||
if route_config.get('index_paths') == '$liveweb':
|
||||
live = create_live_handler(route_config)
|
||||
handler_dict[name] = live
|
||||
routes.append(Route(name, live))
|
||||
continue
|
||||
|
||||
query_handler = init_collection(route_config)
|
||||
|
||||
wb_handler = create_wb_handler(
|
||||
query_handler=query_handler,
|
||||
config=route_config
|
||||
config=route_config,
|
||||
handler_dict=handler_dict,
|
||||
)
|
||||
|
||||
handler_dict[name] = wb_handler
|
||||
|
||||
logging.debug('Adding Collection: ' + name)
|
||||
|
||||
route_class = route_config.get('route_class', Route)
|
||||
|
Loading…
x
Reference in New Issue
Block a user