mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
live handler: allow live rewrite handler to be specified as one of the collections in pywb
by setting index_paths to '$liveweb'. When used, creates a RewriteHandler instead of a WBHandler. Can also specify 'proxyhostport' to set the live rewrite to go through a proxy. fallback: allow fallback to a different handler (usually live rewrite) by specifying 'redir_fallback' with the name of the handler. Instead of a 404, a not-found response will internally call the fallback handler to get a response.
This commit is contained in:
parent
b785cd6f08
commit
6da27789eb
@ -25,6 +25,8 @@ class LiveRewriter(object):
|
|||||||
self.default_proxy = default_proxy
|
self.default_proxy = default_proxy
|
||||||
if self.default_proxy:
|
if self.default_proxy:
|
||||||
logging.debug('Live Rewrite via proxy ' + self.default_proxy)
|
logging.debug('Live Rewrite via proxy ' + self.default_proxy)
|
||||||
|
else:
|
||||||
|
logging.debug('Live Rewrite Direct (no proxy)')
|
||||||
|
|
||||||
def fetch_local_file(self, uri):
|
def fetch_local_file(self, uri):
|
||||||
fh = open(uri)
|
fh = open(uri)
|
||||||
@ -148,7 +150,8 @@ class LiveRewriter(object):
|
|||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'original': url,
|
'original': url,
|
||||||
'statuscode': status_headers.get_statuscode(),
|
'statuscode': status_headers.get_statuscode(),
|
||||||
'mimetype': status_headers.get_header('Content-Type')
|
'mimetype': status_headers.get_header('Content-Type'),
|
||||||
|
'is_live': True,
|
||||||
}
|
}
|
||||||
|
|
||||||
result = (self.rewriter.
|
result = (self.rewriter.
|
||||||
|
@ -25,7 +25,7 @@ class CDXAPIHandler(BaseHandler):
|
|||||||
return WbResponse.text_stream(cdx_iter)
|
return WbResponse.text_stream(cdx_iter)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'CDX Handler: ' + str(self.index_handler)
|
return 'CDX Index Handler'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def extract_params_from_wsgi_env(env):
|
def extract_params_from_wsgi_env(env):
|
||||||
|
@ -14,7 +14,7 @@ from pywb.framework.wbrequestresponse import WbResponse
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
class WBHandler(WbUrlHandler):
|
class WBHandler(WbUrlHandler):
|
||||||
def __init__(self, index_reader, replay,
|
def __init__(self, index_reader, replay,
|
||||||
search_view=None, config=None):
|
search_view=None, config=None, handler_dict=None):
|
||||||
|
|
||||||
self.index_reader = index_reader
|
self.index_reader = index_reader
|
||||||
|
|
||||||
@ -22,24 +22,45 @@ class WBHandler(WbUrlHandler):
|
|||||||
|
|
||||||
self.search_view = search_view
|
self.search_view = search_view
|
||||||
|
|
||||||
|
self.fallback_handler = None
|
||||||
|
|
||||||
|
if handler_dict:
|
||||||
|
fallback = config.get('redir_fallback')
|
||||||
|
if fallback:
|
||||||
|
self.fallback_handler = handler_dict.get(fallback)
|
||||||
|
|
||||||
def __call__(self, wbrequest):
|
def __call__(self, wbrequest):
|
||||||
if wbrequest.wb_url_str == '/':
|
if wbrequest.wb_url_str == '/':
|
||||||
return self.render_search_page(wbrequest)
|
return self.render_search_page(wbrequest)
|
||||||
|
|
||||||
with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
|
try:
|
||||||
response = self.index_reader.load_for_request(wbrequest)
|
with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
|
||||||
|
response = self.index_reader.load_for_request(wbrequest)
|
||||||
|
except NotFoundException as nfe:
|
||||||
|
return self.handle_not_found(wbrequest, nfe)
|
||||||
|
|
||||||
if isinstance(response, WbResponse):
|
if isinstance(response, WbResponse):
|
||||||
return response
|
return response
|
||||||
|
|
||||||
cdx_lines = response[0]
|
cdx_lines, cdx_callback = response
|
||||||
cdx_callback = response[1]
|
return self.handle_replay(wbrequest, cdx_lines, cdx_callback)
|
||||||
|
|
||||||
|
def handle_replay(self, wbrequest, cdx_lines, cdx_callback):
|
||||||
with PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
|
with PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
|
||||||
return self.replay(wbrequest,
|
return self.replay(wbrequest,
|
||||||
cdx_lines,
|
cdx_lines,
|
||||||
cdx_callback)
|
cdx_callback)
|
||||||
|
|
||||||
|
def handle_not_found(self, wbrequest, nfe):
|
||||||
|
if (not self.fallback_handler or
|
||||||
|
wbrequest.wb_url.is_query() or
|
||||||
|
wbrequest.wb_url.is_identity):
|
||||||
|
raise
|
||||||
|
|
||||||
|
return self.fallback_handler(wbrequest)
|
||||||
|
#new_url = (self.redir_fallback + wbrequest.wb_url.to_str(timestamp=''))
|
||||||
|
#return WbResponse.redir_response(new_url)
|
||||||
|
|
||||||
def render_search_page(self, wbrequest, **kwargs):
|
def render_search_page(self, wbrequest, **kwargs):
|
||||||
if self.search_view:
|
if self.search_view:
|
||||||
return self.search_view.render_response(wbrequest=wbrequest,
|
return self.search_view.render_response(wbrequest=wbrequest,
|
||||||
|
@ -15,6 +15,9 @@ class RewriteHandler(WbUrlHandler):
|
|||||||
def __call__(self, wbrequest):
|
def __call__(self, wbrequest):
|
||||||
return self.rewrite_view(wbrequest)
|
return self.rewrite_view(wbrequest)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return 'Live Web Rewrite Handler'
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def create_live_rewriter_app(config={}):
|
def create_live_rewriter_app(config={}):
|
||||||
|
@ -13,6 +13,7 @@ from views import J2TemplateView, add_env_globals
|
|||||||
from views import J2HtmlCapturesView, HeadInsertView
|
from views import J2HtmlCapturesView, HeadInsertView
|
||||||
|
|
||||||
from replay_views import ReplayView
|
from replay_views import ReplayView
|
||||||
|
from live_rewrite_handler import RewriteHandler
|
||||||
|
|
||||||
from query_handler import QueryHandler
|
from query_handler import QueryHandler
|
||||||
from handlers import WBHandler
|
from handlers import WBHandler
|
||||||
@ -61,7 +62,7 @@ class DictChain:
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def create_wb_handler(query_handler, config):
|
def create_wb_handler(query_handler, config, handler_dict={}):
|
||||||
|
|
||||||
cookie_maker = config.get('cookie_maker')
|
cookie_maker = config.get('cookie_maker')
|
||||||
record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker)
|
record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker)
|
||||||
@ -88,29 +89,40 @@ def create_wb_handler(query_handler, config):
|
|||||||
replayer,
|
replayer,
|
||||||
search_view=search_view,
|
search_view=search_view,
|
||||||
config=config,
|
config=config,
|
||||||
|
handler_dict=handler_dict,
|
||||||
)
|
)
|
||||||
|
|
||||||
return wb_handler
|
return wb_handler
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def init_collection(value, config):
|
def create_live_handler(config):
|
||||||
|
live_handler = RewriteHandler(config)
|
||||||
|
return live_handler
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def init_route_config(value, config):
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
value = {'index_paths': value}
|
value = dict(index_paths=value)
|
||||||
|
|
||||||
route_config = DictChain(value, config)
|
route_config = DictChain(value, config)
|
||||||
|
return route_config
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def init_collection(route_config):
|
||||||
ds_rules_file = route_config.get('domain_specific_rules', None)
|
ds_rules_file = route_config.get('domain_specific_rules', None)
|
||||||
|
|
||||||
html_view = (J2HtmlCapturesView.
|
html_view = (J2HtmlCapturesView.
|
||||||
create_template(config.get('query_html'),
|
create_template(route_config.get('query_html'),
|
||||||
'Captures Page'))
|
'Captures Page'))
|
||||||
|
|
||||||
query_handler = QueryHandler.init_from_config(route_config,
|
query_handler = QueryHandler.init_from_config(route_config,
|
||||||
ds_rules_file,
|
ds_rules_file,
|
||||||
html_view)
|
html_view)
|
||||||
|
|
||||||
return route_config, query_handler
|
return query_handler
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -139,8 +151,8 @@ def create_cdx_server_app(passed_config):
|
|||||||
routes = []
|
routes = []
|
||||||
|
|
||||||
for name, value in collections.iteritems():
|
for name, value in collections.iteritems():
|
||||||
result = init_collection(value, config)
|
route_config = init_route_config(value, config)
|
||||||
route_config, query_handler = result
|
query_handler = init_collection(route_config)
|
||||||
|
|
||||||
cdx_api_suffix = route_config.get('enable_cdx_api', True)
|
cdx_api_suffix = route_config.get('enable_cdx_api', True)
|
||||||
|
|
||||||
@ -173,23 +185,33 @@ def create_wb_router(passed_config={}):
|
|||||||
else:
|
else:
|
||||||
request_class = WbRequest
|
request_class = WbRequest
|
||||||
|
|
||||||
#if config.get('use_lxml_parser', False):
|
# store live and replay handlers
|
||||||
# use_lxml_parser()
|
handler_dict = {}
|
||||||
|
|
||||||
for name, value in collections.iteritems():
|
for name, value in collections.iteritems():
|
||||||
|
|
||||||
if isinstance(value, BaseHandler):
|
if isinstance(value, BaseHandler):
|
||||||
|
handler_dict[name] = value
|
||||||
routes.append(Route(name, value))
|
routes.append(Route(name, value))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
result = init_collection(value, config)
|
route_config = init_route_config(value, config)
|
||||||
route_config, query_handler = result
|
|
||||||
|
if route_config.get('index_paths') == '$liveweb':
|
||||||
|
live = create_live_handler(route_config)
|
||||||
|
handler_dict[name] = live
|
||||||
|
routes.append(Route(name, live))
|
||||||
|
continue
|
||||||
|
|
||||||
|
query_handler = init_collection(route_config)
|
||||||
|
|
||||||
wb_handler = create_wb_handler(
|
wb_handler = create_wb_handler(
|
||||||
query_handler=query_handler,
|
query_handler=query_handler,
|
||||||
config=route_config
|
config=route_config,
|
||||||
|
handler_dict=handler_dict,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
handler_dict[name] = wb_handler
|
||||||
|
|
||||||
logging.debug('Adding Collection: ' + name)
|
logging.debug('Adding Collection: ' + name)
|
||||||
|
|
||||||
route_class = route_config.get('route_class', Route)
|
route_class = route_config.get('route_class', Route)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user