1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

live handler: allow live rewrite handler to be specified as one of the collections in pywb

by setting index_paths to '$liveweb'. When used, this creates a RewriteHandler instead of a WBHandler.
A 'proxyhostport' can also be specified to route the live rewrite through a proxy.

fallback: allow fallback to a different handler (usually live rewrite) by specifying
'redir_fallback' with the name of the handler. Instead of returning a 404, a not-found
response will internally call the fallback handler to get a response.
This commit is contained in:
Ilya Kreymer 2014-07-20 16:36:49 -07:00
parent b785cd6f08
commit 6da27789eb
5 changed files with 69 additions and 20 deletions

View File

@ -25,6 +25,8 @@ class LiveRewriter(object):
self.default_proxy = default_proxy
if self.default_proxy:
logging.debug('Live Rewrite via proxy ' + self.default_proxy)
else:
logging.debug('Live Rewrite Direct (no proxy)')
def fetch_local_file(self, uri):
fh = open(uri)
@ -148,7 +150,8 @@ class LiveRewriter(object):
'timestamp': timestamp,
'original': url,
'statuscode': status_headers.get_statuscode(),
'mimetype': status_headers.get_header('Content-Type')
'mimetype': status_headers.get_header('Content-Type'),
'is_live': True,
}
result = (self.rewriter.

View File

@ -25,7 +25,7 @@ class CDXAPIHandler(BaseHandler):
return WbResponse.text_stream(cdx_iter)
def __str__(self):
return 'CDX Handler: ' + str(self.index_handler)
return 'CDX Index Handler'
@staticmethod
def extract_params_from_wsgi_env(env):

View File

@ -14,7 +14,7 @@ from pywb.framework.wbrequestresponse import WbResponse
#=================================================================
class WBHandler(WbUrlHandler):
def __init__(self, index_reader, replay,
search_view=None, config=None):
search_view=None, config=None, handler_dict=None):
self.index_reader = index_reader
@ -22,24 +22,45 @@ class WBHandler(WbUrlHandler):
self.search_view = search_view
self.fallback_handler = None
if handler_dict:
fallback = config.get('redir_fallback')
if fallback:
self.fallback_handler = handler_dict.get(fallback)
def __call__(self, wbrequest):
if wbrequest.wb_url_str == '/':
return self.render_search_page(wbrequest)
with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
response = self.index_reader.load_for_request(wbrequest)
try:
with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
response = self.index_reader.load_for_request(wbrequest)
except NotFoundException as nfe:
return self.handle_not_found(wbrequest, nfe)
if isinstance(response, WbResponse):
return response
cdx_lines = response[0]
cdx_callback = response[1]
cdx_lines, cdx_callback = response
return self.handle_replay(wbrequest, cdx_lines, cdx_callback)
def handle_replay(self, wbrequest, cdx_lines, cdx_callback):
    """Run the replay callable for a request inside a 'replay' perf timer.

    :param wbrequest: the current request; its ``env`` mapping is read
        for the ``X_PERF`` entry that is handed to ``PerfTimer``
    :param cdx_lines: index results passed through to ``self.replay``
    :param cdx_callback: callback passed through to ``self.replay``
    :return: whatever ``self.replay`` returns
    """
    perf_flag = wbrequest.env.get('X_PERF')

    with PerfTimer(perf_flag, 'replay'):
        return self.replay(wbrequest, cdx_lines, cdx_callback)
def handle_not_found(self, wbrequest, nfe):
    """Resolve a failed index lookup, using the fallback handler if allowed.

    The fallback handler is called only when one is configured and the
    request's ``wb_url`` is neither a query (``is_query()``) nor flagged
    ``is_identity``.  Otherwise the not-found error is propagated.

    :param wbrequest: the request whose index lookup failed
    :param nfe: the exception raised by the failed lookup
    :return: the result of calling ``self.fallback_handler(wbrequest)``
    :raises: ``nfe`` itself when no fallback applies
    """
    if (not self.fallback_handler or
            wbrequest.wb_url.is_query() or
            wbrequest.wb_url.is_identity):
        # Raise the exception we were given instead of a bare ``raise``:
        # a bare ``raise`` only works while the caller is still inside
        # its ``except`` block, and fails with an unrelated error
        # otherwise.
        raise nfe

    return self.fallback_handler(wbrequest)
def render_search_page(self, wbrequest, **kwargs):
if self.search_view:
return self.search_view.render_response(wbrequest=wbrequest,

View File

@ -15,6 +15,9 @@ class RewriteHandler(WbUrlHandler):
def __call__(self, wbrequest):
return self.rewrite_view(wbrequest)
def __str__(self):
return 'Live Web Rewrite Handler'
#=================================================================
def create_live_rewriter_app(config={}):

View File

@ -13,6 +13,7 @@ from views import J2TemplateView, add_env_globals
from views import J2HtmlCapturesView, HeadInsertView
from replay_views import ReplayView
from live_rewrite_handler import RewriteHandler
from query_handler import QueryHandler
from handlers import WBHandler
@ -61,7 +62,7 @@ class DictChain:
#=================================================================
def create_wb_handler(query_handler, config):
def create_wb_handler(query_handler, config, handler_dict={}):
cookie_maker = config.get('cookie_maker')
record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker)
@ -88,29 +89,40 @@ def create_wb_handler(query_handler, config):
replayer,
search_view=search_view,
config=config,
handler_dict=handler_dict,
)
return wb_handler
#=================================================================
def init_collection(value, config):
def create_live_handler(config):
    """Build and return a ``RewriteHandler`` constructed from ``config``."""
    return RewriteHandler(config)
#=================================================================
def init_route_config(value, config):
    """Build the configuration object for a single route.

    A plain string ``value`` is shorthand for ``{'index_paths': value}``;
    any other ``value`` is used as given.  The result wraps the route
    settings together with the global ``config`` in a ``DictChain``.

    :param value: route settings, or a string holding the index path(s)
    :param config: the global configuration
    :return: a ``DictChain`` built from the route settings and ``config``
    """
    # NOTE(review): presumably DictChain consults the route settings
    # before falling back to ``config`` -- confirm against DictChain.
    route_settings = dict(index_paths=value) if isinstance(value, str) else value
    return DictChain(route_settings, config)
#=================================================================
def init_collection(route_config):
ds_rules_file = route_config.get('domain_specific_rules', None)
html_view = (J2HtmlCapturesView.
create_template(config.get('query_html'),
create_template(route_config.get('query_html'),
'Captures Page'))
query_handler = QueryHandler.init_from_config(route_config,
ds_rules_file,
html_view)
return route_config, query_handler
return query_handler
#=================================================================
@ -139,8 +151,8 @@ def create_cdx_server_app(passed_config):
routes = []
for name, value in collections.iteritems():
result = init_collection(value, config)
route_config, query_handler = result
route_config = init_route_config(value, config)
query_handler = init_collection(route_config)
cdx_api_suffix = route_config.get('enable_cdx_api', True)
@ -173,23 +185,33 @@ def create_wb_router(passed_config={}):
else:
request_class = WbRequest
#if config.get('use_lxml_parser', False):
# use_lxml_parser()
# store live and replay handlers
handler_dict = {}
for name, value in collections.iteritems():
if isinstance(value, BaseHandler):
handler_dict[name] = value
routes.append(Route(name, value))
continue
result = init_collection(value, config)
route_config, query_handler = result
route_config = init_route_config(value, config)
if route_config.get('index_paths') == '$liveweb':
live = create_live_handler(route_config)
handler_dict[name] = live
routes.append(Route(name, live))
continue
query_handler = init_collection(route_config)
wb_handler = create_wb_handler(
query_handler=query_handler,
config=route_config
config=route_config,
handler_dict=handler_dict,
)
handler_dict[name] = wb_handler
logging.debug('Adding Collection: ' + name)
route_class = route_config.get('route_class', Route)