1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

live handler: allow live rewrite handler to be specified as one of the collections in pywb

by setting index_paths to '$liveweb'. When used, a RewriteHandler is created instead of a WBHandler.
'proxyhostport' can also be specified to route the live rewrite through a proxy.

fallback: allow fallback to a different handler (usually live rewrite) by specifying
'redir_fallback' with the name of that handler. Instead of returning a 404, a not-found
result will internally call the fallback handler to get a response.
This commit is contained in:
Ilya Kreymer 2014-07-20 16:36:49 -07:00
parent b785cd6f08
commit 6da27789eb
5 changed files with 69 additions and 20 deletions

View File

@ -25,6 +25,8 @@ class LiveRewriter(object):
self.default_proxy = default_proxy self.default_proxy = default_proxy
if self.default_proxy: if self.default_proxy:
logging.debug('Live Rewrite via proxy ' + self.default_proxy) logging.debug('Live Rewrite via proxy ' + self.default_proxy)
else:
logging.debug('Live Rewrite Direct (no proxy)')
def fetch_local_file(self, uri): def fetch_local_file(self, uri):
fh = open(uri) fh = open(uri)
@ -148,7 +150,8 @@ class LiveRewriter(object):
'timestamp': timestamp, 'timestamp': timestamp,
'original': url, 'original': url,
'statuscode': status_headers.get_statuscode(), 'statuscode': status_headers.get_statuscode(),
'mimetype': status_headers.get_header('Content-Type') 'mimetype': status_headers.get_header('Content-Type'),
'is_live': True,
} }
result = (self.rewriter. result = (self.rewriter.

View File

@ -25,7 +25,7 @@ class CDXAPIHandler(BaseHandler):
return WbResponse.text_stream(cdx_iter) return WbResponse.text_stream(cdx_iter)
def __str__(self): def __str__(self):
return 'CDX Handler: ' + str(self.index_handler) return 'CDX Index Handler'
@staticmethod @staticmethod
def extract_params_from_wsgi_env(env): def extract_params_from_wsgi_env(env):

View File

@ -14,7 +14,7 @@ from pywb.framework.wbrequestresponse import WbResponse
#================================================================= #=================================================================
class WBHandler(WbUrlHandler): class WBHandler(WbUrlHandler):
def __init__(self, index_reader, replay, def __init__(self, index_reader, replay,
search_view=None, config=None): search_view=None, config=None, handler_dict=None):
self.index_reader = index_reader self.index_reader = index_reader
@ -22,24 +22,45 @@ class WBHandler(WbUrlHandler):
self.search_view = search_view self.search_view = search_view
self.fallback_handler = None
if handler_dict:
fallback = config.get('redir_fallback')
if fallback:
self.fallback_handler = handler_dict.get(fallback)
def __call__(self, wbrequest): def __call__(self, wbrequest):
if wbrequest.wb_url_str == '/': if wbrequest.wb_url_str == '/':
return self.render_search_page(wbrequest) return self.render_search_page(wbrequest)
with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t: try:
response = self.index_reader.load_for_request(wbrequest) with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
response = self.index_reader.load_for_request(wbrequest)
except NotFoundException as nfe:
return self.handle_not_found(wbrequest, nfe)
if isinstance(response, WbResponse): if isinstance(response, WbResponse):
return response return response
cdx_lines = response[0] cdx_lines, cdx_callback = response
cdx_callback = response[1] return self.handle_replay(wbrequest, cdx_lines, cdx_callback)
def handle_replay(self, wbrequest, cdx_lines, cdx_callback):
with PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t: with PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
return self.replay(wbrequest, return self.replay(wbrequest,
cdx_lines, cdx_lines,
cdx_callback) cdx_callback)
def handle_not_found(self, wbrequest, nfe):
if (not self.fallback_handler or
wbrequest.wb_url.is_query() or
wbrequest.wb_url.is_identity):
raise
return self.fallback_handler(wbrequest)
#new_url = (self.redir_fallback + wbrequest.wb_url.to_str(timestamp=''))
#return WbResponse.redir_response(new_url)
def render_search_page(self, wbrequest, **kwargs): def render_search_page(self, wbrequest, **kwargs):
if self.search_view: if self.search_view:
return self.search_view.render_response(wbrequest=wbrequest, return self.search_view.render_response(wbrequest=wbrequest,

View File

@ -15,6 +15,9 @@ class RewriteHandler(WbUrlHandler):
def __call__(self, wbrequest): def __call__(self, wbrequest):
return self.rewrite_view(wbrequest) return self.rewrite_view(wbrequest)
def __str__(self):
return 'Live Web Rewrite Handler'
#================================================================= #=================================================================
def create_live_rewriter_app(config={}): def create_live_rewriter_app(config={}):

View File

@ -13,6 +13,7 @@ from views import J2TemplateView, add_env_globals
from views import J2HtmlCapturesView, HeadInsertView from views import J2HtmlCapturesView, HeadInsertView
from replay_views import ReplayView from replay_views import ReplayView
from live_rewrite_handler import RewriteHandler
from query_handler import QueryHandler from query_handler import QueryHandler
from handlers import WBHandler from handlers import WBHandler
@ -61,7 +62,7 @@ class DictChain:
#================================================================= #=================================================================
def create_wb_handler(query_handler, config): def create_wb_handler(query_handler, config, handler_dict={}):
cookie_maker = config.get('cookie_maker') cookie_maker = config.get('cookie_maker')
record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker) record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker)
@ -88,29 +89,40 @@ def create_wb_handler(query_handler, config):
replayer, replayer,
search_view=search_view, search_view=search_view,
config=config, config=config,
handler_dict=handler_dict,
) )
return wb_handler return wb_handler
#================================================================= #=================================================================
def init_collection(value, config): def create_live_handler(config):
live_handler = RewriteHandler(config)
return live_handler
#=================================================================
def init_route_config(value, config):
if isinstance(value, str): if isinstance(value, str):
value = {'index_paths': value} value = dict(index_paths=value)
route_config = DictChain(value, config) route_config = DictChain(value, config)
return route_config
#=================================================================
def init_collection(route_config):
ds_rules_file = route_config.get('domain_specific_rules', None) ds_rules_file = route_config.get('domain_specific_rules', None)
html_view = (J2HtmlCapturesView. html_view = (J2HtmlCapturesView.
create_template(config.get('query_html'), create_template(route_config.get('query_html'),
'Captures Page')) 'Captures Page'))
query_handler = QueryHandler.init_from_config(route_config, query_handler = QueryHandler.init_from_config(route_config,
ds_rules_file, ds_rules_file,
html_view) html_view)
return route_config, query_handler return query_handler
#================================================================= #=================================================================
@ -139,8 +151,8 @@ def create_cdx_server_app(passed_config):
routes = [] routes = []
for name, value in collections.iteritems(): for name, value in collections.iteritems():
result = init_collection(value, config) route_config = init_route_config(value, config)
route_config, query_handler = result query_handler = init_collection(route_config)
cdx_api_suffix = route_config.get('enable_cdx_api', True) cdx_api_suffix = route_config.get('enable_cdx_api', True)
@ -173,23 +185,33 @@ def create_wb_router(passed_config={}):
else: else:
request_class = WbRequest request_class = WbRequest
#if config.get('use_lxml_parser', False): # store live and replay handlers
# use_lxml_parser() handler_dict = {}
for name, value in collections.iteritems(): for name, value in collections.iteritems():
if isinstance(value, BaseHandler): if isinstance(value, BaseHandler):
handler_dict[name] = value
routes.append(Route(name, value)) routes.append(Route(name, value))
continue continue
result = init_collection(value, config) route_config = init_route_config(value, config)
route_config, query_handler = result
if route_config.get('index_paths') == '$liveweb':
live = create_live_handler(route_config)
handler_dict[name] = live
routes.append(Route(name, live))
continue
query_handler = init_collection(route_config)
wb_handler = create_wb_handler( wb_handler = create_wb_handler(
query_handler=query_handler, query_handler=query_handler,
config=route_config config=route_config,
handler_dict=handler_dict,
) )
handler_dict[name] = wb_handler
logging.debug('Adding Collection: ' + name) logging.debug('Adding Collection: ' + name)
route_class = route_config.get('route_class', Route) route_class = route_config.get('route_class', Route)