diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index fbda24f4..b81b0144 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -25,6 +25,8 @@ class LiveRewriter(object): self.default_proxy = default_proxy if self.default_proxy: logging.debug('Live Rewrite via proxy ' + self.default_proxy) + else: + logging.debug('Live Rewrite Direct (no proxy)') def fetch_local_file(self, uri): fh = open(uri) @@ -148,7 +150,8 @@ class LiveRewriter(object): 'timestamp': timestamp, 'original': url, 'statuscode': status_headers.get_statuscode(), - 'mimetype': status_headers.get_header('Content-Type') + 'mimetype': status_headers.get_header('Content-Type'), + 'is_live': True, } result = (self.rewriter. diff --git a/pywb/webapp/cdx_api_handler.py b/pywb/webapp/cdx_api_handler.py index e3e16a72..659e6048 100644 --- a/pywb/webapp/cdx_api_handler.py +++ b/pywb/webapp/cdx_api_handler.py @@ -25,7 +25,7 @@ class CDXAPIHandler(BaseHandler): return WbResponse.text_stream(cdx_iter) def __str__(self): - return 'CDX Handler: ' + str(self.index_handler) + return 'CDX Index Handler' @staticmethod def extract_params_from_wsgi_env(env): diff --git a/pywb/webapp/handlers.py b/pywb/webapp/handlers.py index 2299d2e1..8ebe5ec2 100644 --- a/pywb/webapp/handlers.py +++ b/pywb/webapp/handlers.py @@ -14,7 +14,7 @@ from pywb.framework.wbrequestresponse import WbResponse #================================================================= class WBHandler(WbUrlHandler): def __init__(self, index_reader, replay, - search_view=None, config=None): + search_view=None, config=None, handler_dict=None): self.index_reader = index_reader @@ -22,24 +22,45 @@ class WBHandler(WbUrlHandler): self.search_view = search_view + self.fallback_handler = None + + if handler_dict: + fallback = config.get('redir_fallback') + if fallback: + self.fallback_handler = handler_dict.get(fallback) + def __call__(self, wbrequest): if wbrequest.wb_url_str == '/': return self.render_search_page(wbrequest) - with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t: - response = self.index_reader.load_for_request(wbrequest) + try: + with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t: + response = self.index_reader.load_for_request(wbrequest) + except NotFoundException as nfe: + return self.handle_not_found(wbrequest, nfe) if isinstance(response, WbResponse): return response - cdx_lines = response[0] - cdx_callback = response[1] + cdx_lines, cdx_callback = response + return self.handle_replay(wbrequest, cdx_lines, cdx_callback) + def handle_replay(self, wbrequest, cdx_lines, cdx_callback): with PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t: return self.replay(wbrequest, cdx_lines, cdx_callback) + def handle_not_found(self, wbrequest, nfe): + if (not self.fallback_handler or + wbrequest.wb_url.is_query() or + wbrequest.wb_url.is_identity): + raise + + return self.fallback_handler(wbrequest) + #new_url = (self.redir_fallback + wbrequest.wb_url.to_str(timestamp='')) + #return WbResponse.redir_response(new_url) + def render_search_page(self, wbrequest, **kwargs): if self.search_view: return self.search_view.render_response(wbrequest=wbrequest, diff --git a/pywb/webapp/live_rewrite_handler.py b/pywb/webapp/live_rewrite_handler.py index 6b1d69e3..d2af7028 100644 --- a/pywb/webapp/live_rewrite_handler.py +++ b/pywb/webapp/live_rewrite_handler.py @@ -15,6 +15,9 @@ class RewriteHandler(WbUrlHandler): def __call__(self, wbrequest): return self.rewrite_view(wbrequest) + def __str__(self): + return 'Live Web Rewrite Handler' + #================================================================= def create_live_rewriter_app(config={}): diff --git a/pywb/webapp/pywb_init.py b/pywb/webapp/pywb_init.py index b3ff1448..ffa2101b 100644 --- a/pywb/webapp/pywb_init.py +++ b/pywb/webapp/pywb_init.py @@ -13,6 +13,7 @@ from views import J2TemplateView, add_env_globals from views import J2HtmlCapturesView, HeadInsertView from replay_views import ReplayView +from live_rewrite_handler import RewriteHandler from query_handler import QueryHandler from handlers import WBHandler @@ -61,7 +62,7 @@ class DictChain: #================================================================= -def create_wb_handler(query_handler, config): +def create_wb_handler(query_handler, config, handler_dict={}): cookie_maker = config.get('cookie_maker') record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker) @@ -88,29 +89,40 @@ def create_wb_handler(query_handler, config): replayer, search_view=search_view, config=config, + handler_dict=handler_dict, ) return wb_handler #================================================================= -def init_collection(value, config): +def create_live_handler(config): + live_handler = RewriteHandler(config) + return live_handler + + +#================================================================= +def init_route_config(value, config): if isinstance(value, str): - value = {'index_paths': value} + value = dict(index_paths=value) route_config = DictChain(value, config) + return route_config + +#================================================================= +def init_collection(route_config): ds_rules_file = route_config.get('domain_specific_rules', None) html_view = (J2HtmlCapturesView. - create_template(config.get('query_html'), + create_template(route_config.get('query_html'), 'Captures Page')) query_handler = QueryHandler.init_from_config(route_config, ds_rules_file, html_view) - return route_config, query_handler + return query_handler #================================================================= @@ -139,8 +151,8 @@ def create_cdx_server_app(passed_config): routes = [] for name, value in collections.iteritems(): - result = init_collection(value, config) - route_config, query_handler = result + route_config = init_route_config(value, config) + query_handler = init_collection(route_config) cdx_api_suffix = route_config.get('enable_cdx_api', True) @@ -173,23 +185,33 @@ def create_wb_router(passed_config={}): else: request_class = WbRequest - #if config.get('use_lxml_parser', False): - # use_lxml_parser() + # store live and replay handlers + handler_dict = {} for name, value in collections.iteritems(): - if isinstance(value, BaseHandler): + handler_dict[name] = value routes.append(Route(name, value)) continue - result = init_collection(value, config) - route_config, query_handler = result + route_config = init_route_config(value, config) + + if route_config.get('index_paths') == '$liveweb': + live = create_live_handler(route_config) + handler_dict[name] = live + routes.append(Route(name, live)) + continue + + query_handler = init_collection(route_config) wb_handler = create_wb_handler( query_handler=query_handler, - config=route_config + config=route_config, + handler_dict=handler_dict, ) + handler_dict[name] = wb_handler + logging.debug('Adding Collection: ' + name) route_class = route_config.get('route_class', Route)