From 3020606608bcce8f101111fcec4ee88b2e058cdb Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 25 Feb 2018 14:06:23 -0800 Subject: [PATCH] simplify exception handling: - use WbException throughout, only catch HTTPException from werkzeug routing - only apply refer redirect check for 404 not found errors - xmlquery index: log unexpected exceptions, treat missing element as not found --- pywb/apps/frontendapp.py | 18 +++++++++++++----- pywb/warcserver/index/indexsource.py | 23 ++++++++++++++--------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/pywb/apps/frontendapp.py b/pywb/apps/frontendapp.py index 9aabe7be..35adf401 100644 --- a/pywb/apps/frontendapp.py +++ b/pywb/apps/frontendapp.py @@ -447,7 +447,7 @@ class FrontEndApp(object): :param dict environ: The WSGI environment dictionary for the request :param str msg: The error message """ - raise NotFound(response=self.rewriterapp._error_response(environ, NotFoundException(msg))) + raise NotFoundException(msg) def _check_refer_redirect(self, environ): """Returns a WbResponse for a HTTP 307 redirection if the HTTP referer header is the same as the HTTP host header @@ -501,21 +501,29 @@ class FrontEndApp(object): environ['pywb.app_prefix'] = environ.get('SCRIPT_NAME', '') response = endpoint(environ, **args) - return response(environ, start_response) - except HTTPException as e: + except HTTPException as hte: redir = self._check_refer_redirect(environ) if redir: return redir(environ, start_response) - return e(environ, start_response) + response = hte + + except WbException as wbe: + if wbe.status_code == 404: + redir = self._check_refer_redirect(environ) + if redir: + return redir(environ, start_response) + + response = self.rewriterapp.handle_error(environ, wbe) except Exception as e: if self.debug: traceback.print_exc() response = self.rewriterapp._error_response(environ, WbException('Internal Error: ' + str(e))) - return response(environ, start_response) + + return response(environ, start_response) @classmethod def create_app(cls, port): diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py index 55038616..4d1d7136 100644 --- a/pywb/warcserver/index/indexsource.py +++ b/pywb/warcserver/index/indexsource.py @@ -250,15 +250,7 @@ class XmlQueryIndexSource(BaseIndexSource): results = etree.fromstring(response.text) - items = results.find('results').findall('result') - - if matchType == 'exact': - cdx_iter = [self.convert_to_cdx(item) for item in items] - if closest: - cdx_iter = cdx_sort_closest(closest, cdx_iter, limit=10000) - - else: - cdx_iter = self.prefix_query_iter(items) + items = results.find('results') except Exception: if self.logger.getEffectiveLevel() == logging.DEBUG: @@ -267,6 +259,19 @@ class XmlQueryIndexSource(BaseIndexSource): raise NotFoundException('url {0} not found'.format(url)) + if not items: + raise NotFoundException('url {0} not found'.format(url)) + + items = items.findall('result') + + if matchType == 'exact': + cdx_iter = [self.convert_to_cdx(item) for item in items] + if closest: + cdx_iter = cdx_sort_closest(closest, cdx_iter, limit=10000) + + else: + cdx_iter = self.prefix_query_iter(items) + return cdx_iter def prefix_query_iter(self, items):