diff --git a/pywb/apps/frontendapp.py b/pywb/apps/frontendapp.py index ac942c06..e03c632f 100644 --- a/pywb/apps/frontendapp.py +++ b/pywb/apps/frontendapp.py @@ -251,7 +251,7 @@ class FrontEndApp(object): response.add_access_control_headers(env=environ) return response except: - self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath)) + self.raise_not_found(environ, 'static_file_not_found', filepath) def get_metadata(self, coll): """Retrieve the metadata associated with a collection @@ -282,7 +282,7 @@ class FrontEndApp(object): :rtype: WbResponse """ if not self.is_valid_coll(coll): - self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll)) + self.raise_not_found(environ, 'coll_not_found', coll) self.setup_paths(environ, coll) @@ -358,7 +358,7 @@ class FrontEndApp(object): :rtype: WbResponse """ if not self.is_valid_coll(coll): - self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll)) + self.raise_not_found(environ, 'coll_not_found', coll) self.setup_paths(environ, coll, record) @@ -440,14 +440,8 @@ class FrontEndApp(object): return (coll in self.warcserver.list_fixed_routes() or coll in self.warcserver.list_dynamic_routes()) - def raise_not_found(self, environ, msg): - """Utility function for raising a werkzeug.exceptions.NotFound execption with the supplied WSGI environment - and message. - - :param dict environ: The WSGI environment dictionary for the request - :param str msg: The error message - """ - raise NotFoundException(msg) + def raise_not_found(self, environ, err_type, url): + raise AppPageNotFound(err_type, url) def _check_refer_redirect(self, environ): """Returns a WbResponse for a HTTP 307 redirection if the HTTP referer header is the same as the HTTP host header @@ -642,6 +636,13 @@ class FrontEndApp(object): return response +# ============================================================================ +class AppPageNotFound(WbException): + @property + def status_code(self): + return 404 + + # ============================================================================ class MetadataCache(object): """This class holds the collection medata template string and diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index e41b5f3c..cfc5c649 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -15,6 +15,9 @@ from pywb.rewrite.rewriteinputreq import RewriteInputRequest from pywb.rewrite.templateview import BaseInsertView, HeadInsertView, JinjaEnv, TopFrameView from pywb.rewrite.url_rewriter import IdentityUrlRewriter, UrlRewriter from pywb.rewrite.wburl import WbUrl +from pywb.rewrite.url_rewriter import UrlRewriter, IdentityUrlRewriter + +from pywb.utils.wbexception import WbException, NotFoundException from pywb.rewrite.cookies import CookieTracker from pywb.utils.canonicalize import canonicalize from pywb.utils.io import BUFF_SIZE, OffsetLimitReader, no_except_close @@ -566,7 +569,7 @@ class RewriterApp(object): return top_url def handle_error(self, environ, wbe): - if wbe.status_code == 404: + if isinstance(wbe, NotFoundException): return self._not_found_response(environ, wbe.url) else: return self._error_response(environ, wbe) diff --git a/tests/test_integration.py b/tests/test_integration.py index 2ce940ba..807bc266 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -24,11 +24,13 @@ class TestWbIntegration(BaseConfigTest): assert resp.content_type == 'text/html' assert resp.status_int == 200 - def test_pywb_invalid_path(self): - resp = self.testapp.head('/blah/', status=404) + def test_pywb_invalid_collection(self): + resp = self.testapp.get('/blah/http://example.com/', status=404) assert resp.content_type == 'text/html' assert resp.status_int == 404 + assert 'Collection not found: blah' in resp.text + def test_calendar_query(self): resp = self.testapp.get('/pywb/*/iana.org') self._assert_basic_html(resp) @@ -429,6 +431,8 @@ class TestWbIntegration(BaseConfigTest): resp = self.testapp.get('/static/notfound.css', status = 404) assert resp.status_int == 404 + assert 'Static file not found: notfound.css' in resp.text + def test_cdx_server_filters(self): resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/_css/2013.1/screen.css&filter=mime:warc/revisit&filter=filename:dupes.warc.gz') assert resp.content_type == 'text/x-cdxj' @@ -451,7 +455,6 @@ class TestWbIntegration(BaseConfigTest): origfilenames = list(map(lambda cdx: cdx['orig.filename'], cdxs)) assert origfilenames == ['iana.warc.gz', 'iana.warc.gz', '-'] - # surt() no longer errors on this in 0.3b #def test_error(self): # resp = self.testapp.get('/pywb/?abc', status = 400)