diff --git a/pywb/ui/error.html b/pywb/ui/error.html index b122fc38..b3a8c478 100644 --- a/pywb/ui/error.html +++ b/pywb/ui/error.html @@ -9,10 +9,3 @@

{% endif %} - -{% if env.pywb_proxy_magic and err_url and status == '404 Not Found' %} -

-Try Different Collection -

-{% endif %} - diff --git a/pywb/ui/not_found.html b/pywb/ui/not_found.html new file mode 100644 index 00000000..39faa3b3 --- /dev/null +++ b/pywb/ui/not_found.html @@ -0,0 +1,10 @@ +

Url Not Found

+ +The url {{ url }} could not be found in this collection. + +{% if env.pywb_proxy_magic and url %} +

+Try Different Collection +

+{% endif %} + diff --git a/pywb/ui/query.html b/pywb/ui/query.html index 2d1f5c86..3e54534b 100644 --- a/pywb/ui/query.html +++ b/pywb/ui/query.html @@ -24,7 +24,8 @@ function ts_to_date(ts, is_gmt) -

pywb Sample Calendar Results

+

pywb Query Results

+ {% if cdx_lines | length > 0 %} {{ cdx_lines | length }} captures of {{ url }} @@ -47,5 +48,8 @@ function ts_to_date(ts, is_gmt)

* Unique captures are bold. Other captures are duplicates of a previous capture.

+ {% else %} + No captures found for {{ url }} + {% endif %} diff --git a/pywb/webapp/handlers.py b/pywb/webapp/handlers.py index 54ef92e4..ed5a5af4 100644 --- a/pywb/webapp/handlers.py +++ b/pywb/webapp/handlers.py @@ -49,12 +49,9 @@ class SearchPageWbUrlHandler(WbUrlHandler): self.banner_html = None def render_search_page(self, wbrequest, **kwargs): - if self.search_view: - return self.search_view.render_response(wbrequest=wbrequest, - prefix=wbrequest.wb_prefix, - **kwargs) - else: - return WbResponse.text_response('No Lookup Url Specified') + return self.search_view.render_response(wbrequest=wbrequest, + prefix=wbrequest.wb_prefix, + **kwargs) def __call__(self, wbrequest): # root search page @@ -110,6 +107,9 @@ class WBHandler(SearchPageWbUrlHandler): super(WBHandler, self).__init__(config) self.index_reader = query_handler + self.not_found_view = (J2TemplateView. + create_template(config.get('not_found_html'), + 'Not Found Error')) cookie_maker = config.get('cookie_maker') record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker) @@ -152,12 +152,19 @@ class WBHandler(SearchPageWbUrlHandler): cdx_callback) def handle_not_found(self, wbrequest, nfe): - if (not self.fallback_handler or - wbrequest.wb_url.is_query() or - wbrequest.wb_url.is_identity): - raise + # check fallback: only for replay queries and not for identity + if (self.fallback_handler and + not wbrequest.wb_url.is_query() and + not wbrequest.wb_url.is_identity): + return self.fallback_handler(wbrequest) - return self.fallback_handler(wbrequest) + # if capture query, just return capture page + if wbrequest.wb_url.is_query(): + return self.index_reader.make_cdx_response(wbrequest, [], 'html') + else: + return self.not_found_view.render_response(status='404 Not Found', + env=wbrequest.env, + url=wbrequest.wb_url.url) def __str__(self): return 'Web Archive Replay Handler' diff --git a/pywb/webapp/pywb_init.py b/pywb/webapp/pywb_init.py index bcd329b3..d31a91c7 100644 --- a/pywb/webapp/pywb_init.py +++ b/pywb/webapp/pywb_init.py @@ -34,6 +34,7 @@ DEFAULTS = { 'search_html': 'ui/search.html', 'home_html': 'ui/index.html', 'error_html': 'ui/error.html', + 'not_found_html': 'ui/not_found.html', 'proxy_select_html': 'ui/proxy_select.html', 'proxy_cert_download_html': 'ui/proxy_cert_download.html', diff --git a/tests/test_config.yaml b/tests/test_config.yaml index ad010789..1d034671 100644 --- a/tests/test_config.yaml +++ b/tests/test_config.yaml @@ -86,6 +86,10 @@ home_html: ui/index.html # if omitted, a text response is returned error_html: ui/error.html + +# template for 404 not found error, may be customized per collection +not_found_html: ui/not_found.html + # ==== Other Paths ==== # Rewrite urls with absolute paths instead of relative diff --git a/tests/test_integration.py b/tests/test_integration.py index 3322613b..17161ae3 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -77,6 +77,13 @@ class TestWb: # 17 Captures + header assert len(resp.html.find_all('tr')) == 18 + def test_calendar_not_found(self): + # query with no results + resp = self.testapp.get('/pywb/*/http://not-exist.example.com') + self._assert_basic_html(resp) + assert 'No captures found' in resp.body, resp.body + assert len(resp.html.find_all('tr')) == 0 + def test_cdx_query(self): resp = self.testapp.get('/pywb/cdx_/*/http://www.iana.org/') self._assert_basic_text(resp) @@ -374,6 +381,11 @@ class TestWb: assert resp.status_int == 403 assert 'Excluded' in resp.body + def test_replay_not_found(self): + resp = self.testapp.head('/pywb/http://not-exist.example.com', status=404) + assert resp.content_type == 'text/html' + assert resp.status_int == 404 + def test_static_content(self): resp = self.testapp.get('/static/test/route/wb.css') assert resp.status_int == 200