diff --git a/pywb/warcserver/handlers.py b/pywb/warcserver/handlers.py index 69d7f210..67b74ea4 100644 --- a/pywb/warcserver/handlers.py +++ b/pywb/warcserver/handlers.py @@ -4,6 +4,7 @@ from pywb.utils.memento import MementoUtils from warcio.recordloader import ArchiveLoadFailed +from pywb.warcserver.index.cdxobject import CDXException from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader @@ -98,13 +99,27 @@ class IndexHandler(object): content_type, res = handler(cdx_iter, fields, params) out_headers = {'Content-Type': content_type} - def check_str(lines): + first_line = None + try: + # raise exceptions early so that they can be handled properly + first_line = next(res) + except StopIteration: + pass + except CDXException as e: + errs = dict(last_exc=e) + return None, None, errs + + def check_str(first_line, lines): + if first_line is not None: + if isinstance(first_line, six.text_type): + first_line = first_line.encode('utf-8') + yield first_line for line in lines: if isinstance(line, six.text_type): line = line.encode('utf-8') yield line - return out_headers, check_str(res), errs + return out_headers, check_str(first_line, res), errs #============================================================================= diff --git a/tests/test_zipnum_auto_dir.py b/tests/test_zipnum_auto_dir.py index 7a3f77b5..c91c849e 100644 --- a/tests/test_zipnum_auto_dir.py +++ b/tests/test_zipnum_auto_dir.py @@ -46,5 +46,13 @@ class TestZipnumAutoDir(CollsDirMixin, BaseConfigTest): assert lines[2] == {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200654", "part": "zipnum", "offset": 1692, "length": 235, "lineno": 7} assert lines[3] == {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200816", "part": "zipnum", "offset": 1927, "length": 231, "lineno": 8} + def test_paged_index_query_out_of_range(self): + res = self.testapp.get( + '/testzip/cdx?url=http://iana.org/domains/&matchType=domain&output=json&showPagedIndex=true&pageSize=4&page=10', + expect_errors=True) + + assert res.status_code == 400 + assert res.json == {"message": "Page 10 invalid: First Page is 0, Last Page is 9"} +