1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Handle CDXException and respond with HTTP 400 Bad Request (#626)

* FrontendApp: forward HTTP status of CDX backend to allow clients
to handle errors more easily

* Handle CDXExceptions properly, returning the exception status code
- make sure that CDXException is raised early so that it can be handled
  in the IndexHandler
This commit is contained in:
Sebastian Nagel 2021-04-27 05:51:33 +02:00 committed by GitHub
parent 13ea5baee5
commit 212691bd38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 2 deletions

View File

@ -4,6 +4,7 @@ from pywb.utils.memento import MementoUtils
from warcio.recordloader import ArchiveLoadFailed from warcio.recordloader import ArchiveLoadFailed
from pywb.warcserver.index.cdxobject import CDXException
from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
@ -98,13 +99,27 @@ class IndexHandler(object):
content_type, res = handler(cdx_iter, fields, params) content_type, res = handler(cdx_iter, fields, params)
out_headers = {'Content-Type': content_type} out_headers = {'Content-Type': content_type}
def check_str(lines): first_line = None
try:
# raise exceptions early so that they can be handled properly
first_line = next(res)
except StopIteration:
pass
except CDXException as e:
errs = dict(last_exc=e)
return None, None, errs
def check_str(first_line, lines):
if first_line is not None:
if isinstance(first_line, six.text_type):
first_line = first_line.encode('utf-8')
yield first_line
for line in lines: for line in lines:
if isinstance(line, six.text_type): if isinstance(line, six.text_type):
line = line.encode('utf-8') line = line.encode('utf-8')
yield line yield line
return out_headers, check_str(res), errs return out_headers, check_str(first_line, res), errs
#============================================================================= #=============================================================================

View File

@ -46,5 +46,13 @@ class TestZipnumAutoDir(CollsDirMixin, BaseConfigTest):
assert lines[2] == {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200654", "part": "zipnum", "offset": 1692, "length": 235, "lineno": 7} assert lines[2] == {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200654", "part": "zipnum", "offset": 1692, "length": 235, "lineno": 7}
assert lines[3] == {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200816", "part": "zipnum", "offset": 1927, "length": 231, "lineno": 8} assert lines[3] == {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200816", "part": "zipnum", "offset": 1927, "length": 231, "lineno": 8}
def test_paged_index_query_out_of_range(self):
    """Query a paged index page beyond the last page and expect HTTP 400.

    The request asks for page 10 with a page size of 4; the asserted error
    body states that the valid pages run from 0 to 9.
    """
    query_url = '/testzip/cdx?url=http://iana.org/domains/&matchType=domain&output=json&showPagedIndex=true&pageSize=4&page=10'
    expected_body = {"message": "Page 10 invalid: First Page is 0, Last Page is 9"}

    # An error status is the expected outcome here, hence expect_errors=True.
    response = self.testapp.get(query_url, expect_errors=True)

    assert response.status_code == 400
    assert response.json == expected_body