1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Handle CDXException and respond with HTTP 400 Bad Request (#626)

* FrontendApp: forward HTTP status of CDX backend to allow clients
to handle errors more easily

* Handle CDXExceptions properly, returning the exception status code
- make sure that CDXException is raised early so that it can be handled
  in the IndexHandler
This commit is contained in:
Sebastian Nagel 2021-04-27 05:51:33 +02:00 committed by GitHub
parent 13ea5baee5
commit 212691bd38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 2 deletions

View File

@ -4,6 +4,7 @@ from pywb.utils.memento import MementoUtils
from warcio.recordloader import ArchiveLoadFailed
from pywb.warcserver.index.cdxobject import CDXException
from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
@ -98,13 +99,27 @@ class IndexHandler(object):
content_type, res = handler(cdx_iter, fields, params)
out_headers = {'Content-Type': content_type}
def check_str(lines):
first_line = None
try:
# raise exceptions early so that they can be handled properly
first_line = next(res)
except StopIteration:
pass
except CDXException as e:
errs = dict(last_exc=e)
return None, None, errs
def check_str(first_line, lines):
    """Yield ``first_line`` (unless it is None), then every item of ``lines``.

    Any item that is a ``six.text_type`` is encoded to UTF-8 bytes before
    being yielded; all other items are passed through unchanged.

    :param first_line: an item to emit ahead of ``lines``, or None to emit
        nothing extra
    :param lines: iterable of the remaining items
    """
    def to_bytes(item):
        # only text values need encoding; everything else is yielded as-is
        return item.encode('utf-8') if isinstance(item, six.text_type) else item

    # the leading item is supplied separately from the rest of the iterable,
    # so it has to be emitted first to keep the output order intact
    if first_line is not None:
        yield to_bytes(first_line)

    for item in lines:
        yield to_bytes(item)
return out_headers, check_str(res), errs
return out_headers, check_str(first_line, res), errs
#=============================================================================

View File

@ -46,5 +46,13 @@ class TestZipnumAutoDir(CollsDirMixin, BaseConfigTest):
assert lines[2] == {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200654", "part": "zipnum", "offset": 1692, "length": 235, "lineno": 7}
assert lines[3] == {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200816", "part": "zipnum", "offset": 1927, "length": 231, "lineno": 8}
def test_paged_index_query_out_of_range(self):
    """A paged-index query for page 10 must answer 400 with a JSON error message."""
    query_url = '/testzip/cdx?url=http://iana.org/domains/&matchType=domain&output=json&showPagedIndex=true&pageSize=4&page=10'
    expected_body = {"message": "Page 10 invalid: First Page is 0, Last Page is 9"}

    # expect_errors=True is passed so the response can be inspected below
    res = self.testapp.get(query_url, expect_errors=True)

    assert res.status_code == 400
    assert res.json == expected_body