From f30b2804371c63b76ed1f63251512ce5bc6269ac Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 2 Mar 2018 14:50:17 -0800 Subject: [PATCH] self-redirect check: run redirect check if status code is blank or does not start with 2, 4, 5, to more aggressively check invalid status codes, should fix ukwa/ukwa-pywb#21 --- pywb/warcserver/resource/responseloader.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pywb/warcserver/resource/responseloader.py b/pywb/warcserver/resource/responseloader.py index dd964c1d..7f4aa297 100644 --- a/pywb/warcserver/resource/responseloader.py +++ b/pywb/warcserver/resource/responseloader.py @@ -204,8 +204,10 @@ class WARCPathLoader(DefaultResolverMixin, BaseLoader): http_headers_buff = None if payload.rec_type in ('response', 'revisit'): status = cdx.get('status') - # status may not be set for 'revisit' - if not status or status.startswith('3'): + + # if status is not set, or is not 2xx, 4xx, or 5xx, + # go through the self-redirect check just in case + if not status or not status.startswith(('2', '4', '5')): http_headers = self.headers_parser.parse(payload.raw_stream) try: