From 4fdcdc98ae88cf4a092b54494d519c3a51048bdc Mon Sep 17 00:00:00 2001 From: archiveit Date: Sat, 8 Mar 2014 23:46:59 +0000 Subject: [PATCH] replay: ignore 304 captures --- pywb/core/replay_views.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pywb/core/replay_views.py b/pywb/core/replay_views.py index b627a05a..7de46f21 100644 --- a/pywb/core/replay_views.py +++ b/pywb/core/replay_views.py @@ -178,12 +178,20 @@ class ReplayView: Check if response is a 3xx redirect to the same url If so, reject this capture to avoid causing redirect loop """ - if status_headers.statusline.startswith('3'): - request_url = wbrequest.wb_url.url.lower() - location_url = status_headers.get_header('Location').lower() + if not status_headers.statusline.startswith('3'): + return - if (UrlRewriter.strip_protocol(request_url) == UrlRewriter.strip_protocol(location_url)): - raise CaptureException('Self Redirect: ' + str(cdx)) + request_url = wbrequest.wb_url.url.lower() + location_url = status_headers.get_header('Location') + if not location_url: + if status_headers.statusline.startswith('304'): + raise CaptureException('Skipping 304 Modified: ' + str(cdx)) + return + + location_url = location_url.lower() + + if (UrlRewriter.strip_protocol(request_url) == UrlRewriter.strip_protocol(location_url)): + raise CaptureException('Self Redirect: ' + str(cdx)) def _reject_referrer_self_redirect(self, wbrequest): """