From d820a8c06a2447fe1a1f7aea63ca8b6f23b5084c Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 3 Jan 2014 17:40:20 -0800 Subject: [PATCH] add some comments, make charset parsing lower() --- pywb/header_rewriter.py | 6 +++--- pywb/replay.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pywb/header_rewriter.py b/pywb/header_rewriter.py index 3b539082..57d790b4 100644 --- a/pywb/header_rewriter.py +++ b/pywb/header_rewriter.py @@ -16,10 +16,10 @@ class RewrittenStatusAndHeaders: class HeaderRewriter: """ # Text with charset - >>> test_rewrite([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=utf-8')]) + >>> test_rewrite([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=UTF-8')]) {'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('X-Archive-Orig-Content-Length', '5'), - ('Content-Type', 'text/html;charset=utf-8')]), 'charset': 'utf-8', 'textType': 'html', 'removedHeaderDict': {}} + ('Content-Type', 'text/html;charset=UTF-8')]), 'charset': 'utf-8', 'textType': 'html', 'removedHeaderDict': {}} # Redirect >>> test_rewrite([('Connection', 'close'), ('Location', '/other.html')], '302 Redirect') @@ -91,7 +91,7 @@ class HeaderRewriter: if idx < 0: return None - return contentType[idx + len(CHARSET_TOKEN):] + return contentType[idx + len(CHARSET_TOKEN):].lower() def _rewriteHeaders(self, headers, urlrewriter, contentRewritten = False): newHeaders = [] diff --git a/pywb/replay.py b/pywb/replay.py index 1b8816e9..659f7aa9 100644 --- a/pywb/replay.py +++ b/pywb/replay.py @@ -40,10 +40,12 @@ class ReplayHandler(object): cdxlist = query_response.body last_e = None first = True - + # List of already failed w/arcs failedFiles = [] + # Iterate over the cdx until we find one that works + # The cdx should already be sorted in closest-to-timestamp order (from 
the cdx server) for cdx in cdxlist: try: cdx = indexreader.CDXCaptureResult(cdx)