1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

add some comments, make charset parsing lower()

This commit is contained in:
Ilya Kreymer 2014-01-03 17:40:20 -08:00
parent c255f4e47f
commit d820a8c06a
2 changed files with 6 additions and 4 deletions

View File

@ -16,10 +16,10 @@ class RewrittenStatusAndHeaders:
class HeaderRewriter:
"""
# Text with charset
>>> test_rewrite([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=utf-8')])
>>> test_rewrite([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=UTF-8')])
{'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'),
('X-Archive-Orig-Content-Length', '5'),
('Content-Type', 'text/html;charset=utf-8')]), 'charset': 'utf-8', 'textType': 'html', 'removedHeaderDict': {}}
('Content-Type', 'text/html;charset=UTF-8')]), 'charset': 'utf-8', 'textType': 'html', 'removedHeaderDict': {}}
# Redirect
>>> test_rewrite([('Connection', 'close'), ('Location', '/other.html')], '302 Redirect')
@ -91,7 +91,7 @@ class HeaderRewriter:
if idx < 0:
return None
return contentType[idx + len(CHARSET_TOKEN):]
return contentType[idx + len(CHARSET_TOKEN):].lower()
def _rewriteHeaders(self, headers, urlrewriter, contentRewritten = False):
newHeaders = []

View File

@ -40,10 +40,12 @@ class ReplayHandler(object):
cdxlist = query_response.body
last_e = None
first = True
# List of WARC/ARC files that have already failed to load
failedFiles = []
# Iterate over the cdx list until we find one that works
# The cdx should already be sorted in closest-to-timestamp order (from the cdx server)
for cdx in cdxlist:
try:
cdx = indexreader.CDXCaptureResult(cdx)