mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
add some comments, make charset parsing lower()
This commit is contained in:
parent
c255f4e47f
commit
d820a8c06a
@ -16,10 +16,10 @@ class RewrittenStatusAndHeaders:
|
|||||||
class HeaderRewriter:
|
class HeaderRewriter:
|
||||||
"""
|
"""
|
||||||
# Text with charset
|
# Text with charset
|
||||||
>>> test_rewrite([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=utf-8')])
|
>>> test_rewrite([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=UTF-8')])
|
||||||
{'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'),
|
{'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'),
|
||||||
('X-Archive-Orig-Content-Length', '5'),
|
('X-Archive-Orig-Content-Length', '5'),
|
||||||
('Content-Type', 'text/html;charset=utf-8')]), 'charset': 'utf-8', 'textType': 'html', 'removedHeaderDict': {}}
|
('Content-Type', 'text/html;charset=UTF-8')]), 'charset': 'utf-8', 'textType': 'html', 'removedHeaderDict': {}}
|
||||||
|
|
||||||
# Redirect
|
# Redirect
|
||||||
>>> test_rewrite([('Connection', 'close'), ('Location', '/other.html')], '302 Redirect')
|
>>> test_rewrite([('Connection', 'close'), ('Location', '/other.html')], '302 Redirect')
|
||||||
@ -91,7 +91,7 @@ class HeaderRewriter:
|
|||||||
if idx < 0:
|
if idx < 0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return contentType[idx + len(CHARSET_TOKEN):]
|
return contentType[idx + len(CHARSET_TOKEN):].lower()
|
||||||
|
|
||||||
def _rewriteHeaders(self, headers, urlrewriter, contentRewritten = False):
|
def _rewriteHeaders(self, headers, urlrewriter, contentRewritten = False):
|
||||||
newHeaders = []
|
newHeaders = []
|
||||||
|
@ -40,10 +40,12 @@ class ReplayHandler(object):
|
|||||||
cdxlist = query_response.body
|
cdxlist = query_response.body
|
||||||
last_e = None
|
last_e = None
|
||||||
first = True
|
first = True
|
||||||
|
|
||||||
# List of already failed w/arcs
|
# List of already failed w/arcs
|
||||||
failedFiles = []
|
failedFiles = []
|
||||||
|
|
||||||
|
# Iterate over the cdx list until we find one that works
|
||||||
|
# The cdx should already be sorted in closest-to-timestamp order (from the cdx server)
|
||||||
for cdx in cdxlist:
|
for cdx in cdxlist:
|
||||||
try:
|
try:
|
||||||
cdx = indexreader.CDXCaptureResult(cdx)
|
cdx = indexreader.CDXCaptureResult(cdx)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user