From 00121aa16520bb1957bbedc1b18761b00643c6b2 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 5 Nov 2014 20:26:23 -0800 Subject: [PATCH] statusandheaders parsing: properly skip multiline bad headers (missing header name and ':'), fixes #49 --- pywb/utils/statusandheaders.py | 3 ++- pywb/utils/test/test_statusandheaders.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pywb/utils/statusandheaders.py b/pywb/utils/statusandheaders.py index 70ba850c..3f429814 100644 --- a/pywb/utils/statusandheaders.py +++ b/pywb/utils/statusandheaders.py @@ -169,7 +169,8 @@ class StatusAndHeadersParser(object): # append continuation lines, if any while next_line and next_line.startswith((' ', '\t')): - value += next_line + if value is not None: + value += next_line next_line, total_read = _strip_count(stream.readline(), total_read) diff --git a/pywb/utils/test/test_statusandheaders.py b/pywb/utils/test/test_statusandheaders.py index 2ee894b9..1929d17d 100644 --- a/pywb/utils/test/test_statusandheaders.py +++ b/pywb/utils/test/test_statusandheaders.py @@ -32,6 +32,10 @@ False # empty >>> st2 = StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_2)); x = st2.validate_statusline('204 No Content'); st2 StatusAndHeaders(protocol = '', statusline = '204 No Content', headers = []) + + +>>> StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_3)) +StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '204 Empty', headers = [('Content-Type', 'Value'), ('Content-Length', '0')]) """ @@ -54,6 +58,14 @@ status_headers_2 = """ """ +status_headers_3 = "\ +HTTP/1.0 204 Empty\r\n\ +Content-Type: Value\r\n\ +%Invalid%\r\n\ +\tMultiline\r\n\ +Content-Length: 0\r\n\ +\r\n" + if __name__ == "__main__": import doctest