From 0cab6fc4bf4b13e1ecadd862e84d44ce70b8e08c Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Mon, 8 Apr 2019 16:13:14 +0000 Subject: [PATCH] Increase the MAXHEADERS limit of http client `http.client` has an arbitrary limit of `_MAXHEADERS = 100`. If a response from a target URL has more headers than that, it raises an HTTPException and the request fails. (The target pages are perfectly fine apart from having more than 100 headers.) https://github.com/python/cpython/blob/3.7/Lib/http/client.py#L113 We increase this limit to 7000. We currently use this in production WBM. We bumped into the same issue trying to replay pages with too many HTTP headers. We increased the limit progressively from 100 to 500, 1000, etc., and found that 7000 is a good place to stop. --- warcprox/mitmproxy.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index b8e7d74..5b24f30 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -45,6 +45,11 @@ try: http_client._MAXLINE = 4194304 # 4 MiB except ImportError: import httplib as http_client +# http_client has an arbitrary limit of 100 HTTP Headers which is too low and +# it raises an HTTPException if the target URL has more. +# https://github.com/python/cpython/blob/3.7/Lib/http/client.py#L113 +http_client._MAXHEADERS = 7000 + import json import socket import logging