From 76abe4b7538ba1797350dd7343cb18c4fcc93367 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Mon, 10 Jun 2019 06:26:26 +0000 Subject: [PATCH] Catch BadStatusLine exception When trying to begin downloading from a remote host, we may get a `RemoteDisconnected` exception if the host returns no data. We already handle that. We may also get `BadStatusLine` when the HTTP status line of the response is malformed. https://github.com/python/cpython/blob/3.7/Lib/http/client.py#L288 We should also add these cases to the bad hosts cache. --- warcprox/mitmproxy.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index d6a0593..deb3a52 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -572,9 +572,15 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): # A common error is to connect to the remote server successfully # but raise a `RemoteDisconnected` exception when trying to begin # downloading. Its caused by prox_rec_res.begin(...) which calls - # http_client._read_status(). In that case, the host is also bad - # and we must add it to `bad_hostnames_ports` cache. - if isinstance(e, http_client.RemoteDisconnected): + # http_client._read_status(). The connection fails there. + # https://github.com/python/cpython/blob/3.7/Lib/http/client.py#L275 + # Another case is when the connection is fine but the response + # status is problematic, raising `BadStatusLine`. + # https://github.com/python/cpython/blob/3.7/Lib/http/client.py#L296 + # In both cases, the host is bad and we must add it to + # `bad_hostnames_ports` cache. + if isinstance(e, (http_client.RemoteDisconnected, + http_client.BadStatusLine)): host_port = self._hostname_port_cache_key() with self.server.bad_hostnames_ports_lock: self.server.bad_hostnames_ports[host_port] = 502