From 89041e83b4707a8e4229eef3e7384849c5f1c3fd Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Fri, 10 May 2019 07:32:42 +0000 Subject: [PATCH] Catch RemoteDisconnected case when starting downloading A common error is to connect to the remote server successfully but raise a `http_client.RemoteDisconnected` exception when trying to begin downloading. It's caused by the call to `prox_rec_res.begin(...)` which calls `http_client._read_status()`. In that case, we also add the target `hostname:port` to the `bad_hostnames_ports` cache. Modify 2 unit tests to clear the `bad_hostnames_ports` cache because localhost is added from previous tests and this breaks them. --- tests/test_warcprox.py | 8 ++++++++ warcprox/mitmproxy.py | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py index 6c49f0a..4323a6c 100755 --- a/tests/test_warcprox.py +++ b/tests/test_warcprox.py @@ -1986,6 +1986,10 @@ def test_socket_timeout_response( def test_empty_response( warcprox_, http_daemon, https_daemon, archiving_proxies, playback_proxies): + # localhost:server_port was added to the `bad_hostnames_ports` cache by + # previous tests and this causes subsequent tests to fail. We clear it. + warcprox_.proxy.bad_hostnames_ports.clear() + url = 'http://localhost:%s/empty-response' % http_daemon.server_port response = requests.get(url, proxies=archiving_proxies, verify=False) assert response.status_code == 502 @@ -2001,6 +2005,10 @@ def test_payload_digest(warcprox_, http_daemon): Tests that digest is of RFC2616 "entity body" (transfer-decoded but not content-decoded) ''' + # localhost:server_port was added to the `bad_hostnames_ports` cache by + # previous tests and this causes subsequent tests to fail. We clear it. 
+ warcprox_.proxy.bad_hostnames_ports.clear() + class HalfMockedMitm(warcprox.mitmproxy.MitmProxyHandler): def __init__(self, url): self.path = url diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index 39469d5..cc16281 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -549,6 +549,18 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): if not is_connection_dropped(self._remote_server_conn): self._conn_pool._put_conn(self._remote_server_conn) except Exception as e: + # A common error is to connect to the remote server successfully + # but raise a `RemoteDisconnected` exception when trying to begin + # downloading. Its caused by prox_rec_res.begin(...) which calls + # http_client._read_status(). In that case, the host is also bad + # and we must add it to `bad_hostnames_ports` cache. + if type(e) == http_client.RemoteDisconnected: + with self.server.bad_hostnames_ports_lock: + host_port = self._hostname_port_cache_key() + self.server.bad_hostnames_ports[host_port] = 1 + self.logger.info('bad_hostnames_ports cache size: %d', + len(self.server.bad_hostnames_ports)) + self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR) self._remote_server_conn.sock.close() raise