Catch RemoteDisconnected case when starting downloading

A common error is to connect to the remote server successfully but then get a
`http_client.RemoteDisconnected` exception when trying to begin
downloading. It's caused by the call to `prox_rec_res.begin(...)`, which calls
`http_client._read_status()`. In that case, we also add the target
`hostname:port` to the `bad_hostnames_ports` cache.

Modify 2 unit tests to clear the `bad_hostnames_ports` cache, because
localhost is added to it by previous tests and this breaks them.
This commit is contained in:
Vangelis Banos 2019-05-10 07:32:42 +00:00
parent 75e789c15f
commit 89041e83b4
2 changed files with 20 additions and 0 deletions

View File

@ -1986,6 +1986,10 @@ def test_socket_timeout_response(
def test_empty_response(
warcprox_, http_daemon, https_daemon, archiving_proxies,
playback_proxies):
# localhost:server_port was added to the `bad_hostnames_ports` cache by
# previous tests and this causes subsequent tests to fail. We clear it.
warcprox_.proxy.bad_hostnames_ports.clear()
url = 'http://localhost:%s/empty-response' % http_daemon.server_port
response = requests.get(url, proxies=archiving_proxies, verify=False)
assert response.status_code == 502
@ -2001,6 +2005,10 @@ def test_payload_digest(warcprox_, http_daemon):
Tests that digest is of RFC2616 "entity body"
(transfer-decoded but not content-decoded)
'''
# localhost:server_port was added to the `bad_hostnames_ports` cache by
# previous tests and this causes subsequent tests to fail. We clear it.
warcprox_.proxy.bad_hostnames_ports.clear()
class HalfMockedMitm(warcprox.mitmproxy.MitmProxyHandler):
def __init__(self, url):
self.path = url

View File

@ -549,6 +549,18 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
if not is_connection_dropped(self._remote_server_conn):
self._conn_pool._put_conn(self._remote_server_conn)
except Exception as e:
# A common error is to connect to the remote server successfully
# but raise a `RemoteDisconnected` exception when trying to begin
# downloading. It's caused by prox_rec_res.begin(...) which calls
# http_client._read_status(). In that case, the host is also bad
# and we must add it to `bad_hostnames_ports` cache.
if type(e) == http_client.RemoteDisconnected:
with self.server.bad_hostnames_ports_lock:
host_port = self._hostname_port_cache_key()
self.server.bad_hostnames_ports[host_port] = 1
self.logger.info('bad_hostnames_ports cache size: %d',
len(self.server.bad_hostnames_ports))
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
self._remote_server_conn.sock.close()
raise