mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Catch RemoteDisconnected case when starting downloading
A common error is to connect to the remote server successfully but then get an `http_client.RemoteDisconnected` exception when trying to begin downloading. It's caused by the call to `prox_rec_res.begin(...)`, which calls `http_client._read_status()`. In that case, we also add the target `hostname:port` to the `bad_hostnames_ports` cache. Modify 2 unit tests to clear the `bad_hostnames_ports` cache, because localhost is added to it by previous tests and this breaks them.
This commit is contained in:
parent
75e789c15f
commit
89041e83b4
@ -1986,6 +1986,10 @@ def test_socket_timeout_response(
|
||||
def test_empty_response(
|
||||
warcprox_, http_daemon, https_daemon, archiving_proxies,
|
||||
playback_proxies):
|
||||
# localhost:server_port was added to the `bad_hostnames_ports` cache by
|
||||
# previous tests and this causes subsequent tests to fail. We clear it.
|
||||
warcprox_.proxy.bad_hostnames_ports.clear()
|
||||
|
||||
url = 'http://localhost:%s/empty-response' % http_daemon.server_port
|
||||
response = requests.get(url, proxies=archiving_proxies, verify=False)
|
||||
assert response.status_code == 502
|
||||
@ -2001,6 +2005,10 @@ def test_payload_digest(warcprox_, http_daemon):
|
||||
Tests that digest is of RFC2616 "entity body"
|
||||
(transfer-decoded but not content-decoded)
|
||||
'''
|
||||
# localhost:server_port was added to the `bad_hostnames_ports` cache by
|
||||
# previous tests and this causes subsequent tests to fail. We clear it.
|
||||
warcprox_.proxy.bad_hostnames_ports.clear()
|
||||
|
||||
class HalfMockedMitm(warcprox.mitmproxy.MitmProxyHandler):
|
||||
def __init__(self, url):
|
||||
self.path = url
|
||||
|
@ -549,6 +549,18 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
if not is_connection_dropped(self._remote_server_conn):
|
||||
self._conn_pool._put_conn(self._remote_server_conn)
|
||||
except Exception as e:
|
||||
# A common error is to connect to the remote server successfully
|
||||
# but raise a `RemoteDisconnected` exception when trying to begin
|
||||
# downloading. It's caused by prox_rec_res.begin(...) which calls
|
||||
# http_client._read_status(). In that case, the host is also bad
|
||||
# and we must add it to `bad_hostnames_ports` cache.
|
||||
if type(e) == http_client.RemoteDisconnected:
|
||||
with self.server.bad_hostnames_ports_lock:
|
||||
host_port = self._hostname_port_cache_key()
|
||||
self.server.bad_hostnames_ports[host_port] = 1
|
||||
self.logger.info('bad_hostnames_ports cache size: %d',
|
||||
len(self.server.bad_hostnames_ports))
|
||||
|
||||
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
|
||||
self._remote_server_conn.sock.close()
|
||||
raise
|
||||
|
Loading…
x
Reference in New Issue
Block a user