mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Catch RemoteDisconnected case when starting downloading
A common error is to connect to the remote server successfully but then get a `http_client.RemoteDisconnected` exception when trying to begin downloading. It's caused by the call to `prox_rec_res.begin(...)`, which calls `http_client._read_status()`. In that case, we also add the target `hostname:port` to the `bad_hostnames_ports` cache. Modify 2 unit tests to clear the `bad_hostnames_ports` cache, because localhost is added to it by previous tests and this breaks them.
This commit is contained in:
parent
75e789c15f
commit
89041e83b4
@ -1986,6 +1986,10 @@ def test_socket_timeout_response(
|
|||||||
def test_empty_response(
|
def test_empty_response(
|
||||||
warcprox_, http_daemon, https_daemon, archiving_proxies,
|
warcprox_, http_daemon, https_daemon, archiving_proxies,
|
||||||
playback_proxies):
|
playback_proxies):
|
||||||
|
# localhost:server_port was added to the `bad_hostnames_ports` cache by
|
||||||
|
# previous tests and this causes subsequent tests to fail. We clear it.
|
||||||
|
warcprox_.proxy.bad_hostnames_ports.clear()
|
||||||
|
|
||||||
url = 'http://localhost:%s/empty-response' % http_daemon.server_port
|
url = 'http://localhost:%s/empty-response' % http_daemon.server_port
|
||||||
response = requests.get(url, proxies=archiving_proxies, verify=False)
|
response = requests.get(url, proxies=archiving_proxies, verify=False)
|
||||||
assert response.status_code == 502
|
assert response.status_code == 502
|
||||||
@ -2001,6 +2005,10 @@ def test_payload_digest(warcprox_, http_daemon):
|
|||||||
Tests that digest is of RFC2616 "entity body"
|
Tests that digest is of RFC2616 "entity body"
|
||||||
(transfer-decoded but not content-decoded)
|
(transfer-decoded but not content-decoded)
|
||||||
'''
|
'''
|
||||||
|
# localhost:server_port was added to the `bad_hostnames_ports` cache by
|
||||||
|
# previous tests and this causes subsequent tests to fail. We clear it.
|
||||||
|
warcprox_.proxy.bad_hostnames_ports.clear()
|
||||||
|
|
||||||
class HalfMockedMitm(warcprox.mitmproxy.MitmProxyHandler):
|
class HalfMockedMitm(warcprox.mitmproxy.MitmProxyHandler):
|
||||||
def __init__(self, url):
|
def __init__(self, url):
|
||||||
self.path = url
|
self.path = url
|
||||||
|
@ -549,6 +549,18 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
if not is_connection_dropped(self._remote_server_conn):
|
if not is_connection_dropped(self._remote_server_conn):
|
||||||
self._conn_pool._put_conn(self._remote_server_conn)
|
self._conn_pool._put_conn(self._remote_server_conn)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# A common error is to connect to the remote server successfully
|
||||||
|
# but raise a `RemoteDisconnected` exception when trying to begin
|
||||||
|
# downloading. It's caused by prox_rec_res.begin(...) which calls
|
||||||
|
# http_client._read_status(). In that case, the host is also bad
|
||||||
|
# and we must add it to `bad_hostnames_ports` cache.
|
||||||
|
if type(e) == http_client.RemoteDisconnected:
|
||||||
|
with self.server.bad_hostnames_ports_lock:
|
||||||
|
host_port = self._hostname_port_cache_key()
|
||||||
|
self.server.bad_hostnames_ports[host_port] = 1
|
||||||
|
self.logger.info('bad_hostnames_ports cache size: %d',
|
||||||
|
len(self.server.bad_hostnames_ports))
|
||||||
|
|
||||||
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
|
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
|
||||||
self._remote_server_conn.sock.close()
|
self._remote_server_conn.sock.close()
|
||||||
raise
|
raise
|
||||||
|
Loading…
x
Reference in New Issue
Block a user