From 0002d29f0d734c5d1f230965f90f43db1d3eccb7 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Fri, 16 Mar 2018 21:06:34 +0000 Subject: [PATCH] Improve Connection Pool Set connection pool maxsize to 6 (borrowing from browser behavior). Set num_pools to `max_threads / 6` but set a minimum of 200 for the cases that we use a very low number of `max_threads`. Remove `connection_is_fine` variable from connection code. Fix http headers bug introduced in the previous commit. --- warcprox/mitmproxy.py | 16 +++++++--------- warcprox/warcproxy.py | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index 1482210..e87975c 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -176,7 +176,7 @@ class ProxyingRecordingHTTPResponse(http_client.HTTPResponse): for k,v in self.msg.items(): if k.lower() not in ( - 'connection', 'proxy-connection', + 'connection', 'proxy-connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'upgrade', 'strict-transport-security'): status_and_headers += '{}: {}\r\n'.format(k, v) @@ -247,7 +247,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): ''' self._conn_pool = self.server.remote_connection_pool.connection_from_host( host=self.hostname, port=int(self.port), scheme='http', - pool_kwargs={'maxsize': 30}) + pool_kwargs={'maxsize': 6}) self._remote_server_conn = self._conn_pool._get_conn() if is_connection_dropped(self._remote_server_conn): @@ -426,7 +426,6 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): req += self.rfile.read(int(self.headers['Content-Length'])) prox_rec_res = None - connection_is_fine = False try: self.logger.debug('sending to remote server req=%r', req) @@ -450,17 +449,16 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): self._max_resource_size, self.url) break - connection_is_fine = True self.log_request(prox_rec_res.status, prox_rec_res.recorder.len) - finally: # Let's close off the remote end. If remote connection is fine, # put it back in the pool to reuse it later. + if not is_connection_dropped(self._remote_server_conn): + self._conn_pool._put_conn(self._remote_server_conn) + except: + self._remote_server_conn.sock.close() + finally: if prox_rec_res: prox_rec_res.close() - if connection_is_fine and not is_connection_dropped(self._remote_server_conn): - self._conn_pool._put_conn(self._remote_server_conn) - else: - self._remote_server_conn.sock.close() return req, prox_rec_res diff --git a/warcprox/warcproxy.py b/warcprox/warcproxy.py index 88a9c34..97da984 100644 --- a/warcprox/warcproxy.py +++ b/warcprox/warcproxy.py @@ -389,7 +389,7 @@ class SingleThreadedWarcProxy(http_server.HTTPServer, object): self.stats_db = stats_db self.options = options self.remote_connection_pool = PoolManager( - num_pools=max(options.max_threads, 500) if options.max_threads else 500) + num_pools=max(round(options.max_threads / 6), 200) if options.max_threads else 200) server_address = ( options.address or 'localhost', options.port if options.port is not None else 8000)