From 3f9ecbacaca861f42a6d05d1c514e1c01c4e1227 Mon Sep 17 00:00:00 2001
From: Noah Levitt
Date: Wed, 4 Apr 2018 15:29:16 -0700
Subject: [PATCH] tweak tests to make them pass now that keepalive is enabled
 on the test server

---
 tests/test_warcprox.py | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py
index d175309..9b789a4 100755
--- a/tests/test_warcprox.py
+++ b/tests/test_warcprox.py
@@ -279,6 +279,11 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
         headers, payload = self.build_response()
         self.connection.sendall(headers)
         self.connection.sendall(payload)
+        if self.path == '/missing-content-length':
+            # response without content-length (and not chunked) must close the
+            # connection, else client has no idea if there is more data coming
+            self.connection.shutdown(socket.SHUT_RDWR)
+            self.connection.close()
 
     def do_HEAD(self):
         logging.info('HEAD {}'.format(self.path))
@@ -364,8 +369,11 @@ def https_daemon(request, cert):
 
     return https_daemon
 
+# specify http_daemon and https_daemon as dependencies so that their finalizers
+# run after warcprox is shut down, otherwise warcprox holds connections open
+# and prevents the servers from shutting down
 @pytest.fixture(scope="module")
-def warcprox_(request):
+def warcprox_(request, http_daemon, https_daemon):
     orig_dir = os.getcwd()
     work_dir = tempfile.mkdtemp()
     logging.info('changing to working directory %r', work_dir)
@@ -971,6 +979,14 @@ def test_domain_doc_soft_limit(
         http_daemon, https_daemon, warcprox_, archiving_proxies):
     urls_before = warcprox_.proxy.running_stats.urls
 
+    # we need to clear the connection pool here because
+    # - connection pool already may already have an open connection localhost
+    # - we're about to make a connection to foo.localhost
+    # - but our test server, which handles all the hosts, is single threaded
+    # - so it will fail to connect (socket timeout)
+    # must close connections before each connection to a different hostname
+    warcprox_.proxy.remote_connection_pool.clear()
+
     request_meta = {
         "stats": {"buckets": [{"bucket":"test_domain_doc_limit_bucket","tally-domains":["foo.localhost"]}]},
         "soft-limits": {"test_domain_doc_limit_bucket:foo.localhost/total/urls":10},
@@ -988,6 +1004,8 @@
     # wait for postfetch chain
     wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 1)
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # make sure stats from different domain don't count
     url = 'http://bar.localhost:{}/o/p'.format(http_daemon.server_port)
     for i in range(10):
@@ -1000,6 +1018,8 @@
     # wait for postfetch chain
     wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 11)
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # (2) same host but different scheme and port: domain limit applies
     url = 'https://foo.localhost:{}/o/p'.format(https_daemon.server_port)
     response = requests.get(
@@ -1009,6 +1029,8 @@
         url, proxies=archiving_proxies, headers=headers, stream=True)
     assert response.headers['warcprox-test-header'] == 'o!'
     assert response.content == b'I am the warcprox test payload! pppppppppp!\n'
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # (3-9) different subdomain: host limit applies
     url = 'https://baz.foo.localhost:{}/o/p'.format(https_daemon.server_port)
     for i in range(7):
@@ -1037,6 +1059,8 @@
     # wait for postfetch chain
     wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 20)
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # (11) back to http, and this is the 11th request
     url = 'http://zuh.foo.localhost:{}/o/p'.format(http_daemon.server_port)
     response = requests.get(
@@ -1048,6 +1072,8 @@
     assert response.headers["content-type"] == "text/plain;charset=utf-8"
     assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_doc_limit_bucket:foo.localhost/total/urls=10\n"
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # make sure limit doesn't get applied to a different domain
     url = 'https://localhost:{}/o/p'.format(https_daemon.server_port)
     response = requests.get(
@@ -1060,6 +1086,8 @@
     # wait for postfetch chain
     wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 21)
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # https also blocked
     url = 'https://zuh.foo.localhost:{}/o/p'.format(https_daemon.server_port)
     response = requests.get(
@@ -1072,6 +1100,8 @@
     assert response.headers["content-type"] == "text/plain;charset=utf-8"
     assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_doc_limit_bucket:foo.localhost/total/urls=10\n"
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # same host, different capitalization still blocked
     url = 'https://HEHEHE.fOO.lOcALhoST:{}/o/p'.format(https_daemon.server_port)
     response = requests.get(
@@ -1096,6 +1126,8 @@ def test_domain_data_soft_limit(
     }
     headers = {"Warcprox-Meta": json.dumps(request_meta)}
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     url = 'http://ÞZz.localhost:{}/y/z'.format(http_daemon.server_port)
     response = requests.get(
         url, proxies=archiving_proxies, headers=headers, stream=True)
@@ -1106,6 +1138,8 @@
     # wait for postfetch chain
     wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 1)
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # duplicate, does not count toward limit
     url = 'https://baz.Þzz.localhost:{}/y/z'.format(https_daemon.server_port)
     response = requests.get(
@@ -1118,6 +1152,8 @@
     # wait for postfetch chain
     wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 2)
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # novel, pushes stats over the limit
     url = 'https://muh.XN--Zz-2Ka.locALHOst:{}/z/~'.format(https_daemon.server_port)
     response = requests.get(
@@ -1130,6 +1166,8 @@
     # wait for postfetch chain
     wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 3)
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # make sure limit doesn't get applied to a different host
     url = 'http://baz.localhost:{}/z/~'.format(http_daemon.server_port)
     response = requests.get(
@@ -1141,6 +1179,8 @@
     # wait for postfetch chain
     wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 4)
 
+    warcprox_.proxy.remote_connection_pool.clear()
+
     # blocked because we're over the limit now
     url = 'http://lOl.wHut.ÞZZ.lOcALHOst:{}/y/z'.format(http_daemon.server_port)
     response = requests.get(