tweak tests to make them pass now that keepalive

is enabled on the test server
This commit is contained in:
Noah Levitt 2018-04-04 15:29:16 -07:00
parent 8ac0420cb2
commit 3f9ecbacac

View File

@ -279,6 +279,11 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
headers, payload = self.build_response()
self.connection.sendall(headers)
self.connection.sendall(payload)
if self.path == '/missing-content-length':
# response without content-length (and not chunked) must close the
# connection, else client has no idea if there is more data coming
self.connection.shutdown(socket.SHUT_RDWR)
self.connection.close()
def do_HEAD(self):
logging.info('HEAD {}'.format(self.path))
@ -364,8 +369,11 @@ def https_daemon(request, cert):
return https_daemon
# specify http_daemon and https_daemon as dependencies so that their finalizers
# run after warcprox is shut down, otherwise warcprox holds connections open
# and prevents the servers from shutting down
@pytest.fixture(scope="module")
def warcprox_(request):
def warcprox_(request, http_daemon, https_daemon):
orig_dir = os.getcwd()
work_dir = tempfile.mkdtemp()
logging.info('changing to working directory %r', work_dir)
@ -971,6 +979,14 @@ def test_domain_doc_soft_limit(
http_daemon, https_daemon, warcprox_, archiving_proxies):
urls_before = warcprox_.proxy.running_stats.urls
# we need to clear the connection pool here because
# - connection pool already may already have an open connection localhost
# - we're about to make a connection to foo.localhost
# - but our test server, which handles all the hosts, is single threaded
# - so it will fail to connect (socket timeout)
# must close connections before each connection to a different hostname
warcprox_.proxy.remote_connection_pool.clear()
request_meta = {
"stats": {"buckets": [{"bucket":"test_domain_doc_limit_bucket","tally-domains":["foo.localhost"]}]},
"soft-limits": {"test_domain_doc_limit_bucket:foo.localhost/total/urls":10},
@ -988,6 +1004,8 @@ def test_domain_doc_soft_limit(
# wait for postfetch chain
wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 1)
warcprox_.proxy.remote_connection_pool.clear()
# make sure stats from different domain don't count
url = 'http://bar.localhost:{}/o/p'.format(http_daemon.server_port)
for i in range(10):
@ -1000,6 +1018,8 @@ def test_domain_doc_soft_limit(
# wait for postfetch chain
wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 11)
warcprox_.proxy.remote_connection_pool.clear()
# (2) same host but different scheme and port: domain limit applies
url = 'https://foo.localhost:{}/o/p'.format(https_daemon.server_port)
response = requests.get(
@ -1009,6 +1029,8 @@ def test_domain_doc_soft_limit(
assert response.headers['warcprox-test-header'] == 'o!'
assert response.content == b'I am the warcprox test payload! pppppppppp!\n'
warcprox_.proxy.remote_connection_pool.clear()
# (3-9) different subdomain: host limit applies
url = 'https://baz.foo.localhost:{}/o/p'.format(https_daemon.server_port)
for i in range(7):
@ -1037,6 +1059,8 @@ def test_domain_doc_soft_limit(
# wait for postfetch chain
wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 20)
warcprox_.proxy.remote_connection_pool.clear()
# (11) back to http, and this is the 11th request
url = 'http://zuh.foo.localhost:{}/o/p'.format(http_daemon.server_port)
response = requests.get(
@ -1048,6 +1072,8 @@ def test_domain_doc_soft_limit(
assert response.headers["content-type"] == "text/plain;charset=utf-8"
assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_doc_limit_bucket:foo.localhost/total/urls=10\n"
warcprox_.proxy.remote_connection_pool.clear()
# make sure limit doesn't get applied to a different domain
url = 'https://localhost:{}/o/p'.format(https_daemon.server_port)
response = requests.get(
@ -1060,6 +1086,8 @@ def test_domain_doc_soft_limit(
# wait for postfetch chain
wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 21)
warcprox_.proxy.remote_connection_pool.clear()
# https also blocked
url = 'https://zuh.foo.localhost:{}/o/p'.format(https_daemon.server_port)
response = requests.get(
@ -1072,6 +1100,8 @@ def test_domain_doc_soft_limit(
assert response.headers["content-type"] == "text/plain;charset=utf-8"
assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_doc_limit_bucket:foo.localhost/total/urls=10\n"
warcprox_.proxy.remote_connection_pool.clear()
# same host, different capitalization still blocked
url = 'https://HEHEHE.fOO.lOcALhoST:{}/o/p'.format(https_daemon.server_port)
response = requests.get(
@ -1096,6 +1126,8 @@ def test_domain_data_soft_limit(
}
headers = {"Warcprox-Meta": json.dumps(request_meta)}
warcprox_.proxy.remote_connection_pool.clear()
url = 'http://ÞZz.localhost:{}/y/z'.format(http_daemon.server_port)
response = requests.get(
url, proxies=archiving_proxies, headers=headers, stream=True)
@ -1106,6 +1138,8 @@ def test_domain_data_soft_limit(
# wait for postfetch chain
wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 1)
warcprox_.proxy.remote_connection_pool.clear()
# duplicate, does not count toward limit
url = 'https://baz.Þzz.localhost:{}/y/z'.format(https_daemon.server_port)
response = requests.get(
@ -1118,6 +1152,8 @@ def test_domain_data_soft_limit(
# wait for postfetch chain
wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 2)
warcprox_.proxy.remote_connection_pool.clear()
# novel, pushes stats over the limit
url = 'https://muh.XN--Zz-2Ka.locALHOst:{}/z/~'.format(https_daemon.server_port)
response = requests.get(
@ -1130,6 +1166,8 @@ def test_domain_data_soft_limit(
# wait for postfetch chain
wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 3)
warcprox_.proxy.remote_connection_pool.clear()
# make sure limit doesn't get applied to a different host
url = 'http://baz.localhost:{}/z/~'.format(http_daemon.server_port)
response = requests.get(
@ -1141,6 +1179,8 @@ def test_domain_data_soft_limit(
# wait for postfetch chain
wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 4)
warcprox_.proxy.remote_connection_pool.clear()
# blocked because we're over the limit now
url = 'http://lOl.wHut.ÞZZ.lOcALHOst:{}/y/z'.format(http_daemon.server_port)
response = requests.get(