diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py index 7c5253b..0e60319 100755 --- a/tests/test_warcprox.py +++ b/tests/test_warcprox.py @@ -709,6 +709,7 @@ def test_limits(http_daemon, warcprox_, archiving_proxies): # wait for postfetch chain wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 10) + # next fetch hits the limit response = requests.get(url, proxies=archiving_proxies, headers=headers, stream=True) assert response.status_code == 420 assert response.reason == "Reached limit" @@ -717,6 +718,14 @@ def test_limits(http_daemon, warcprox_, archiving_proxies): assert response.headers["content-type"] == "text/plain;charset=utf-8" assert response.raw.data == b"request rejected by warcprox: reached limit test_limits_bucket/total/urls=10\n" + # make sure limit doesn't get applied to a different stats bucket + request_meta = {"stats":{"buckets":["no_limits_bucket"]},"limits":{"test_limits_bucket/total/urls":10}} + headers = {"Warcprox-Meta": json.dumps(request_meta)} + response = requests.get(url, proxies=archiving_proxies, headers=headers, stream=True) + assert response.status_code == 200 + assert response.headers['warcprox-test-header'] == 'i!' + assert response.content == b'I am the warcprox test payload! jjjjjjjjjj!\n' + def test_return_capture_timestamp(http_daemon, warcprox_, archiving_proxies): urls_before = warcprox_.proxy.running_stats.urls @@ -999,6 +1008,7 @@ def test_domain_doc_soft_limit( http_daemon, https_daemon, warcprox_, archiving_proxies): urls_before = warcprox_.proxy.running_stats.urls + # ** comment is obsolete (server is multithreaded) but still useful ** # we need to clear the connection pool here because # - connection pool already may already have an open connection localhost # - we're about to make a connection to foo.localhost @@ -1134,6 +1144,20 @@ def test_domain_doc_soft_limit( assert response.headers["content-type"] == "text/plain;charset=utf-8" assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_doc_limit_bucket:foo.localhost/total/urls=10\n" + # make sure soft limit doesn't get applied to a different stats bucket + request_meta = { + "stats": {"buckets": [{"bucket":"no_limit_bucket","tally-domains":["foo.localhost"]}]}, + "soft-limits": {"test_domain_doc_limit_bucket:foo.localhost/total/urls":10}, + } + headers = {"Warcprox-Meta": json.dumps(request_meta)} + url = 'http://zuh.foo.localhost:{}/o/p'.format(http_daemon.server_port) + response = requests.get( + url, proxies=archiving_proxies, headers=headers, stream=True, + verify=False) + assert response.status_code == 200 + assert response.headers['warcprox-test-header'] == 'o!' + assert response.content == b'I am the warcprox test payload! pppppppppp!\n' + def test_domain_data_soft_limit( http_daemon, https_daemon, warcprox_, archiving_proxies): urls_before = warcprox_.proxy.running_stats.urls @@ -1228,6 +1252,19 @@ def test_domain_data_soft_limit( ### assert response.headers["content-type"] == "text/plain;charset=utf-8" ### assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_data_limit_bucket:xn--zz-2ka.localhost/new/wire_bytes=200\n" + # make sure soft limit doesn't get applied to a different stats bucket + request_meta = { + "stats": {"buckets": [{"bucket":"no_limit_bucket","tally-domains":['ÞzZ.LOCALhost']}]}, + "soft-limits": {"test_domain_data_limit_bucket:ÞZZ.localhost/new/wire_bytes":200}, + } + headers = {"Warcprox-Meta": json.dumps(request_meta)} + url = 'http://ÞZz.localhost:{}/y/z'.format(http_daemon.server_port) + response = requests.get( + url, proxies=archiving_proxies, headers=headers, stream=True) + assert response.status_code == 200 + assert response.headers['warcprox-test-header'] == 'y!' + assert response.content == b'I am the warcprox test payload! zzzzzzzzzz!\n' + # XXX this test relies on a tor proxy running at localhost:9050 with a working # connection to the internet, and relies on a third party site (facebook) being # up and behaving a certain way