Mirror of https://github.com/internetarchive/warcprox.git, synced 2025-01-18 13:22:09 +01:00
new checks exposing bug in limits enforcement
This commit is contained in:
parent 1e76ed3302
commit 195faa5cff
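
The new checks exercise warcprox's Warcprox-Meta limits enforcement: the client names a stats bucket and a cap on it, and warcprox rejects further fetches once the cap is reached. For orientation, here is a rough client-side sketch of that mechanism (not part of the commit; the proxy address and target URL are placeholder assumptions, and the tests below use the archiving_proxies and http_daemon fixtures instead):

import json
import requests

# assumed warcprox listen address; adjust to wherever warcprox is running
proxies = {'http': 'http://localhost:8000'}

# tally urls fetched under "test_limits_bucket" and cap that bucket at 10 urls
request_meta = {
    "stats": {"buckets": ["test_limits_bucket"]},
    "limits": {"test_limits_bucket/total/urls": 10},
}
headers = {"Warcprox-Meta": json.dumps(request_meta)}

response = requests.get(
        'http://example.com/', proxies=proxies, headers=headers)
if response.status_code == 420:
    print(response.reason)  # "Reached limit" once the bucket's url count hits 10
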
@@ -709,6 +709,7 @@ def test_limits(http_daemon, warcprox_, archiving_proxies):
     # wait for postfetch chain
     wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 10)
 
+    # next fetch hits the limit
     response = requests.get(url, proxies=archiving_proxies, headers=headers, stream=True)
     assert response.status_code == 420
     assert response.reason == "Reached limit"
@@ -717,6 +718,14 @@ def test_limits(http_daemon, warcprox_, archiving_proxies):
     assert response.headers["content-type"] == "text/plain;charset=utf-8"
     assert response.raw.data == b"request rejected by warcprox: reached limit test_limits_bucket/total/urls=10\n"
 
+    # make sure limit doesn't get applied to a different stats bucket
+    request_meta = {"stats":{"buckets":["no_limits_bucket"]},"limits":{"test_limits_bucket/total/urls":10}}
+    headers = {"Warcprox-Meta": json.dumps(request_meta)}
+    response = requests.get(url, proxies=archiving_proxies, headers=headers, stream=True)
+    assert response.status_code == 200
+    assert response.headers['warcprox-test-header'] == 'i!'
+    assert response.content == b'I am the warcprox test payload! jjjjjjjjjj!\n'
+
 def test_return_capture_timestamp(http_daemon, warcprox_, archiving_proxies):
     urls_before = warcprox_.proxy.running_stats.urls
 
@@ -999,6 +1008,7 @@ def test_domain_doc_soft_limit(
         http_daemon, https_daemon, warcprox_, archiving_proxies):
     urls_before = warcprox_.proxy.running_stats.urls
 
+    # ** comment is obsolete (server is multithreaded) but still useful **
     # we need to clear the connection pool here because
     # - connection pool already may already have an open connection localhost
     # - we're about to make a connection to foo.localhost
@@ -1134,6 +1144,20 @@ def test_domain_doc_soft_limit(
     assert response.headers["content-type"] == "text/plain;charset=utf-8"
     assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_doc_limit_bucket:foo.localhost/total/urls=10\n"
 
+    # make sure soft limit doesn't get applied to a different stats bucket
+    request_meta = {
+        "stats": {"buckets": [{"bucket":"no_limit_bucket","tally-domains":["foo.localhost"]}]},
+        "soft-limits": {"test_domain_doc_limit_bucket:foo.localhost/total/urls":10},
+    }
+    headers = {"Warcprox-Meta": json.dumps(request_meta)}
+    url = 'http://zuh.foo.localhost:{}/o/p'.format(http_daemon.server_port)
+    response = requests.get(
+            url, proxies=archiving_proxies, headers=headers, stream=True,
+            verify=False)
+    assert response.status_code == 200
+    assert response.headers['warcprox-test-header'] == 'o!'
+    assert response.content == b'I am the warcprox test payload! pppppppppp!\n'
+
 def test_domain_data_soft_limit(
         http_daemon, https_daemon, warcprox_, archiving_proxies):
     urls_before = warcprox_.proxy.running_stats.urls
@@ -1228,6 +1252,19 @@ def test_domain_data_soft_limit(
     ### assert response.headers["content-type"] == "text/plain;charset=utf-8"
     ### assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_data_limit_bucket:xn--zz-2ka.localhost/new/wire_bytes=200\n"
 
+    # make sure soft limit doesn't get applied to a different stats bucket
+    request_meta = {
+        "stats": {"buckets": [{"bucket":"no_limit_bucket","tally-domains":['ÞzZ.LOCALhost']}]},
+        "soft-limits": {"test_domain_data_limit_bucket:ÞZZ.localhost/new/wire_bytes":200},
+    }
+    headers = {"Warcprox-Meta": json.dumps(request_meta)}
+    url = 'http://ÞZz.localhost:{}/y/z'.format(http_daemon.server_port)
+    response = requests.get(
+            url, proxies=archiving_proxies, headers=headers, stream=True)
+    assert response.status_code == 200
+    assert response.headers['warcprox-test-header'] == 'y!'
+    assert response.content == b'I am the warcprox test payload! zzzzzzzzzz!\n'
+
 # XXX this test relies on a tor proxy running at localhost:9050 with a working
 # connection to the internet, and relies on a third party site (facebook) being
 # up and behaving a certain way