mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Emoji IDN hostnames fail with Python 2.7, so test with a BMP (Basic Multilingual Plane) Unicode character instead
This commit is contained in:
parent
33775d360a
commit
46c24833ff
2
setup.py
2
setup.py
@@ -51,7 +51,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.0.dev20',
|
version='2.0.dev21',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@@ -1004,13 +1004,13 @@ def test_domain_data_soft_limit(
|
|||||||
http_daemon, https_daemon, warcprox_, archiving_proxies):
|
http_daemon, https_daemon, warcprox_, archiving_proxies):
|
||||||
# using idn
|
# using idn
|
||||||
request_meta = {
|
request_meta = {
|
||||||
"stats": {"buckets": [{"bucket":"test_domain_data_limit_bucket","tally-domains":['🎵zZ.LOCALhost']}]},
|
"stats": {"buckets": [{"bucket":"test_domain_data_limit_bucket","tally-domains":['♛zZ.LOCALhost']}]},
|
||||||
# response is 135 bytes, so 3rd novel url should be disallowed
|
# response is 135 bytes, so 3rd novel url should be disallowed
|
||||||
"soft-limits": {"test_domain_data_limit_bucket:🎵ZZ.localhost/new/wire_bytes":200},
|
"soft-limits": {"test_domain_data_limit_bucket:♛ZZ.localhost/new/wire_bytes":200},
|
||||||
}
|
}
|
||||||
headers = {"Warcprox-Meta": json.dumps(request_meta)}
|
headers = {"Warcprox-Meta": json.dumps(request_meta)}
|
||||||
|
|
||||||
url = 'http://🎵Zz.localhost:{}/y/z'.format(http_daemon.server_port)
|
url = 'http://♛Zz.localhost:{}/y/z'.format(http_daemon.server_port)
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
url, proxies=archiving_proxies, headers=headers, stream=True)
|
url, proxies=archiving_proxies, headers=headers, stream=True)
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
@@ -1025,7 +1025,7 @@ def test_domain_data_soft_limit(
|
|||||||
time.sleep(2.0)
|
time.sleep(2.0)
|
||||||
|
|
||||||
# duplicate, does not count toward limit
|
# duplicate, does not count toward limit
|
||||||
url = 'https://baz.🎵zz.localhost:{}/y/z'.format(https_daemon.server_port)
|
url = 'https://baz.♛zz.localhost:{}/y/z'.format(https_daemon.server_port)
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
url, proxies=archiving_proxies, headers=headers, stream=True,
|
url, proxies=archiving_proxies, headers=headers, stream=True,
|
||||||
verify=False)
|
verify=False)
|
||||||
@@ -1041,7 +1041,7 @@ def test_domain_data_soft_limit(
|
|||||||
time.sleep(2.0)
|
time.sleep(2.0)
|
||||||
|
|
||||||
# novel, pushes stats over the limit
|
# novel, pushes stats over the limit
|
||||||
url = 'https://muh.XN--Zz-B862a.locALHOst:{}/z/~'.format(https_daemon.server_port)
|
url = 'https://muh.XN--Zz-xZX.locALHOst:{}/z/~'.format(https_daemon.server_port)
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
url, proxies=archiving_proxies, headers=headers, stream=True,
|
url, proxies=archiving_proxies, headers=headers, stream=True,
|
||||||
verify=False)
|
verify=False)
|
||||||
@@ -1065,31 +1065,31 @@ def test_domain_data_soft_limit(
|
|||||||
assert response.content == b'I am the warcprox test payload! ~~~~~~~~~~!\n'
|
assert response.content == b'I am the warcprox test payload! ~~~~~~~~~~!\n'
|
||||||
|
|
||||||
# blocked because we're over the limit now
|
# blocked because we're over the limit now
|
||||||
url = 'http://lOl.wHut.🎵ZZ.lOcALHOst:{}/y/z'.format(http_daemon.server_port)
|
url = 'http://lOl.wHut.♛ZZ.lOcALHOst:{}/y/z'.format(http_daemon.server_port)
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
url, proxies=archiving_proxies, headers=headers, stream=True)
|
url, proxies=archiving_proxies, headers=headers, stream=True)
|
||||||
assert response.status_code == 430
|
assert response.status_code == 430
|
||||||
assert response.reason == "Reached soft limit"
|
assert response.reason == "Reached soft limit"
|
||||||
expected_response_meta = {'reached-soft-limit': {'test_domain_data_limit_bucket:xn--zz-b862a.localhost/new/wire_bytes': 200}, 'stats': {'test_domain_data_limit_bucket:xn--zz-b862a.localhost': {'total': {'wire_bytes': 405, 'urls': 3}, 'revisit': {'wire_bytes': 135, 'urls': 1}, 'new': {'wire_bytes': 270, 'urls': 2}, 'bucket': 'test_domain_data_limit_bucket:xn--zz-b862a.localhost'}}}
|
expected_response_meta = {'reached-soft-limit': {'test_domain_data_limit_bucket:xn--zz-xzx.localhost/new/wire_bytes': 200}, 'stats': {'test_domain_data_limit_bucket:xn--zz-xzx.localhost': {'total': {'wire_bytes': 405, 'urls': 3}, 'revisit': {'wire_bytes': 135, 'urls': 1}, 'new': {'wire_bytes': 270, 'urls': 2}, 'bucket': 'test_domain_data_limit_bucket:xn--zz-xzx.localhost'}}}
|
||||||
assert json.loads(response.headers["warcprox-meta"]) == expected_response_meta
|
assert json.loads(response.headers["warcprox-meta"]) == expected_response_meta
|
||||||
assert response.headers["content-type"] == "text/plain;charset=utf-8"
|
assert response.headers["content-type"] == "text/plain;charset=utf-8"
|
||||||
assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_data_limit_bucket:xn--zz-b862a.localhost/new/wire_bytes=200\n"
|
assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_data_limit_bucket:xn--zz-xzx.localhost/new/wire_bytes=200\n"
|
||||||
|
|
||||||
# XXX this check is resulting in a segfault on mac and linux, from ssl I
|
# XXX this check is resulting in a segfault on mac and linux, from ssl I
|
||||||
# think, probably because of the dns resolution monkey-patching
|
# think, probably because of the dns resolution monkey-patching
|
||||||
# https://travis-ci.org/internetarchive/warcprox/builds/141187342
|
# https://travis-ci.org/internetarchive/warcprox/builds/141187342
|
||||||
#
|
#
|
||||||
### # https also blocked
|
### # https also blocked
|
||||||
### url = 'https://xn--zz-b862ah.loCAlhost:{}/w/x'.format(https_daemon.server_port)
|
### url = 'https://xn--zz-xzxh.loCAlhost:{}/w/x'.format(https_daemon.server_port)
|
||||||
### response = requests.get(
|
### response = requests.get(
|
||||||
### url, proxies=archiving_proxies, headers=headers, stream=True,
|
### url, proxies=archiving_proxies, headers=headers, stream=True,
|
||||||
### verify=False)
|
### verify=False)
|
||||||
### assert response.status_code == 430
|
### assert response.status_code == 430
|
||||||
### assert response.reason == "Reached soft limit"
|
### assert response.reason == "Reached soft limit"
|
||||||
### expected_response_meta = {'reached-soft-limit': {'test_domain_data_limit_bucket:xn--zz-b862a.localhost/new/wire_bytes': 200}, 'stats': {'test_domain_data_limit_bucket:xn--zz-b862a.localhost': {'total': {'wire_bytes': 405, 'urls': 3}, 'revisit': {'wire_bytes': 135, 'urls': 1}, 'new': {'wire_bytes': 270, 'urls': 2}, 'bucket': 'test_domain_data_limit_bucket:xn--zz-b862a.localhost'}}}
|
### expected_response_meta = {'reached-soft-limit': {'test_domain_data_limit_bucket:xn--zz-xzx.localhost/new/wire_bytes': 200}, 'stats': {'test_domain_data_limit_bucket:xn--zz-xzx.localhost': {'total': {'wire_bytes': 405, 'urls': 3}, 'revisit': {'wire_bytes': 135, 'urls': 1}, 'new': {'wire_bytes': 270, 'urls': 2}, 'bucket': 'test_domain_data_limit_bucket:xn--zz-xzx.localhost'}}}
|
||||||
### assert json.loads(response.headers["warcprox-meta"]) == expected_response_meta
|
### assert json.loads(response.headers["warcprox-meta"]) == expected_response_meta
|
||||||
### assert response.headers["content-type"] == "text/plain;charset=utf-8"
|
### assert response.headers["content-type"] == "text/plain;charset=utf-8"
|
||||||
### assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_data_limit_bucket:xn--zz-b862a.localhost/new/wire_bytes=200\n"
|
### assert response.raw.data == b"request rejected by warcprox: reached soft limit test_domain_data_limit_bucket:xn--zz-xzx.localhost/new/wire_bytes=200\n"
|
||||||
|
|
||||||
# XXX this test relies on a tor proxy running at localhost:9050 with a working
|
# XXX this test relies on a tor proxy running at localhost:9050 with a working
|
||||||
# connection to the internet, and relies on a third party site (facebook) being
|
# connection to the internet, and relies on a third party site (facebook) being
|
||||||
|
Loading…
x
Reference in New Issue
Block a user