mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
renaming scope rule "host" to "domain" to make it less confusing, since rules apply to subdomains as well
This commit is contained in:
parent
04c21408d7
commit
04c4b63f03
2
setup.py
2
setup.py
@ -51,7 +51,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.0.dev15',
|
version='2.0.dev16',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@ -732,7 +732,7 @@ def test_dedup_buckets(https_daemon, http_daemon, warcprox_, archiving_proxies,
|
|||||||
def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies):
|
def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies):
|
||||||
rules = [
|
rules = [
|
||||||
{
|
{
|
||||||
"host": "localhost",
|
"domain": "localhost",
|
||||||
"url_match": "STRING_MATCH",
|
"url_match": "STRING_MATCH",
|
||||||
"value": "bar",
|
"value": "bar",
|
||||||
},
|
},
|
||||||
@ -746,7 +746,7 @@ def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies):
|
|||||||
"value": "http://(localhost:%s,)/fuh/" % (https_daemon.server_port),
|
"value": "http://(localhost:%s,)/fuh/" % (https_daemon.server_port),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"host": "badhost.com",
|
"domain": "bad.domain.com",
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
request_meta = {"blocks":rules}
|
request_meta = {"blocks":rules}
|
||||||
@ -790,16 +790,16 @@ def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies):
|
|||||||
verify=False)
|
verify=False)
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
|
|
||||||
# blocked by blanket host block
|
# blocked by blanket domain block
|
||||||
url = 'http://badhost.com/'
|
url = 'http://bad.domain.com/'
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
url, proxies=archiving_proxies, headers=headers, stream=True)
|
url, proxies=archiving_proxies, headers=headers, stream=True)
|
||||||
assert response.status_code == 403
|
assert response.status_code == 403
|
||||||
assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:")
|
assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:")
|
||||||
assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]}
|
assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]}
|
||||||
|
|
||||||
# blocked by blanket host block
|
# blocked by blanket domain block
|
||||||
url = 'https://badhost.com/'
|
url = 'https://bad.domain.com/'
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
url, proxies=archiving_proxies, headers=headers, stream=True,
|
url, proxies=archiving_proxies, headers=headers, stream=True,
|
||||||
verify=False)
|
verify=False)
|
||||||
@ -807,24 +807,24 @@ def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies):
|
|||||||
assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:")
|
assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:")
|
||||||
assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]}
|
assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]}
|
||||||
|
|
||||||
# blocked by blanket host block
|
# blocked by blanket domain block
|
||||||
url = 'http://badhost.com:1234/'
|
url = 'http://bad.domain.com:1234/'
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
url, proxies=archiving_proxies, headers=headers, stream=True)
|
url, proxies=archiving_proxies, headers=headers, stream=True)
|
||||||
assert response.status_code == 403
|
assert response.status_code == 403
|
||||||
assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:")
|
assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:")
|
||||||
assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]}
|
assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]}
|
||||||
|
|
||||||
# blocked by blanket host block
|
# blocked by blanket domain block
|
||||||
url = 'http://foo.bar.badhost.com/'
|
url = 'http://foo.bar.bad.domain.com/'
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
url, proxies=archiving_proxies, headers=headers, stream=True)
|
url, proxies=archiving_proxies, headers=headers, stream=True)
|
||||||
assert response.status_code == 403
|
assert response.status_code == 403
|
||||||
assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:")
|
assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:")
|
||||||
assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]}
|
assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]}
|
||||||
|
|
||||||
# host block also applies to subdomains
|
# domain block also applies to subdomains
|
||||||
url = 'https://foo.bar.badhost.com/'
|
url = 'https://foo.bar.bad.domain.com/'
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
url, proxies=archiving_proxies, headers=headers, stream=True,
|
url, proxies=archiving_proxies, headers=headers, stream=True,
|
||||||
verify=False)
|
verify=False)
|
||||||
@ -832,8 +832,8 @@ def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies):
|
|||||||
assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:")
|
assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:")
|
||||||
assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]}
|
assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]}
|
||||||
|
|
||||||
# blocked by blanket host block
|
# blocked by blanket domain block
|
||||||
url = 'http://foo.bar.badhost.com:1234/'
|
url = 'http://foo.bar.bad.domain.com:1234/'
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
url, proxies=archiving_proxies, headers=headers, stream=True)
|
url, proxies=archiving_proxies, headers=headers, stream=True)
|
||||||
assert response.status_code == 403
|
assert response.status_code == 403
|
||||||
|
@ -120,7 +120,7 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
|||||||
def _scope_rule_applies(self, rule):
|
def _scope_rule_applies(self, rule):
|
||||||
u = Url(self.url)
|
u = Url(self.url)
|
||||||
|
|
||||||
if "host" in rule and not u.matches_ip_or_domain(rule["host"]):
|
if "domain" in rule and not u.matches_ip_or_domain(rule["domain"]):
|
||||||
return False
|
return False
|
||||||
if "url_match" in rule:
|
if "url_match" in rule:
|
||||||
if rule["url_match"] == "STRING_MATCH":
|
if rule["url_match"] == "STRING_MATCH":
|
||||||
@ -139,7 +139,7 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
|||||||
self.logger.warn("invalid rule.url_match=%s", rule.url_match)
|
self.logger.warn("invalid rule.url_match=%s", rule.url_match)
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
if "host" in rule:
|
if "domain" in rule:
|
||||||
# we already know that it matches from earlier check
|
# we already know that it matches from earlier check
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user