From 04c4b63f03de82d518262a97aec25ae0949a4ab8 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 28 Jun 2016 15:35:02 -0500 Subject: [PATCH] renaming scope rule "host" to "domain" to make it less confusing, since rules apply to subdomains as well --- setup.py | 2 +- tests/test_warcprox.py | 28 ++++++++++++++-------------- warcprox/warcproxy.py | 4 ++-- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/setup.py b/setup.py index 3339930..e6b35e4 100755 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ except: setuptools.setup( name='warcprox', - version='2.0.dev15', + version='2.0.dev16', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py index 8793f80..e6c17a0 100755 --- a/tests/test_warcprox.py +++ b/tests/test_warcprox.py @@ -732,7 +732,7 @@ def test_dedup_buckets(https_daemon, http_daemon, warcprox_, archiving_proxies, def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies): rules = [ { - "host": "localhost", + "domain": "localhost", "url_match": "STRING_MATCH", "value": "bar", }, @@ -746,7 +746,7 @@ def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies): "value": "http://(localhost:%s,)/fuh/" % (https_daemon.server_port), }, { - "host": "badhost.com", + "domain": "bad.domain.com", }, ] request_meta = {"blocks":rules} @@ -790,16 +790,16 @@ def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies): verify=False) assert response.status_code == 200 - # blocked by blanket host block - url = 'http://badhost.com/' + # blocked by blanket domain block + url = 'http://bad.domain.com/' response = requests.get( url, proxies=archiving_proxies, headers=headers, stream=True) assert response.status_code == 403 assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:") assert 
json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]} - # blocked by blanket host block - url = 'https://badhost.com/' + # blocked by blanket domain block + url = 'https://bad.domain.com/' response = requests.get( url, proxies=archiving_proxies, headers=headers, stream=True, verify=False) @@ -807,24 +807,24 @@ def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies): assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:") assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]} - # blocked by blanket host block - url = 'http://badhost.com:1234/' + # blocked by blanket domain block + url = 'http://bad.domain.com:1234/' response = requests.get( url, proxies=archiving_proxies, headers=headers, stream=True) assert response.status_code == 403 assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:") assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]} - # blocked by blanket host block - url = 'http://foo.bar.badhost.com/' + # blocked by blanket domain block + url = 'http://foo.bar.bad.domain.com/' response = requests.get( url, proxies=archiving_proxies, headers=headers, stream=True) assert response.status_code == 403 assert response.content.startswith(b"request rejected by warcprox: blocked by rule found in Warcprox-Meta header:") assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]} - # host block also applies to subdomains - url = 'https://foo.bar.badhost.com/' + # domain block also applies to subdomains + url = 'https://foo.bar.bad.domain.com/' response = requests.get( url, proxies=archiving_proxies, headers=headers, stream=True, verify=False) @@ -832,8 +832,8 @@ def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies): assert response.content.startswith(b"request rejected by warcprox: blocked by 
rule found in Warcprox-Meta header:") assert json.loads(response.headers['warcprox-meta']) == {"blocked-by-rule":rules[3]} - # blocked by blanket host block - url = 'http://foo.bar.badhost.com:1234/' + # blocked by blanket domain block + url = 'http://foo.bar.bad.domain.com:1234/' response = requests.get( url, proxies=archiving_proxies, headers=headers, stream=True) assert response.status_code == 403 diff --git a/warcprox/warcproxy.py b/warcprox/warcproxy.py index 882fec9..9966a14 100644 --- a/warcprox/warcproxy.py +++ b/warcprox/warcproxy.py @@ -120,7 +120,7 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler): def _scope_rule_applies(self, rule): u = Url(self.url) - if "host" in rule and not u.matches_ip_or_domain(rule["host"]): + if "domain" in rule and not u.matches_ip_or_domain(rule["domain"]): return False if "url_match" in rule: if rule["url_match"] == "STRING_MATCH": @@ -139,7 +139,7 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler): self.logger.warn("invalid rule.url_match=%s", rule.url_match) return False else: - if "host" in rule: + if "domain" in rule: # we already know that it matches from earlier check return True else: