From aa36ff2958940cb96f2848358e30e56743c5af0e Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 30 Jul 2015 21:18:27 +0000 Subject: [PATCH] include Warcprox-Meta response header with relevant info json, and an informative text/plain body, in "420 Limit reached" response --- warcprox/tests/test_warcprox.py | 11 +++++++---- warcprox/warcproxy.py | 22 ++++++++++++++-------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/warcprox/tests/test_warcprox.py b/warcprox/tests/test_warcprox.py index 33a01bb..0e6c3b9 100755 --- a/warcprox/tests/test_warcprox.py +++ b/warcprox/tests/test_warcprox.py @@ -404,13 +404,16 @@ def test_limits(http_daemon, archiving_proxies): assert response.headers['warcprox-test-header'] == 'a!' assert response.content == b'I am the warcprox test payload! bbbbbbbbbb!\n' + # XXX give warc writer thread a chance to update stats + time.sleep(2.0) + response = requests.get(url, proxies=archiving_proxies, headers=headers, stream=True) assert response.status_code == 420 assert response.reason == "Limit reached" - # response_meta = {"stats":{"job1":{"total":{"urls":10},"new":{"urls":1},"revisit":{"urls":9}}}} - # assert json.loads(headers["warcprox-meta"]) == response_meta - # assert response.headers["content-type"] == "text/plain;charset=utf-8" - # assert response.raw.data == b"request rejected by warcprox: reached limit job1.total.urls=10\n" + expected_response_meta = {'reached-limit': {'job1.total.urls': 10}, 'stats': {'job1': {'revisit': {'wire_bytes': 1215, 'urls': 9}, 'total': {'wire_bytes': 1350, 'urls': 10}, 'new': {'wire_bytes': 135, 'urls': 1}}}} + assert json.loads(response.headers["warcprox-meta"]) == expected_response_meta + assert response.headers["content-type"] == "text/plain;charset=utf-8" + assert response.raw.data == b"request rejected by warcprox: reached limit job1.total.urls=10\n" if __name__ == '__main__': pytest.main() diff --git a/warcprox/warcproxy.py b/warcprox/warcproxy.py index b2a7345..6e5ccff 100644 --- a/warcprox/warcproxy.py +++ b/warcprox/warcproxy.py @@ -157,22 +157,28 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler): logger = logging.getLogger("warcprox.warcprox.WarcProxyHandler") def _enforce_limits(self, warcprox_meta): - self.logger.info("warcprox_meta=%s", warcprox_meta) if (warcprox_meta and "stats" in warcprox_meta and "limits" in warcprox_meta["stats"]): - self.logger.info("warcprox_meta['stats']['limits']=%s", warcprox_meta['stats']['limits']) + # self.logger.info("warcprox_meta['stats']['limits']=%s", warcprox_meta['stats']['limits']) for item in warcprox_meta["stats"]["limits"].items(): - self.logger.info("item=%s", item) key, limit = item - self.logger.info("limit %s=%d", key, limit) bucket0, bucket1, bucket2 = key.rsplit(".", 2) - self.logger.info("%s::%s::%s", bucket0, bucket1, bucket2) value = self.server.stats_db.value(bucket0, bucket1, bucket2) - self.logger.info("stats value is %s", value) if value and value >= limit: - self.send_error(420, "Limit reached") + self.logger.info('sending "420 Limit reached" %s=%s', key, limit) + body = "request rejected by warcprox: reached limit {}={}\n".format(key, limit).encode("utf-8") + self.send_response(420, "Limit reached") + self.send_header("Content-Type", "text/plain;charset=utf-8") + self.send_header("Connection", "close") + self.send_header("Content-Length", len(body)) + response_meta = {"reached-limit":{key:limit}, "stats":{bucket0: self.server.stats_db.value(bucket0)}} + self.send_header("Warcprox-Meta", json.dumps(response_meta, separators=(",",":"))) + self.end_headers() + if self.command != "HEAD": + self.wfile.write(body) self.connection.close() - return + return True + return False def _proxy_request(self): # Build request