Include a Warcprox-Meta response header carrying the relevant info as JSON, and an informative text/plain body, in the "420 Limit reached" response

This commit is contained in:
Noah Levitt 2015-07-30 21:18:27 +00:00
parent 4ce89e6d03
commit aa36ff2958
2 changed files with 21 additions and 12 deletions

View File

@ -404,13 +404,16 @@ def test_limits(http_daemon, archiving_proxies):
assert response.headers['warcprox-test-header'] == 'a!'
assert response.content == b'I am the warcprox test payload! bbbbbbbbbb!\n'
# XXX give warc writer thread a chance to update stats
time.sleep(2.0)
response = requests.get(url, proxies=archiving_proxies, headers=headers, stream=True)
assert response.status_code == 420
assert response.reason == "Limit reached"
# response_meta = {"stats":{"job1":{"total":{"urls":10},"new":{"urls":1},"revisit":{"urls":9}}}}
# assert json.loads(headers["warcprox-meta"]) == response_meta
# assert response.headers["content-type"] == "text/plain;charset=utf-8"
# assert response.raw.data == b"request rejected by warcprox: reached limit job1.total.urls=10\n"
expected_response_meta = {'reached-limit': {'job1.total.urls': 10}, 'stats': {'job1': {'revisit': {'wire_bytes': 1215, 'urls': 9}, 'total': {'wire_bytes': 1350, 'urls': 10}, 'new': {'wire_bytes': 135, 'urls': 1}}}}
assert json.loads(response.headers["warcprox-meta"]) == expected_response_meta
assert response.headers["content-type"] == "text/plain;charset=utf-8"
assert response.raw.data == b"request rejected by warcprox: reached limit job1.total.urls=10\n"
if __name__ == '__main__':
    # Allow running this test module directly as a script. Propagate
    # pytest's return code so a failing run yields a non-zero exit status
    # instead of always exiting 0.
    raise SystemExit(pytest.main())

View File

@ -157,22 +157,28 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
logger = logging.getLogger("warcprox.warcprox.WarcProxyHandler")
def _enforce_limits(self, warcprox_meta):
    """Reject the request with "420 Limit reached" if a stats limit is hit.

    ``warcprox_meta["stats"]["limits"]`` maps keys such as
    ``"job1.total.urls"`` to a numeric limit.  Each key is split from the
    right into three bucket names, which are looked up in
    ``self.server.stats_db``.  For the first limit whose current value
    has reached it, a 420 response is sent with a plain-text explanation
    as the body and a ``Warcprox-Meta`` header (compact JSON holding the
    limit that was reached and the stats of the first bucket), and the
    connection is closed.

    Args:
        warcprox_meta: dict-like metadata for this request (presumably
            parsed from the request's Warcprox-Meta header -- confirm
            against the caller); may be None or empty.

    Returns:
        True if a limit was reached and the 420 response has been sent,
        False otherwise.
    """
    self.logger.info("warcprox_meta=%s", warcprox_meta)
    if not (warcprox_meta and "stats" in warcprox_meta
            and "limits" in warcprox_meta["stats"]):
        return False

    for key, limit in warcprox_meta["stats"]["limits"].items():
        self.logger.info("item=%s", (key, limit))
        self.logger.info("limit %s=%d", key, limit)
        # Split from the right so that only the last two components are
        # peeled off; any extra dots stay in the first bucket name.
        bucket0, bucket1, bucket2 = key.rsplit(".", 2)
        self.logger.info("%s::%s::%s", bucket0, bucket1, bucket2)
        value = self.server.stats_db.value(bucket0, bucket1, bucket2)
        self.logger.info("stats value is %s", value)
        if value and value >= limit:
            self.logger.info(
                'sending "420 Limit reached" %s=%s', key, limit)
            body = (
                "request rejected by warcprox: reached limit {}={}\n"
                .format(key, limit).encode("utf-8"))
            response_meta = {
                "reached-limit": {key: limit},
                "stats": {bucket0: self.server.stats_db.value(bucket0)},
            }
            # Exactly one response is written: status line, headers,
            # then (except for HEAD) the explanatory body.
            self.send_response(420, "Limit reached")
            self.send_header("Content-Type", "text/plain;charset=utf-8")
            self.send_header("Connection", "close")
            self.send_header("Content-Length", len(body))
            self.send_header(
                "Warcprox-Meta",
                json.dumps(response_meta, separators=(",", ":")))
            self.end_headers()
            if self.command != "HEAD":
                self.wfile.write(body)
            self.connection.close()
            return True
    return False
def _proxy_request(self):
# Build request