mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Include a Warcprox-Meta response header carrying the relevant info as JSON, plus an informative text/plain body, in the "420 Limit reached" response
This commit is contained in:
parent
4ce89e6d03
commit
aa36ff2958
@ -404,13 +404,16 @@ def test_limits(http_daemon, archiving_proxies):
|
||||
assert response.headers['warcprox-test-header'] == 'a!'
|
||||
assert response.content == b'I am the warcprox test payload! bbbbbbbbbb!\n'
|
||||
|
||||
# XXX give warc writer thread a chance to update stats
|
||||
time.sleep(2.0)
|
||||
|
||||
response = requests.get(url, proxies=archiving_proxies, headers=headers, stream=True)
|
||||
assert response.status_code == 420
|
||||
assert response.reason == "Limit reached"
|
||||
# response_meta = {"stats":{"job1":{"total":{"urls":10},"new":{"urls":1},"revisit":{"urls":9}}}}
|
||||
# assert json.loads(headers["warcprox-meta"]) == response_meta
|
||||
# assert response.headers["content-type"] == "text/plain;charset=utf-8"
|
||||
# assert response.raw.data == b"request rejected by warcprox: reached limit job1.total.urls=10\n"
|
||||
expected_response_meta = {'reached-limit': {'job1.total.urls': 10}, 'stats': {'job1': {'revisit': {'wire_bytes': 1215, 'urls': 9}, 'total': {'wire_bytes': 1350, 'urls': 10}, 'new': {'wire_bytes': 135, 'urls': 1}}}}
|
||||
assert json.loads(response.headers["warcprox-meta"]) == expected_response_meta
|
||||
assert response.headers["content-type"] == "text/plain;charset=utf-8"
|
||||
assert response.raw.data == b"request rejected by warcprox: reached limit job1.total.urls=10\n"
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main()
|
||||
|
@ -157,22 +157,28 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
logger = logging.getLogger("warcprox.warcprox.WarcProxyHandler")
|
||||
|
||||
def _enforce_limits(self, warcprox_meta):
|
||||
self.logger.info("warcprox_meta=%s", warcprox_meta)
|
||||
if (warcprox_meta and "stats" in warcprox_meta
|
||||
and "limits" in warcprox_meta["stats"]):
|
||||
self.logger.info("warcprox_meta['stats']['limits']=%s", warcprox_meta['stats']['limits'])
|
||||
# self.logger.info("warcprox_meta['stats']['limits']=%s", warcprox_meta['stats']['limits'])
|
||||
for item in warcprox_meta["stats"]["limits"].items():
|
||||
self.logger.info("item=%s", item)
|
||||
key, limit = item
|
||||
self.logger.info("limit %s=%d", key, limit)
|
||||
bucket0, bucket1, bucket2 = key.rsplit(".", 2)
|
||||
self.logger.info("%s::%s::%s", bucket0, bucket1, bucket2)
|
||||
value = self.server.stats_db.value(bucket0, bucket1, bucket2)
|
||||
self.logger.info("stats value is %s", value)
|
||||
if value and value >= limit:
|
||||
self.send_error(420, "Limit reached")
|
||||
self.logger.info('sending "420 Limit reached" %s=%s', key, limit)
|
||||
body = "request rejected by warcprox: reached limit {}={}\n".format(key, limit).encode("utf-8")
|
||||
self.send_response(420, "Limit reached")
|
||||
self.send_header("Content-Type", "text/plain;charset=utf-8")
|
||||
self.send_header("Connection", "close")
|
||||
self.send_header("Content-Length", len(body))
|
||||
response_meta = {"reached-limit":{key:limit}, "stats":{bucket0: self.server.stats_db.value(bucket0)}}
|
||||
self.send_header("Warcprox-Meta", json.dumps(response_meta, separators=(",",":")))
|
||||
self.end_headers()
|
||||
if self.command != "HEAD":
|
||||
self.wfile.write(body)
|
||||
self.connection.close()
|
||||
return
|
||||
return True
|
||||
return False
|
||||
|
||||
def _proxy_request(self):
|
||||
# Build request
|
||||
|
Loading…
x
Reference in New Issue
Block a user