Merge branch 'master' into qa

* master:
  3 hour hard timeout on urls without content-length
  use predictable id in service registry
This commit is contained in:
Noah Levitt 2018-11-14 13:01:17 -08:00
commit 6dc4abfa84
4 changed files with 18 additions and 2 deletions

View File

@@ -40,7 +40,7 @@ except:
setuptools.setup(
name='warcprox',
version='2.4b3.dev190',
version='2.4b3.dev192',
description='WARC writing MITM HTTP/S proxy',
url='https://github.com/internetarchive/warcprox',
author='Noah Levitt',

View File

@@ -1576,8 +1576,11 @@ def test_svcreg_status(warcprox_):
'rates_15min', 'active_requests','start_time','urls_processed',
'warc_bytes_written', 'postfetch_chain',
'earliest_still_active_fetch_start',}
assert status['id'] == 'warcprox:%s:%s' % (
socket.gethostname(), warcprox_.proxy.server_port)
assert status['role'] == 'warcprox'
assert status['version'] == warcprox.__version__
assert status['host'] == socket.gethostname()
assert status['port'] == warcprox_.proxy.server_port
assert status['pid'] == os.getpid()
assert status['threads'] == warcprox_.proxy.pool._max_workers

View File

@@ -36,6 +36,7 @@ import functools
import doublethink
import importlib
import queue
import socket
class Factory:
@staticmethod
@@ -319,13 +320,15 @@ class WarcproxController(object):
status_info = self.status_info
else:
status_info = {
'id': 'warcprox:%s:%s' % (
socket.gethostname(), self.proxy.server_port),
'role': 'warcprox',
'version': warcprox.__version__,
'ttl': self.HEARTBEAT_INTERVAL * 3,
'host': socket.gethostname(),
'port': self.proxy.server_port,
}
status_info.update(self.proxy.status())
self.status_info = self.service_registry.heartbeat(status_info)
self.logger.trace('status in service registry: %s', self.status_info)

View File

@@ -464,6 +464,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
req += self.rfile.read(int(self.headers['Content-Length']))
prox_rec_res = None
start = time.time()
try:
self.logger.debug('sending to remote server req=%r', req)
@@ -490,6 +491,15 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
'bytes exceeded for URL %s',
self._max_resource_size, self.url)
break
elif (not 'content-length' in self.headers
and time.time() - start > 3 * 60 * 60):
prox_rec_res.truncated = b'time'
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
self._remote_server_conn.sock.close()
self.logger.info(
'reached hard timeout of 3 hours fetching url '
'without content-length: %s', self.url)
break
self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
# Let's close off the remote end. If remote connection is fine,