diff --git a/setup.py b/setup.py index a948dec..27dde45 100755 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ except: setuptools.setup( name='warcprox', - version='2.4b3.dev190', + version='2.4b3.dev192', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py index 080e0ff..c67c6ff 100755 --- a/tests/test_warcprox.py +++ b/tests/test_warcprox.py @@ -1576,8 +1576,11 @@ def test_svcreg_status(warcprox_): 'rates_15min', 'active_requests','start_time','urls_processed', 'warc_bytes_written', 'postfetch_chain', 'earliest_still_active_fetch_start',} + assert status['id'] == 'warcprox:%s:%s' % ( + socket.gethostname(), warcprox_.proxy.server_port) assert status['role'] == 'warcprox' assert status['version'] == warcprox.__version__ + assert status['host'] == socket.gethostname() assert status['port'] == warcprox_.proxy.server_port assert status['pid'] == os.getpid() assert status['threads'] == warcprox_.proxy.pool._max_workers diff --git a/warcprox/controller.py b/warcprox/controller.py index 1eed21b..80eca1c 100644 --- a/warcprox/controller.py +++ b/warcprox/controller.py @@ -36,6 +36,7 @@ import functools import doublethink import importlib import queue +import socket class Factory: @staticmethod @@ -319,13 +320,15 @@ class WarcproxController(object): status_info = self.status_info else: status_info = { + 'id': 'warcprox:%s:%s' % ( + socket.gethostname(), self.proxy.server_port), 'role': 'warcprox', 'version': warcprox.__version__, 'ttl': self.HEARTBEAT_INTERVAL * 3, + 'host': socket.gethostname(), 'port': self.proxy.server_port, } status_info.update(self.proxy.status()) - self.status_info = self.service_registry.heartbeat(status_info) self.logger.trace('status in service registry: %s', self.status_info) diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index 7a7751e..1fc0c72 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -464,6 +464,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): req += self.rfile.read(int(self.headers['Content-Length'])) prox_rec_res = None + start = time.time() try: self.logger.debug('sending to remote server req=%r', req) @@ -490,6 +491,15 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): 'bytes exceeded for URL %s', self._max_resource_size, self.url) break + elif (not 'content-length' in self.headers + and time.time() - start > 3 * 60 * 60): + prox_rec_res.truncated = b'time' + self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR) + self._remote_server_conn.sock.close() + self.logger.info( + 'reached hard timeout of 3 hours fetching url ' + 'without content-length: %s', self.url) + break self.log_request(prox_rec_res.status, prox_rec_res.recorder.len) # Let's close off the remote end. If remote connection is fine,