From 1ea8a06a69b5a786ef7a0cf71a311c102ef800b4 Mon Sep 17 00:00:00 2001
From: Noah Levitt
Date: Mon, 12 Nov 2018 15:57:37 -0800
Subject: [PATCH] 3 hour hard timeout on urls without content-length so that
 indefinite streams like icecast radio stations don't hang forever

---
 setup.py              |  2 +-
 warcprox/mitmproxy.py | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7b068f6..27dde45 100755
--- a/setup.py
+++ b/setup.py
@@ -40,7 +40,7 @@ except:
 
 setuptools.setup(
         name='warcprox',
-        version='2.4b3.dev191',
+        version='2.4b3.dev192',
         description='WARC writing MITM HTTP/S proxy',
         url='https://github.com/internetarchive/warcprox',
         author='Noah Levitt',
diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py
index 7a7751e..1fc0c72 100644
--- a/warcprox/mitmproxy.py
+++ b/warcprox/mitmproxy.py
@@ -464,6 +464,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
             req += self.rfile.read(int(self.headers['Content-Length']))
 
         prox_rec_res = None
+        start = time.time()
         try:
             self.logger.debug('sending to remote server req=%r', req)
 
@@ -490,6 +491,15 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
                             'bytes exceeded for URL %s',
                             self._max_resource_size, self.url)
                     break
+                elif (not 'content-length' in self.headers
+                        and time.time() - start > 3 * 60 * 60):
+                    prox_rec_res.truncated = b'time'
+                    self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
+                    self._remote_server_conn.sock.close()
+                    self.logger.info(
+                            'reached hard timeout of 3 hours fetching url '
+                            'without content-length: %s', self.url)
+                    break
 
             self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
             # Let's close off the remote end. If remote connection is fine,