mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'master' into qa
* master:
  3 hour hard timeout on urls without content-length
  use predictable id in service registry
This commit is contained in:
commit
6dc4abfa84
2
setup.py
2
setup.py
@@ -40,7 +40,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.4b3.dev190',
|
||||
version='2.4b3.dev192',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
|
@@ -1576,8 +1576,11 @@ def test_svcreg_status(warcprox_):
|
||||
'rates_15min', 'active_requests','start_time','urls_processed',
|
||||
'warc_bytes_written', 'postfetch_chain',
|
||||
'earliest_still_active_fetch_start',}
|
||||
assert status['id'] == 'warcprox:%s:%s' % (
|
||||
socket.gethostname(), warcprox_.proxy.server_port)
|
||||
assert status['role'] == 'warcprox'
|
||||
assert status['version'] == warcprox.__version__
|
||||
assert status['host'] == socket.gethostname()
|
||||
assert status['port'] == warcprox_.proxy.server_port
|
||||
assert status['pid'] == os.getpid()
|
||||
assert status['threads'] == warcprox_.proxy.pool._max_workers
|
||||
|
@@ -36,6 +36,7 @@ import functools
|
||||
import doublethink
|
||||
import importlib
|
||||
import queue
|
||||
import socket
|
||||
|
||||
class Factory:
|
||||
@staticmethod
|
||||
@@ -319,13 +320,15 @@ class WarcproxController(object):
|
||||
status_info = self.status_info
|
||||
else:
|
||||
status_info = {
|
||||
'id': 'warcprox:%s:%s' % (
|
||||
socket.gethostname(), self.proxy.server_port),
|
||||
'role': 'warcprox',
|
||||
'version': warcprox.__version__,
|
||||
'ttl': self.HEARTBEAT_INTERVAL * 3,
|
||||
'host': socket.gethostname(),
|
||||
'port': self.proxy.server_port,
|
||||
}
|
||||
status_info.update(self.proxy.status())
|
||||
|
||||
self.status_info = self.service_registry.heartbeat(status_info)
|
||||
self.logger.trace('status in service registry: %s', self.status_info)
|
||||
|
||||
|
@@ -464,6 +464,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
req += self.rfile.read(int(self.headers['Content-Length']))
|
||||
|
||||
prox_rec_res = None
|
||||
start = time.time()
|
||||
try:
|
||||
self.logger.debug('sending to remote server req=%r', req)
|
||||
|
||||
@@ -490,6 +491,15 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
'bytes exceeded for URL %s',
|
||||
self._max_resource_size, self.url)
|
||||
break
|
||||
elif (not 'content-length' in self.headers
|
||||
and time.time() - start > 3 * 60 * 60):
|
||||
prox_rec_res.truncated = b'time'
|
||||
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
|
||||
self._remote_server_conn.sock.close()
|
||||
self.logger.info(
|
||||
'reached hard timeout of 3 hours fetching url '
|
||||
'without content-length: %s', self.url)
|
||||
break
|
||||
|
||||
self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
|
||||
# Let's close off the remote end. If remote connection is fine,
|
||||
|
Loading…
x
Reference in New Issue
Block a user