mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'master' into qa
* master:
  3 hour hard timeout on urls without content-length
  use predictable id in service registry
This commit is contained in:
commit
6dc4abfa84
2
setup.py
2
setup.py
@@ -40,7 +40,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.4b3.dev190',
|
version='2.4b3.dev192',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@@ -1576,8 +1576,11 @@ def test_svcreg_status(warcprox_):
|
|||||||
'rates_15min', 'active_requests','start_time','urls_processed',
|
'rates_15min', 'active_requests','start_time','urls_processed',
|
||||||
'warc_bytes_written', 'postfetch_chain',
|
'warc_bytes_written', 'postfetch_chain',
|
||||||
'earliest_still_active_fetch_start',}
|
'earliest_still_active_fetch_start',}
|
||||||
|
assert status['id'] == 'warcprox:%s:%s' % (
|
||||||
|
socket.gethostname(), warcprox_.proxy.server_port)
|
||||||
assert status['role'] == 'warcprox'
|
assert status['role'] == 'warcprox'
|
||||||
assert status['version'] == warcprox.__version__
|
assert status['version'] == warcprox.__version__
|
||||||
|
assert status['host'] == socket.gethostname()
|
||||||
assert status['port'] == warcprox_.proxy.server_port
|
assert status['port'] == warcprox_.proxy.server_port
|
||||||
assert status['pid'] == os.getpid()
|
assert status['pid'] == os.getpid()
|
||||||
assert status['threads'] == warcprox_.proxy.pool._max_workers
|
assert status['threads'] == warcprox_.proxy.pool._max_workers
|
||||||
|
@@ -36,6 +36,7 @@ import functools
|
|||||||
import doublethink
|
import doublethink
|
||||||
import importlib
|
import importlib
|
||||||
import queue
|
import queue
|
||||||
|
import socket
|
||||||
|
|
||||||
class Factory:
|
class Factory:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -319,13 +320,15 @@ class WarcproxController(object):
|
|||||||
status_info = self.status_info
|
status_info = self.status_info
|
||||||
else:
|
else:
|
||||||
status_info = {
|
status_info = {
|
||||||
|
'id': 'warcprox:%s:%s' % (
|
||||||
|
socket.gethostname(), self.proxy.server_port),
|
||||||
'role': 'warcprox',
|
'role': 'warcprox',
|
||||||
'version': warcprox.__version__,
|
'version': warcprox.__version__,
|
||||||
'ttl': self.HEARTBEAT_INTERVAL * 3,
|
'ttl': self.HEARTBEAT_INTERVAL * 3,
|
||||||
|
'host': socket.gethostname(),
|
||||||
'port': self.proxy.server_port,
|
'port': self.proxy.server_port,
|
||||||
}
|
}
|
||||||
status_info.update(self.proxy.status())
|
status_info.update(self.proxy.status())
|
||||||
|
|
||||||
self.status_info = self.service_registry.heartbeat(status_info)
|
self.status_info = self.service_registry.heartbeat(status_info)
|
||||||
self.logger.trace('status in service registry: %s', self.status_info)
|
self.logger.trace('status in service registry: %s', self.status_info)
|
||||||
|
|
||||||
|
@@ -464,6 +464,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
req += self.rfile.read(int(self.headers['Content-Length']))
|
req += self.rfile.read(int(self.headers['Content-Length']))
|
||||||
|
|
||||||
prox_rec_res = None
|
prox_rec_res = None
|
||||||
|
start = time.time()
|
||||||
try:
|
try:
|
||||||
self.logger.debug('sending to remote server req=%r', req)
|
self.logger.debug('sending to remote server req=%r', req)
|
||||||
|
|
||||||
@@ -490,6 +491,15 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
'bytes exceeded for URL %s',
|
'bytes exceeded for URL %s',
|
||||||
self._max_resource_size, self.url)
|
self._max_resource_size, self.url)
|
||||||
break
|
break
|
||||||
|
elif (not 'content-length' in self.headers
|
||||||
|
and time.time() - start > 3 * 60 * 60):
|
||||||
|
prox_rec_res.truncated = b'time'
|
||||||
|
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
|
||||||
|
self._remote_server_conn.sock.close()
|
||||||
|
self.logger.info(
|
||||||
|
'reached hard timeout of 3 hours fetching url '
|
||||||
|
'without content-length: %s', self.url)
|
||||||
|
break
|
||||||
|
|
||||||
self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
|
self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
|
||||||
# Let's close off the remote end. If remote connection is fine,
|
# Let's close off the remote end. If remote connection is fine,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user