mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge pull request #61 from vbanos/remote-server-timeout
Make remote server connection timeout configurable
This commit is contained in:
commit
0f16585a24
@ -249,6 +249,14 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
|
||||
elif self.path == '/empty-response':
|
||||
headers = b''
|
||||
payload = b''
|
||||
elif self.path == '/slow-response':
|
||||
time.sleep(6)
|
||||
headers = (b'HTTP/1.1 200 OK\r\n'
|
||||
+ b'Content-Type: text/plain\r\n'
|
||||
+ b'\r\n')
|
||||
payload = b'Test.'
|
||||
actual_headers = (b'Content-Type: text/plain\r\n'
|
||||
+ b'Content-Length: ' + str(len(payload)).encode('ascii') + b'\r\n')
|
||||
else:
|
||||
payload = b'404 Not Found\n'
|
||||
headers = (b'HTTP/1.1 404 Not Found\r\n'
|
||||
@ -356,7 +364,8 @@ def warcprox_(request):
|
||||
'--port=0',
|
||||
'--playback-port=0',
|
||||
'--onion-tor-socks-proxy=localhost:9050',
|
||||
'--crawl-log-dir=crawl-logs']
|
||||
'--crawl-log-dir=crawl-logs',
|
||||
'--socket-timeout=4']
|
||||
if request.config.getoption('--rethinkdb-dedup-url'):
|
||||
argv.append('--rethinkdb-dedup-url=%s' % request.config.getoption('--rethinkdb-dedup-url'))
|
||||
# test these here only
|
||||
@ -1711,6 +1720,16 @@ def test_long_warcprox_meta(
|
||||
with pytest.raises(StopIteration):
|
||||
next(rec_iter)
|
||||
|
||||
def test_socket_timeout_response(
        warcprox_, http_daemon, https_daemon, archiving_proxies,
        playback_proxies):
    """Check that a too-slow origin server results in a 502 from the proxy.

    The warcprox fixture is started with --socket-timeout=4, while the
    /slow-response endpoint of the test http daemon sleeps for 6 seconds
    before answering, so the proxied request is expected to time out.
    """
    slow_url = 'http://localhost:%s/slow-response' % http_daemon.server_port
    resp = requests.get(slow_url, proxies=archiving_proxies, verify=False)
    assert resp.status_code == 502
|
||||
|
||||
def test_empty_response(
|
||||
warcprox_, http_daemon, https_daemon, archiving_proxies,
|
||||
playback_proxies):
|
||||
|
@ -162,6 +162,10 @@ def _build_arg_parser(prog='warcprox'):
|
||||
default=None, help=(
|
||||
'host:port of tor socks proxy, used only to connect to '
|
||||
'.onion sites'))
|
||||
# Configurable socket timeout in seconds; when the option is not given, the proxy handler's built-in default of 60 sec applies.
|
||||
arg_parser.add_argument(
|
||||
'--socket-timeout', dest='socket_timeout', type=float,
|
||||
default=None, help=argparse.SUPPRESS)
|
||||
arg_parser.add_argument(
|
||||
'--crawl-log-dir', dest='crawl_log_dir', default=None, help=(
|
||||
'if specified, write crawl log files in the specified '
|
||||
|
@ -205,12 +205,13 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
and records the bytes in transit as it proxies them.
|
||||
'''
|
||||
logger = logging.getLogger("warcprox.mitmproxy.MitmProxyHandler")
|
||||
_socket_timeout = 60
|
||||
|
||||
def __init__(self, request, client_address, server):
|
||||
threading.current_thread().name = 'MitmProxyHandler(tid={},started={},client={}:{})'.format(warcprox.gettid(), datetime.datetime.utcnow().isoformat(), client_address[0], client_address[1])
|
||||
self.is_connect = False
|
||||
self._headers_buffer = []
|
||||
request.settimeout(60) # XXX what value should this have?
|
||||
request.settimeout(self._socket_timeout)
|
||||
http_server.BaseHTTPRequestHandler.__init__(self, request, client_address, server)
|
||||
|
||||
def _determine_host_port(self):
|
||||
@ -247,8 +248,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
self._remote_server_sock = socket.socket()
|
||||
self._remote_server_sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
|
||||
|
||||
# XXX what value should this timeout have?
|
||||
self._remote_server_sock.settimeout(60)
|
||||
self._remote_server_sock.settimeout(self._socket_timeout)
|
||||
self._remote_server_sock.connect((self.hostname, int(self.port)))
|
||||
|
||||
# Wrap socket if SSL is required
|
||||
|
@ -397,6 +397,9 @@ class SingleThreadedWarcProxy(http_server.HTTPServer, object):
|
||||
WarcProxyHandler.onion_tor_socks_proxy_host = options.onion_tor_socks_proxy
|
||||
WarcProxyHandler.onion_tor_socks_proxy_port = None
|
||||
|
||||
if options.socket_timeout:
|
||||
WarcProxyHandler._socket_timeout = options.socket_timeout
|
||||
|
||||
http_server.HTTPServer.__init__(
|
||||
self, server_address, WarcProxyHandler, bind_and_activate=True)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user