mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge pull request #61 from vbanos/remote-server-timeout
Make remote server connection timeout configurable
This commit is contained in:
commit
0f16585a24
@ -249,6 +249,14 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
elif self.path == '/empty-response':
|
elif self.path == '/empty-response':
|
||||||
headers = b''
|
headers = b''
|
||||||
payload = b''
|
payload = b''
|
||||||
|
elif self.path == '/slow-response':
|
||||||
|
time.sleep(6)
|
||||||
|
headers = (b'HTTP/1.1 200 OK\r\n'
|
||||||
|
+ b'Content-Type: text/plain\r\n'
|
||||||
|
+ b'\r\n')
|
||||||
|
payload = b'Test.'
|
||||||
|
actual_headers = (b'Content-Type: text/plain\r\n'
|
||||||
|
+ b'Content-Length: ' + str(len(payload)).encode('ascii') + b'\r\n')
|
||||||
else:
|
else:
|
||||||
payload = b'404 Not Found\n'
|
payload = b'404 Not Found\n'
|
||||||
headers = (b'HTTP/1.1 404 Not Found\r\n'
|
headers = (b'HTTP/1.1 404 Not Found\r\n'
|
||||||
@ -356,7 +364,8 @@ def warcprox_(request):
|
|||||||
'--port=0',
|
'--port=0',
|
||||||
'--playback-port=0',
|
'--playback-port=0',
|
||||||
'--onion-tor-socks-proxy=localhost:9050',
|
'--onion-tor-socks-proxy=localhost:9050',
|
||||||
'--crawl-log-dir=crawl-logs']
|
'--crawl-log-dir=crawl-logs',
|
||||||
|
'--socket-timeout=4']
|
||||||
if request.config.getoption('--rethinkdb-dedup-url'):
|
if request.config.getoption('--rethinkdb-dedup-url'):
|
||||||
argv.append('--rethinkdb-dedup-url=%s' % request.config.getoption('--rethinkdb-dedup-url'))
|
argv.append('--rethinkdb-dedup-url=%s' % request.config.getoption('--rethinkdb-dedup-url'))
|
||||||
# test these here only
|
# test these here only
|
||||||
@ -1711,6 +1720,16 @@ def test_long_warcprox_meta(
|
|||||||
with pytest.raises(StopIteration):
|
with pytest.raises(StopIteration):
|
||||||
next(rec_iter)
|
next(rec_iter)
|
||||||
|
|
||||||
|
def test_socket_timeout_response(
|
||||||
|
warcprox_, http_daemon, https_daemon, archiving_proxies,
|
||||||
|
playback_proxies):
|
||||||
|
"""Response will timeout because we use --socket-timeout=4 whereas the
|
||||||
|
target URL will return after 6 sec.
|
||||||
|
"""
|
||||||
|
url = 'http://localhost:%s/slow-response' % http_daemon.server_port
|
||||||
|
response = requests.get(url, proxies=archiving_proxies, verify=False)
|
||||||
|
assert response.status_code == 502
|
||||||
|
|
||||||
def test_empty_response(
|
def test_empty_response(
|
||||||
warcprox_, http_daemon, https_daemon, archiving_proxies,
|
warcprox_, http_daemon, https_daemon, archiving_proxies,
|
||||||
playback_proxies):
|
playback_proxies):
|
||||||
|
@ -162,6 +162,10 @@ def _build_arg_parser(prog='warcprox'):
|
|||||||
default=None, help=(
|
default=None, help=(
|
||||||
'host:port of tor socks proxy, used only to connect to '
|
'host:port of tor socks proxy, used only to connect to '
|
||||||
'.onion sites'))
|
'.onion sites'))
|
||||||
|
# Configurable socket timeout in seconds; left unset (None) here so the handler's built-in default of 60 sec applies.
|
||||||
|
arg_parser.add_argument(
|
||||||
|
'--socket-timeout', dest='socket_timeout', type=float,
|
||||||
|
default=None, help=argparse.SUPPRESS)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--crawl-log-dir', dest='crawl_log_dir', default=None, help=(
|
'--crawl-log-dir', dest='crawl_log_dir', default=None, help=(
|
||||||
'if specified, write crawl log files in the specified '
|
'if specified, write crawl log files in the specified '
|
||||||
|
@ -205,12 +205,13 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
and records the bytes in transit as it proxies them.
|
and records the bytes in transit as it proxies them.
|
||||||
'''
|
'''
|
||||||
logger = logging.getLogger("warcprox.mitmproxy.MitmProxyHandler")
|
logger = logging.getLogger("warcprox.mitmproxy.MitmProxyHandler")
|
||||||
|
_socket_timeout = 60
|
||||||
|
|
||||||
def __init__(self, request, client_address, server):
|
def __init__(self, request, client_address, server):
|
||||||
threading.current_thread().name = 'MitmProxyHandler(tid={},started={},client={}:{})'.format(warcprox.gettid(), datetime.datetime.utcnow().isoformat(), client_address[0], client_address[1])
|
threading.current_thread().name = 'MitmProxyHandler(tid={},started={},client={}:{})'.format(warcprox.gettid(), datetime.datetime.utcnow().isoformat(), client_address[0], client_address[1])
|
||||||
self.is_connect = False
|
self.is_connect = False
|
||||||
self._headers_buffer = []
|
self._headers_buffer = []
|
||||||
request.settimeout(60) # XXX what value should this have?
|
request.settimeout(self._socket_timeout)
|
||||||
http_server.BaseHTTPRequestHandler.__init__(self, request, client_address, server)
|
http_server.BaseHTTPRequestHandler.__init__(self, request, client_address, server)
|
||||||
|
|
||||||
def _determine_host_port(self):
|
def _determine_host_port(self):
|
||||||
@ -247,8 +248,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
self._remote_server_sock = socket.socket()
|
self._remote_server_sock = socket.socket()
|
||||||
self._remote_server_sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
|
self._remote_server_sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
|
||||||
|
|
||||||
# XXX what value should this timeout have?
|
self._remote_server_sock.settimeout(self._socket_timeout)
|
||||||
self._remote_server_sock.settimeout(60)
|
|
||||||
self._remote_server_sock.connect((self.hostname, int(self.port)))
|
self._remote_server_sock.connect((self.hostname, int(self.port)))
|
||||||
|
|
||||||
# Wrap socket if SSL is required
|
# Wrap socket if SSL is required
|
||||||
|
@ -397,6 +397,9 @@ class SingleThreadedWarcProxy(http_server.HTTPServer, object):
|
|||||||
WarcProxyHandler.onion_tor_socks_proxy_host = options.onion_tor_socks_proxy
|
WarcProxyHandler.onion_tor_socks_proxy_host = options.onion_tor_socks_proxy
|
||||||
WarcProxyHandler.onion_tor_socks_proxy_port = None
|
WarcProxyHandler.onion_tor_socks_proxy_port = None
|
||||||
|
|
||||||
|
if options.socket_timeout:
|
||||||
|
WarcProxyHandler._socket_timeout = options.socket_timeout
|
||||||
|
|
||||||
http_server.HTTPServer.__init__(
|
http_server.HTTPServer.__init__(
|
||||||
self, server_address, WarcProxyHandler, bind_and_activate=True)
|
self, server_address, WarcProxyHandler, bind_and_activate=True)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user