diff --git a/warcprox/main.py b/warcprox/main.py index 1f270a1..f13d2dd 100644 --- a/warcprox/main.py +++ b/warcprox/main.py @@ -162,6 +162,10 @@ def _build_arg_parser(prog='warcprox'): default=None, help=( 'host:port of tor socks proxy, used only to connect to ' '.onion sites')) + # Timeout in seconds for remote server sockets; handler default is 60. + arg_parser.add_argument( + '--remote-server-timeout', dest='remote_server_timeout', type=float, + default=None, help=argparse.SUPPRESS) arg_parser.add_argument( '--crawl-log-dir', dest='crawl_log_dir', default=None, help=( 'if specified, write crawl log files in the specified ' diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index 7792c5c..b25901b 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -205,6 +205,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): and records the bytes in transit as it proxies them. ''' logger = logging.getLogger("warcprox.mitmproxy.MitmProxyHandler") + _remote_server_timeout = 60 def __init__(self, request, client_address, server): threading.current_thread().name = 'MitmProxyHandler(tid={},started={},client={}:{})'.format(warcprox.gettid(), datetime.datetime.utcnow().isoformat(), client_address[0], client_address[1]) @@ -248,7 +249,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): self._remote_server_sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) # XXX what value should this timeout have? 
- self._remote_server_sock.settimeout(60) + self._remote_server_sock.settimeout(self._remote_server_timeout) self._remote_server_sock.connect((self.hostname, int(self.port))) # Wrap socket if SSL is required diff --git a/warcprox/warcproxy.py b/warcprox/warcproxy.py index 7ae5ab4..6d8accb 100644 --- a/warcprox/warcproxy.py +++ b/warcprox/warcproxy.py @@ -397,6 +397,9 @@ class SingleThreadedWarcProxy(http_server.HTTPServer, object): WarcProxyHandler.onion_tor_socks_proxy_host = options.onion_tor_socks_proxy WarcProxyHandler.onion_tor_socks_proxy_port = None + if options.remote_server_timeout: + WarcProxyHandler._remote_server_timeout = options.remote_server_timeout + http_server.HTTPServer.__init__( self, server_address, WarcProxyHandler, bind_and_activate=True)