Add a heuristic that sets the size of the thread pool based on the open-files limit, to hopefully fix a problem where warcprox got stuck because it ran out of file handles

This commit is contained in:
Noah Levitt 2016-03-04 20:59:11 +00:00
parent 46887f7594
commit 918fdd3e9b
2 changed files with 22 additions and 8 deletions

View File

@ -80,7 +80,7 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
default=None, help='kafka capture feed topic')
arg_parser.add_argument('--queue-size', dest='queue_size', default=500,
help=argparse.SUPPRESS)
arg_parser.add_argument('--max-threads', dest='max_threads', default=500,
arg_parser.add_argument('--max-threads', dest='max_threads',
help=argparse.SUPPRESS)
arg_parser.add_argument('--profile', action='store_true', default=False,
help=argparse.SUPPRESS)

View File

@ -36,6 +36,7 @@ from certauth.certauth import CertificateAuthority
import warcprox
import datetime
import concurrent.futures
import resource
class ProxyingRecorder(object):
"""
@ -394,12 +395,25 @@ class SingleThreadedWarcProxy(http_server.HTTPServer):
class PooledMixIn(socketserver.ThreadingMixIn):
    """
    Mix-in that runs each request on ``self.pool`` when a pool is available.

    If the instance has no ``pool`` attribute, or the attribute is falsy,
    requests are handled by the stock ``socketserver.ThreadingMixIn`` logic.
    """

    def process_request(self, request, client_address):
        """Submit the request to the pool, or defer to ThreadingMixIn."""
        executor = getattr(self, 'pool', None)
        if not executor:
            # no pool configured: fall back to the parent implementation
            socketserver.ThreadingMixIn.process_request(
                    self, request, client_address)
            return
        executor.submit(
                self.process_request_thread, request, client_address)
class WarcProxy(PooledMixIn, SingleThreadedWarcProxy):
    """
    Warcprox server that runs request handlers on a bounded thread pool.

    The pool size is taken from ``self.options.max_threads`` when that is
    set; otherwise it is derived from the process's RLIMIT_NOFILE and
    RLIMIT_NPROC soft limits.
    """
    logger = logging.getLogger("warcprox.warcproxy.WarcProxy")

    def __init__(self, *args, **kwargs):
        """
        Initialize the underlying server, then create ``self.pool``.

        All arguments are passed through to
        ``SingleThreadedWarcProxy.__init__``.

        Raises:
            ValueError: if ``options.max_threads`` is set but is not a valid
                integer.
        """
        # NOTE(review): self.options is presumably set by the parent
        # initializer; that code is not visible here -- confirm.
        SingleThreadedWarcProxy.__init__(self, *args, **kwargs)

        if self.options.max_threads:
            # the argument parser declares no type for --max-threads, so a
            # value given on the command line arrives as a string
            max_threads = int(self.options.max_threads)
            self.logger.info("max_threads=%s set by command line option",
                    max_threads)
        else:
            # man getrlimit: "RLIMIT_NPROC The maximum number of processes (or,
            # more precisely on Linux, threads) that can be created for the
            # real user ID of the calling process."
            rlimit_nproc = resource.getrlimit(resource.RLIMIT_NPROC)[0]
            rlimit_nofile = resource.getrlimit(resource.RLIMIT_NOFILE)[0]

            # An unlimited resource is reported as resource.RLIM_INFINITY
            # (-1 on Linux); feeding that into the arithmetic would yield a
            # non-positive pool size, which ThreadPoolExecutor rejects. Only
            # finite limits are allowed to constrain the pool.
            finite_caps = []
            if rlimit_nofile != resource.RLIM_INFINITY:
                finite_caps.append(rlimit_nofile // 10)
            if rlimit_nproc != resource.RLIM_INFINITY:
                finite_caps.append(rlimit_nproc // 2)

            if finite_caps:
                # very small limits would otherwise round down to zero
                max_threads = max(1, min(finite_caps))
            else:
                # neither limit is finite: use the fixed fallback of 500
                max_threads = 500

            self.logger.info("max_threads=%s (rlimit_nproc=%s, rlimit_nofile=%s)",
                    max_threads, rlimit_nproc, rlimit_nofile)

        self.pool = concurrent.futures.ThreadPoolExecutor(
                max_workers=max_threads)