mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
heuristic to set size of thread pool based on open files limit, to hopefully fix problem where warcprox got stuck because it ran out of file handles
This commit is contained in:
parent
46887f7594
commit
918fdd3e9b
@ -74,13 +74,13 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
||||
arg_parser.add_argument('--rethinkdb-big-table',
|
||||
dest='rethinkdb_big_table', action='store_true', default=False,
|
||||
help='use a big rethinkdb table called "captures", instead of a small table called "dedup"; table is suitable for use as index for playback (ignored unless --rethinkdb-servers is specified)')
|
||||
arg_parser.add_argument('--kafka-broker-list', dest='kafka_broker_list',
|
||||
arg_parser.add_argument('--kafka-broker-list', dest='kafka_broker_list',
|
||||
default=None, help='kafka broker list for capture feed')
|
||||
arg_parser.add_argument('--kafka-capture-feed-topic', dest='kafka_capture_feed_topic',
|
||||
arg_parser.add_argument('--kafka-capture-feed-topic', dest='kafka_capture_feed_topic',
|
||||
default=None, help='kafka capture feed topic')
|
||||
arg_parser.add_argument('--queue-size', dest='queue_size', default=500,
|
||||
help=argparse.SUPPRESS)
|
||||
arg_parser.add_argument('--max-threads', dest='max_threads', default=500,
|
||||
arg_parser.add_argument('--max-threads', dest='max_threads',
|
||||
help=argparse.SUPPRESS)
|
||||
arg_parser.add_argument('--profile', action='store_true', default=False,
|
||||
help=argparse.SUPPRESS)
|
||||
|
@ -36,6 +36,7 @@ from certauth.certauth import CertificateAuthority
|
||||
import warcprox
|
||||
import datetime
|
||||
import concurrent.futures
|
||||
import resource
|
||||
|
||||
class ProxyingRecorder(object):
|
||||
"""
|
||||
@ -394,12 +395,25 @@ class SingleThreadedWarcProxy(http_server.HTTPServer):
|
||||
|
||||
class PooledMixIn(socketserver.ThreadingMixIn):
|
||||
def process_request(self, request, client_address):
|
||||
if hasattr(self, 'pool') and self.pool:
|
||||
self.pool.submit(self.process_request_thread, request, client_address)
|
||||
else:
|
||||
socketserver.ThreadingMixIn.process_request(self, request, client_address)
|
||||
self.pool.submit(self.process_request_thread, request, client_address)
|
||||
|
||||
class WarcProxy(PooledMixIn, SingleThreadedWarcProxy):
|
||||
logger = logging.getLogger("warcprox.warcproxy.WarcProxy")
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
SingleThreadedWarcProxy.__init__(self, *args, **kwargs)
|
||||
self.pool = concurrent.futures.ThreadPoolExecutor(max_workers=self.options.max_threads or 500)
|
||||
if self.options.max_threads:
|
||||
max_threads = self.options.max_threads
|
||||
self.logger.info("max_threads=%s set by command line option",
|
||||
max_threads)
|
||||
else:
|
||||
# man getrlimit: "RLIMIT_NPROC The maximum number of processes (or,
|
||||
# more precisely on Linux, threads) that can be created for the
|
||||
# real user ID of the calling process."
|
||||
rlimit_nproc = resource.getrlimit(resource.RLIMIT_NPROC)[0]
|
||||
rlimit_nofile = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
|
||||
max_threads = min(rlimit_nofile // 10, rlimit_nproc // 2)
|
||||
self.logger.info("max_threads=%s (rlimit_nproc=%s, rlimit_nofile=%s)",
|
||||
max_threads, rlimit_nproc, rlimit_nofile)
|
||||
|
||||
self.pool = concurrent.futures.ThreadPoolExecutor(max_threads)
|
||||
|
Loading…
x
Reference in New Issue
Block a user