mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'master' into trough
* master:
  hidden argument --rethinkdb-big-table-name
  try to fix https://github.com/internetarchive/warcprox/issues/27
This commit is contained in:
commit
c0cb59e5af
2
setup.py
2
setup.py
@@ -49,7 +49,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.1b1.dev92',
|
||||
version='2.1b1.dev94',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
|
@@ -114,6 +114,9 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
||||
arg_parser.add_argument('--rethinkdb-big-table',
|
||||
dest='rethinkdb_big_table', action='store_true', default=False,
|
||||
help='use a big rethinkdb table called "captures", instead of a small table called "dedup"; table is suitable for use as index for playback (ignored unless --rethinkdb-servers is specified)')
|
||||
arg_parser.add_argument(
|
||||
'--rethinkdb-big-table-name', dest='rethinkdb_big_table_name',
|
||||
default='captures', help=argparse.SUPPRESS)
|
||||
arg_parser.add_argument('--queue-size', dest='queue_size', type=int,
|
||||
default=500, help=argparse.SUPPRESS)
|
||||
arg_parser.add_argument('--max-threads', dest='max_threads', type=int,
|
||||
@@ -179,7 +182,8 @@ def init_controller(args):
|
||||
rr = doublethink.Rethinker(
|
||||
args.rethinkdb_servers.split(","), args.rethinkdb_db)
|
||||
if args.rethinkdb_big_table:
|
||||
captures_db = warcprox.bigtable.RethinkCaptures(rr, options=options)
|
||||
captures_db = warcprox.bigtable.RethinkCaptures(
|
||||
rr, table=args.rethinkdb_big_table_name, options=options)
|
||||
dedup_db = warcprox.bigtable.RethinkCapturesDedup(
|
||||
captures_db, options=options)
|
||||
listeners.append(captures_db)
|
||||
|
@@ -52,7 +52,6 @@ try:
|
||||
import socketserver
|
||||
except ImportError:
|
||||
import SocketServer as socketserver
|
||||
import resource
|
||||
import concurrent.futures
|
||||
import urlcanon
|
||||
import time
|
||||
@@ -440,15 +439,23 @@ class PooledMixIn(socketserver.ThreadingMixIn):
|
||||
# man getrlimit: "RLIMIT_NPROC The maximum number of processes (or,
|
||||
# more precisely on Linux, threads) that can be created for the
|
||||
# real user ID of the calling process."
|
||||
rlimit_nproc = resource.getrlimit(resource.RLIMIT_NPROC)[0]
|
||||
rlimit_nofile = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
|
||||
max_threads = min(rlimit_nofile // 10, rlimit_nproc // 2)
|
||||
# resource.RLIM_INFINITY == -1 which can result in max_threads == 0
|
||||
if max_threads <= 0 or max_threads > 5000:
|
||||
max_threads = 5000
|
||||
self.logger.info(
|
||||
"max_threads=%s (rlimit_nproc=%s, rlimit_nofile=%s)",
|
||||
max_threads, rlimit_nproc, rlimit_nofile)
|
||||
try:
|
||||
import resource
|
||||
rlimit_nproc = resource.getrlimit(resource.RLIMIT_NPROC)[0]
|
||||
rlimit_nofile = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
|
||||
max_threads = min(rlimit_nofile // 10, rlimit_nproc // 2)
|
||||
# resource.RLIM_INFINITY == -1 which can result in max_threads == 0
|
||||
if max_threads <= 0 or max_threads > 5000:
|
||||
max_threads = 5000
|
||||
self.logger.info(
|
||||
"max_threads=%s (rlimit_nproc=%s, rlimit_nofile=%s)",
|
||||
max_threads, rlimit_nproc, rlimit_nofile)
|
||||
except Exception as e:
|
||||
self.logger.warn(
|
||||
"unable to calculate optimal number of threads based "
|
||||
"on resource limits due to %s", e)
|
||||
max_threads = 100
|
||||
self.logger.info("max_threads=%s", max_threads)
|
||||
self.max_threads = max_threads
|
||||
self.pool = concurrent.futures.ThreadPoolExecutor(max_threads)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user