mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'master' into trough
* master:
  hidden argument --rethinkdb-big-table-name
  try to fix https://github.com/internetarchive/warcprox/issues/27
This commit is contained in:
commit
c0cb59e5af
2
setup.py
2
setup.py
@ -49,7 +49,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.1b1.dev92',
|
version='2.1b1.dev94',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@ -114,6 +114,9 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
|||||||
arg_parser.add_argument('--rethinkdb-big-table',
|
arg_parser.add_argument('--rethinkdb-big-table',
|
||||||
dest='rethinkdb_big_table', action='store_true', default=False,
|
dest='rethinkdb_big_table', action='store_true', default=False,
|
||||||
help='use a big rethinkdb table called "captures", instead of a small table called "dedup"; table is suitable for use as index for playback (ignored unless --rethinkdb-servers is specified)')
|
help='use a big rethinkdb table called "captures", instead of a small table called "dedup"; table is suitable for use as index for playback (ignored unless --rethinkdb-servers is specified)')
|
||||||
|
arg_parser.add_argument(
|
||||||
|
'--rethinkdb-big-table-name', dest='rethinkdb_big_table_name',
|
||||||
|
default='captures', help=argparse.SUPPRESS)
|
||||||
arg_parser.add_argument('--queue-size', dest='queue_size', type=int,
|
arg_parser.add_argument('--queue-size', dest='queue_size', type=int,
|
||||||
default=500, help=argparse.SUPPRESS)
|
default=500, help=argparse.SUPPRESS)
|
||||||
arg_parser.add_argument('--max-threads', dest='max_threads', type=int,
|
arg_parser.add_argument('--max-threads', dest='max_threads', type=int,
|
||||||
@ -179,7 +182,8 @@ def init_controller(args):
|
|||||||
rr = doublethink.Rethinker(
|
rr = doublethink.Rethinker(
|
||||||
args.rethinkdb_servers.split(","), args.rethinkdb_db)
|
args.rethinkdb_servers.split(","), args.rethinkdb_db)
|
||||||
if args.rethinkdb_big_table:
|
if args.rethinkdb_big_table:
|
||||||
captures_db = warcprox.bigtable.RethinkCaptures(rr, options=options)
|
captures_db = warcprox.bigtable.RethinkCaptures(
|
||||||
|
rr, table=args.rethinkdb_big_table_name, options=options)
|
||||||
dedup_db = warcprox.bigtable.RethinkCapturesDedup(
|
dedup_db = warcprox.bigtable.RethinkCapturesDedup(
|
||||||
captures_db, options=options)
|
captures_db, options=options)
|
||||||
listeners.append(captures_db)
|
listeners.append(captures_db)
|
||||||
|
@ -52,7 +52,6 @@ try:
|
|||||||
import socketserver
|
import socketserver
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import SocketServer as socketserver
|
import SocketServer as socketserver
|
||||||
import resource
|
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import urlcanon
|
import urlcanon
|
||||||
import time
|
import time
|
||||||
@ -440,15 +439,23 @@ class PooledMixIn(socketserver.ThreadingMixIn):
|
|||||||
# man getrlimit: "RLIMIT_NPROC The maximum number of processes (or,
|
# man getrlimit: "RLIMIT_NPROC The maximum number of processes (or,
|
||||||
# more precisely on Linux, threads) that can be created for the
|
# more precisely on Linux, threads) that can be created for the
|
||||||
# real user ID of the calling process."
|
# real user ID of the calling process."
|
||||||
rlimit_nproc = resource.getrlimit(resource.RLIMIT_NPROC)[0]
|
try:
|
||||||
rlimit_nofile = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
|
import resource
|
||||||
max_threads = min(rlimit_nofile // 10, rlimit_nproc // 2)
|
rlimit_nproc = resource.getrlimit(resource.RLIMIT_NPROC)[0]
|
||||||
# resource.RLIM_INFINITY == -1 which can result in max_threads == 0
|
rlimit_nofile = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
|
||||||
if max_threads <= 0 or max_threads > 5000:
|
max_threads = min(rlimit_nofile // 10, rlimit_nproc // 2)
|
||||||
max_threads = 5000
|
# resource.RLIM_INFINITY == -1 which can result in max_threads == 0
|
||||||
self.logger.info(
|
if max_threads <= 0 or max_threads > 5000:
|
||||||
"max_threads=%s (rlimit_nproc=%s, rlimit_nofile=%s)",
|
max_threads = 5000
|
||||||
max_threads, rlimit_nproc, rlimit_nofile)
|
self.logger.info(
|
||||||
|
"max_threads=%s (rlimit_nproc=%s, rlimit_nofile=%s)",
|
||||||
|
max_threads, rlimit_nproc, rlimit_nofile)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warn(
|
||||||
|
"unable to calculate optimal number of threads based "
|
||||||
|
"on resource limits due to %s", e)
|
||||||
|
max_threads = 100
|
||||||
|
self.logger.info("max_threads=%s", max_threads)
|
||||||
self.max_threads = max_threads
|
self.max_threads = max_threads
|
||||||
self.pool = concurrent.futures.ThreadPoolExecutor(max_threads)
|
self.pool = concurrent.futures.ThreadPoolExecutor(max_threads)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user