use request count and payload size to specify length of benchmark run

Noah Levitt 2017-05-10 18:58:19 +00:00
parent 2a0c8c28c9
commit 621ebb91ea
2 changed files with 40 additions and 37 deletions

@@ -47,7 +47,15 @@ async def do_get(request):
             'Content-Type': 'text/plain', 'Content-Length': str(n)})
     await response.prepare(request)
     for i in range(n // 80):
-        response.write(b'x' * 79 + b'\n')
+        # some random bytes at the beginning to avoid deduplication
+        # XXX doesn't work for n < 80
+        if i == 0:
+            rando = bytes([random.choice(
+                b'abcdefghijlkmopqrstuvwxyz') for i in range(30)])
+            bs = rando + b'x' * 49 + b'\n'
+        else:
+            bs = b'x' * 79 + b'\n'
+        response.write(bs)
         await response.drain()
     if n % 80 > 0:
         response.write(b'x' * (n % 80 - 1) + b'\n')
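
The random 30-byte prefix added above keeps equal-sized payloads from being byte-identical, so that deduplication does not collapse the benchmark's responses (per the "avoid deduplication" comment in the diff). A minimal standalone sketch of the same payload-generation idea; the helper name payload_chunks is illustrative and not part of this change:

import random

def payload_chunks(n):
    # emit n bytes as 80-byte lines; the first line starts with 30 random
    # letters so equal-sized payloads still differ (mirrors the hunk above)
    rando = bytes(random.choice(b'abcdefghijklmnopqrstuvwxyz') for _ in range(30))
    for i in range(n // 80):
        yield (rando + b'x' * 49 + b'\n') if i == 0 else (b'x' * 79 + b'\n')
    if n % 80 > 0:
        yield b'x' * (n % 80 - 1) + b'\n'

Like the patched handler, this sketch only injects the random prefix when the payload is at least 80 bytes long.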
@@ -113,33 +121,28 @@ async def fetch(session, url, proxy=None):
         # logging.info('finished receiving response from %s', url)
         return n_bytes
 
-async def benchmarking_client(base_url, duration, proxy=None):
+async def benchmarking_client(
+        base_url, requests=200, payload_size=100000, proxy=None):
     start = time.time()
     connector = aiohttp.TCPConnector(verify_ssl=False)
-    n = 1000
     n_urls = 0
     n_bytes = 0
+    url = '%s/%s' % (base_url, payload_size)
     outstanding_requests = set()
     async with aiohttp.ClientSession(connector=connector) as session:
+        for i in range(requests):
+            future = asyncio.ensure_future(fetch(session, url, proxy))
+            outstanding_requests.add(future)
+            # logging.info('scheduled future fetch of %s', url)
         while True:
-            if (time.time() - start < duration
-                    and len(outstanding_requests) < 100):
-                url = '%s/%s' % (base_url, n)
-                n += 1000
-                # task = asyncio.get_event_loop().create_task(fetch(session, url))
-                future = asyncio.ensure_future(fetch(session, url, proxy))
-                outstanding_requests.add(future)
-                # logging.info('scheduled future fetch of %s', url)
-            else:
-                done, pending = await asyncio.wait(
-                        outstanding_requests,
-                        return_when=asyncio.FIRST_COMPLETED)
-                for future in done:
-                    outstanding_requests.remove(future)
-                    n_urls += 1
-                    n_bytes += future.result()
-            if time.time() - start >= duration and not pending:
-                return n_urls, n_bytes, time.time() - start
+            done, pending = await asyncio.wait(
+                    outstanding_requests, return_when=asyncio.FIRST_COMPLETED)
+            for future in done:
+                outstanding_requests.remove(future)
+                n_urls += 1
+                n_bytes += future.result()
+            if not pending:
+                return n_urls, n_bytes, time.time() - start
 
 def build_arg_parser(tmpdir, prog=os.path.basename(sys.argv[0])):
     desc = '''
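
The rewritten benchmarking_client above schedules all of the fetches up front and then repeatedly awaits asyncio.wait(..., return_when=FIRST_COMPLETED) until nothing is pending, so the run length is governed by request count rather than wall-clock time. A cut-down sketch of that completion-draining pattern, with a stand-in coroutine in place of the real aiohttp fetch (fake_fetch and drain_all are illustrative names, not part of the patch):

import asyncio
import time

async def fake_fetch(url):
    await asyncio.sleep(0.01)  # stand-in for the real aiohttp fetch
    return 100000              # pretend this many payload bytes were read

async def drain_all(url, requests=200):
    start = time.time()
    outstanding = {asyncio.ensure_future(fake_fetch(url)) for _ in range(requests)}
    n_urls = n_bytes = 0
    while outstanding:
        # wake up as soon as any fetch finishes, tally it, keep waiting on the rest
        done, outstanding = await asyncio.wait(
                outstanding, return_when=asyncio.FIRST_COMPLETED)
        for future in done:
            n_urls += 1
            n_bytes += future.result()
    return n_urls, n_bytes, time.time() - start

# e.g.: asyncio.get_event_loop().run_until_complete(drain_all('http://127.0.0.1:4080/100000'))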
@@ -151,9 +154,6 @@ benchmark warcprox. Runs 4 benchmarks:
 3. http with warcprox
 4. https with warcprox
 
-Each of these runs for a predetermined amount of time, which is 1/4 of the time
-specified by the --time option.
-
 Uses a temporary directory for warcs and other files. Otherwise, most warcprox
 options can be specified on the command line. Useful for comparing performance
 with different options.
@@ -242,9 +242,11 @@ Benchmarking code uses asyncio/aiohttp and requires python 3.5 or later.
         '--profile', dest='profile', action='store_true', default=False,
         help='profile the warc writer thread')
     arg_parser.add_argument(
-        '--time', dest='time', type=float, default=20.0, help=(
-            'time to spend running benchmarks; total allotment will be '
-            'divided among the 4 benchmark cases'))
+        '--requests', dest='requests', type=int, default=200,
+        help='number of urls to fetch')
+    arg_parser.add_argument(
+        '--payload-size', dest='payload_size', type=int, default=100000,
+        help='size of each response payload, in bytes')
     arg_parser.add_argument(
         '--skip-baseline', dest='skip_baseline', action='store_true',
         help='skip the baseline bechmarks')
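
With the new flags, the work per benchmark case is fixed at roughly requests × payload_size bytes, so the defaults of 200 requests at 100000 bytes each come to about 20 MB per case, instead of each case running for a quarter of the old --time budget.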
@@ -297,14 +299,16 @@ if __name__ == '__main__':
     if not args.skip_baseline:
         n_urls, n_bytes, elapsed = loop.run_until_complete(
                 benchmarking_client(
-                    'http://127.0.0.1:4080', args.time / 4.0))
+                    'http://127.0.0.1:4080', args.requests,
+                    args.payload_size))
         logging.info(
                 'http baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '
                 'sec', n_urls, n_bytes, elapsed)
 
         n_urls, n_bytes, elapsed = loop.run_until_complete(
                 benchmarking_client(
-                    'https://127.0.0.1:4443', args.time / 4.0))
+                    'https://127.0.0.1:4443', args.requests,
+                    args.payload_size))
         logging.info(
                 'https baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '
                 'sec', n_urls, n_bytes, elapsed)
@@ -312,11 +316,6 @@ if __name__ == '__main__':
         logging.info('SKIPPED')
     logging.info('===== baseline benchmark finished =====')
 
-    if args.skip_baseline:
-        t = args.time / 2.0
-    else:
-        t = args.time / 4.0
-
     warcprox_controller = warcprox.main.init_controller(args)
     warcprox_controller_thread = threading.Thread(
             target=warcprox_controller.run_until_shutdown)
@@ -327,13 +326,17 @@ if __name__ == '__main__':
             warcprox_controller.proxy.server_address[1])
     logging.info('===== warcprox benchmark starting =====')
     n_urls, n_bytes, elapsed = loop.run_until_complete(
-            benchmarking_client('http://127.0.0.1:4080', t, proxy))
+            benchmarking_client(
+                'http://127.0.0.1:4080', args.requests, args.payload_size,
+                proxy))
     logging.info(
             'http: n_urls=%s n_bytes=%s in %.1f sec',
             n_urls, n_bytes, elapsed)
 
     n_urls, n_bytes, elapsed = loop.run_until_complete(
-            benchmarking_client('https://127.0.0.1:4443', t, proxy))
+            benchmarking_client(
+                'https://127.0.0.1:4443', args.requests, args.payload_size,
+                proxy))
     logging.info(
             'https: n_urls=%s n_bytes=%s in %.1f sec',
             n_urls, n_bytes, elapsed)

@@ -51,7 +51,7 @@ except:
 
 setuptools.setup(
     name='warcprox',
-    version='2.1b1.dev79',
+    version='2.1b1.dev80',
     description='WARC writing MITM HTTP/S proxy',
     url='https://github.com/internetarchive/warcprox',
     author='Noah Levitt',