mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
use request count and payload size to specify length of benchmark run
This commit is contained in:
parent
2a0c8c28c9
commit
621ebb91ea
@ -47,7 +47,15 @@ async def do_get(request):
|
|||||||
'Content-Type': 'text/plain', 'Content-Length': str(n)})
|
'Content-Type': 'text/plain', 'Content-Length': str(n)})
|
||||||
await response.prepare(request)
|
await response.prepare(request)
|
||||||
for i in range(n // 80):
|
for i in range(n // 80):
|
||||||
response.write(b'x' * 79 + b'\n')
|
# some random bytes at the beginning to avoid deduplication
|
||||||
|
# XXX doesn't work for n < 80
|
||||||
|
if i == 0:
|
||||||
|
rando = bytes([random.choice(
|
||||||
|
b'abcdefghijlkmopqrstuvwxyz') for i in range(30)])
|
||||||
|
bs = rando + b'x' * 49 + b'\n'
|
||||||
|
else:
|
||||||
|
bs = b'x' * 79 + b'\n'
|
||||||
|
response.write(bs)
|
||||||
await response.drain()
|
await response.drain()
|
||||||
if n % 80 > 0:
|
if n % 80 > 0:
|
||||||
response.write(b'x' * (n % 80 - 1) + b'\n')
|
response.write(b'x' * (n % 80 - 1) + b'\n')
|
||||||
@ -113,33 +121,28 @@ async def fetch(session, url, proxy=None):
|
|||||||
# logging.info('finished receiving response from %s', url)
|
# logging.info('finished receiving response from %s', url)
|
||||||
return n_bytes
|
return n_bytes
|
||||||
|
|
||||||
async def benchmarking_client(base_url, duration, proxy=None):
|
async def benchmarking_client(
|
||||||
|
base_url, requests=200, payload_size=100000, proxy=None):
|
||||||
start = time.time()
|
start = time.time()
|
||||||
connector = aiohttp.TCPConnector(verify_ssl=False)
|
connector = aiohttp.TCPConnector(verify_ssl=False)
|
||||||
n = 1000
|
|
||||||
n_urls = 0
|
n_urls = 0
|
||||||
n_bytes = 0
|
n_bytes = 0
|
||||||
|
url = '%s/%s' % (base_url, payload_size)
|
||||||
outstanding_requests = set()
|
outstanding_requests = set()
|
||||||
async with aiohttp.ClientSession(connector=connector) as session:
|
async with aiohttp.ClientSession(connector=connector) as session:
|
||||||
|
for i in range(requests):
|
||||||
|
future = asyncio.ensure_future(fetch(session, url, proxy))
|
||||||
|
outstanding_requests.add(future)
|
||||||
|
# logging.info('scheduled future fetch of %s', url)
|
||||||
while True:
|
while True:
|
||||||
if (time.time() - start < duration
|
done, pending = await asyncio.wait(
|
||||||
and len(outstanding_requests) < 100):
|
outstanding_requests, return_when=asyncio.FIRST_COMPLETED)
|
||||||
url = '%s/%s' % (base_url, n)
|
for future in done:
|
||||||
n += 1000
|
outstanding_requests.remove(future)
|
||||||
# task = asyncio.get_event_loop().create_task(fetch(session, url))
|
n_urls += 1
|
||||||
future = asyncio.ensure_future(fetch(session, url, proxy))
|
n_bytes += future.result()
|
||||||
outstanding_requests.add(future)
|
if not pending:
|
||||||
# logging.info('scheduled future fetch of %s', url)
|
return n_urls, n_bytes, time.time() - start
|
||||||
else:
|
|
||||||
done, pending = await asyncio.wait(
|
|
||||||
outstanding_requests,
|
|
||||||
return_when=asyncio.FIRST_COMPLETED)
|
|
||||||
for future in done:
|
|
||||||
outstanding_requests.remove(future)
|
|
||||||
n_urls += 1
|
|
||||||
n_bytes += future.result()
|
|
||||||
if time.time() - start >= duration and not pending:
|
|
||||||
return n_urls, n_bytes, time.time() - start
|
|
||||||
|
|
||||||
def build_arg_parser(tmpdir, prog=os.path.basename(sys.argv[0])):
|
def build_arg_parser(tmpdir, prog=os.path.basename(sys.argv[0])):
|
||||||
desc = '''
|
desc = '''
|
||||||
@ -151,9 +154,6 @@ benchmark warcprox. Runs 4 benchmarks:
|
|||||||
3. http with warcprox
|
3. http with warcprox
|
||||||
4. https with warcprox
|
4. https with warcprox
|
||||||
|
|
||||||
Each of these runs for a predetermined amount of time, which is 1/4 of the time
|
|
||||||
specified by the --time option.
|
|
||||||
|
|
||||||
Uses a temporary directory for warcs and other files. Otherwise, most warcprox
|
Uses a temporary directory for warcs and other files. Otherwise, most warcprox
|
||||||
options can be specified on the command line. Useful for comparing performance
|
options can be specified on the command line. Useful for comparing performance
|
||||||
with different options.
|
with different options.
|
||||||
@ -242,9 +242,11 @@ Benchmarking code uses asyncio/aiohttp and requires python 3.5 or later.
|
|||||||
'--profile', dest='profile', action='store_true', default=False,
|
'--profile', dest='profile', action='store_true', default=False,
|
||||||
help='profile the warc writer thread')
|
help='profile the warc writer thread')
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--time', dest='time', type=float, default=20.0, help=(
|
'--requests', dest='requests', type=int, default=200,
|
||||||
'time to spend running benchmarks; total allotment will be '
|
help='number of urls to fetch')
|
||||||
'divided among the 4 benchmark cases'))
|
arg_parser.add_argument(
|
||||||
|
'--payload-size', dest='payload_size', type=int, default=100000,
|
||||||
|
help='size of each response payload, in bytes')
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--skip-baseline', dest='skip_baseline', action='store_true',
|
'--skip-baseline', dest='skip_baseline', action='store_true',
|
||||||
help='skip the baseline bechmarks')
|
help='skip the baseline bechmarks')
|
||||||
@ -297,14 +299,16 @@ if __name__ == '__main__':
|
|||||||
if not args.skip_baseline:
|
if not args.skip_baseline:
|
||||||
n_urls, n_bytes, elapsed = loop.run_until_complete(
|
n_urls, n_bytes, elapsed = loop.run_until_complete(
|
||||||
benchmarking_client(
|
benchmarking_client(
|
||||||
'http://127.0.0.1:4080', args.time / 4.0))
|
'http://127.0.0.1:4080', args.requests,
|
||||||
|
args.payload_size))
|
||||||
logging.info(
|
logging.info(
|
||||||
'http baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '
|
'http baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '
|
||||||
'sec', n_urls, n_bytes, elapsed)
|
'sec', n_urls, n_bytes, elapsed)
|
||||||
|
|
||||||
n_urls, n_bytes, elapsed = loop.run_until_complete(
|
n_urls, n_bytes, elapsed = loop.run_until_complete(
|
||||||
benchmarking_client(
|
benchmarking_client(
|
||||||
'https://127.0.0.1:4443', args.time / 4.0))
|
'https://127.0.0.1:4443', args.requests,
|
||||||
|
args.payload_size))
|
||||||
logging.info(
|
logging.info(
|
||||||
'https baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '
|
'https baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '
|
||||||
'sec', n_urls, n_bytes, elapsed)
|
'sec', n_urls, n_bytes, elapsed)
|
||||||
@ -312,11 +316,6 @@ if __name__ == '__main__':
|
|||||||
logging.info('SKIPPED')
|
logging.info('SKIPPED')
|
||||||
logging.info('===== baseline benchmark finished =====')
|
logging.info('===== baseline benchmark finished =====')
|
||||||
|
|
||||||
if args.skip_baseline:
|
|
||||||
t = args.time / 2.0
|
|
||||||
else:
|
|
||||||
t = args.time / 4.0
|
|
||||||
|
|
||||||
warcprox_controller = warcprox.main.init_controller(args)
|
warcprox_controller = warcprox.main.init_controller(args)
|
||||||
warcprox_controller_thread = threading.Thread(
|
warcprox_controller_thread = threading.Thread(
|
||||||
target=warcprox_controller.run_until_shutdown)
|
target=warcprox_controller.run_until_shutdown)
|
||||||
@ -327,13 +326,17 @@ if __name__ == '__main__':
|
|||||||
warcprox_controller.proxy.server_address[1])
|
warcprox_controller.proxy.server_address[1])
|
||||||
logging.info('===== warcprox benchmark starting =====')
|
logging.info('===== warcprox benchmark starting =====')
|
||||||
n_urls, n_bytes, elapsed = loop.run_until_complete(
|
n_urls, n_bytes, elapsed = loop.run_until_complete(
|
||||||
benchmarking_client('http://127.0.0.1:4080', t, proxy))
|
benchmarking_client(
|
||||||
|
'http://127.0.0.1:4080', args.requests, args.payload_size,
|
||||||
|
proxy))
|
||||||
logging.info(
|
logging.info(
|
||||||
'http: n_urls=%s n_bytes=%s in %.1f sec',
|
'http: n_urls=%s n_bytes=%s in %.1f sec',
|
||||||
n_urls, n_bytes, elapsed)
|
n_urls, n_bytes, elapsed)
|
||||||
|
|
||||||
n_urls, n_bytes, elapsed = loop.run_until_complete(
|
n_urls, n_bytes, elapsed = loop.run_until_complete(
|
||||||
benchmarking_client('https://127.0.0.1:4443', t, proxy))
|
benchmarking_client(
|
||||||
|
'https://127.0.0.1:4443', args.requests, args.payload_size,
|
||||||
|
proxy))
|
||||||
logging.info(
|
logging.info(
|
||||||
'https: n_urls=%s n_bytes=%s in %.1f sec',
|
'https: n_urls=%s n_bytes=%s in %.1f sec',
|
||||||
n_urls, n_bytes, elapsed)
|
n_urls, n_bytes, elapsed)
|
||||||
|
2
setup.py
2
setup.py
@ -51,7 +51,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.1b1.dev79',
|
version='2.1b1.dev80',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user