#!/usr/bin/env python
'''
run-benchmarks.py - some benchmarking code for warcprox

Copyright (C) 2015-2017 Internet Archive

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA.
'''

import aiohttp.web
import asyncio
import ssl
import OpenSSL.crypto
import OpenSSL.SSL
import tempfile
import random
import os
import logging
import sys
import time
import argparse
import hashlib
import datetime
import cryptography.hazmat.backends.openssl
import warcprox
import warcprox.controller
import warcprox.main
import threading

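# handler for the benchmark servers: GET /{n} streams an n-byte plain-text
# response, with a few random bytes at the start of the first line so that
# each response body is unique (avoids warcprox deduplication)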
# https://medium.com/@generativist/a-simple-streaming-http-server-in-aiohttp-4233dbc173c7
async def do_get(request):
    n = int(request.match_info.get('n'))
    response = aiohttp.web.StreamResponse(
            status=200, reason='OK', headers={
                'Content-Type': 'text/plain', 'Content-Length': str(n)})
    await response.prepare(request)
    for i in range(n // 80):
        # some random bytes at the beginning to avoid deduplication
        # XXX doesn't work for n < 80
        if i == 0:
            rando = bytes([random.choice(
                b'abcdefghijlkmopqrstuvwxyz') for i in range(30)])
            bs = rando + b'x' * 49 + b'\n'
        else:
            bs = b'x' * 79 + b'\n'
        response.write(bs)
        await response.drain()
    if n % 80 > 0:
        response.write(b'x' * (n % 80 - 1) + b'\n')
        await response.drain()

    return response

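# generates a throwaway RSA key and self-signed certificate for the https
# benchmark server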
def self_signed_cert():
    key = OpenSSL.crypto.PKey()
    key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

    cert = OpenSSL.crypto.X509()
    cert.set_serial_number(random.randint(0, 2 ** 64 - 1))
    cert.get_subject().CN = '127.0.0.1'

    cert.set_version(2)
    cert.gmtime_adj_notBefore(0)
    cert.gmtime_adj_notAfter(10 * 365 * 24 * 60 * 60)

    cert.set_issuer(cert.get_subject())
    cert.set_pubkey(key)
    cert.sign(key, 'sha1')

    return key, cert

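# builds an ssl.SSLContext from the self-signed cert, round-tripping the key
# and cert through a temporary PEM file because load_cert_chain() only accepts
# file paths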
def ssl_context():
    sslc = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
    with tempfile.NamedTemporaryFile(delete=False) as certfile:
        key, cert = self_signed_cert()
        certfile.write(
                OpenSSL.crypto.dump_privatekey(OpenSSL.SSL.FILETYPE_PEM, key))
        certfile.write(
                OpenSSL.crypto.dump_certificate(OpenSSL.SSL.FILETYPE_PEM, cert))
    sslc.load_cert_chain(certfile.name)
    os.remove(certfile.name)
    return sslc

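# starts the aiohttp http server on 127.0.0.1:4080 and the https server on
# 127.0.0.1:4443, on the default asyncio event loop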
def start_servers():
    app = aiohttp.web.Application()
    app.router.add_get('/{n}', do_get)

    loop = asyncio.get_event_loop()

    http = loop.create_server(
            app.make_handler(access_log=None), '127.0.0.1', 4080)
    loop.run_until_complete(http)

    sslc = ssl_context()
    https = loop.create_server(
            app.make_handler(access_log=None), '127.0.0.1', 4443, ssl=sslc)
    loop.run_until_complete(https)

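# fetches one url (optionally through the proxy) and returns the number of
# payload bytes read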
async def fetch(session, url, proxy=None):
    # logging.info('sending request to %s', url)
    n_bytes = 0
    async with session.get(url, proxy=proxy) as response:
        assert response.status == 200
        while True:
            chunk = await response.content.read(2**16)
            n_bytes += len(chunk)
            if not chunk:
                break
    # logging.info('finished receiving response from %s', url)
    return n_bytes

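# issues `requests` concurrent fetches of a `payload_size`-byte url and
# returns (n_urls, n_bytes, elapsed_seconds)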
async def benchmarking_client(
        base_url, requests=200, payload_size=100000, proxy=None):
    start = time.time()
    connector = aiohttp.TCPConnector(verify_ssl=False)
    n_urls = 0
    n_bytes = 0
    url = '%s/%s' % (base_url, payload_size)
    outstanding_requests = set()
    async with aiohttp.ClientSession(connector=connector) as session:
        for i in range(requests):
            future = asyncio.ensure_future(fetch(session, url, proxy))
            outstanding_requests.add(future)
            # logging.info('scheduled future fetch of %s', url)
        while True:
            done, pending = await asyncio.wait(
                    outstanding_requests, return_when=asyncio.FIRST_COMPLETED)
            for future in done:
                outstanding_requests.remove(future)
                n_urls += 1
                n_bytes += future.result()
            if not pending:
                return n_urls, n_bytes, time.time() - start

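# reuses warcprox's own argument parser, adding benchmark-specific options and
# hiding the options that the benchmark sets itself (port, address, cacert,
# certs dir, warcs directory)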
def build_arg_parser(tmpdir, prog=os.path.basename(sys.argv[0])):
    desc = '''
Warcprox benchmarker. Runs simple http and https servers and uses them to
benchmark warcprox. Runs 4 benchmarks:

1. baseline http (no warcprox)
2. baseline https (no warcprox)
3. http with warcprox
4. https with warcprox

Uses a temporary directory for warcs and other files. Otherwise, most warcprox
options can be specified on the command line. Useful for comparing performance
with different options.

Benchmarking code uses asyncio/aiohttp and requires python 3.5 or later.
'''
    arg_parser = warcprox.main._build_arg_parser()
    arg_parser.description = desc

    arg_parser.add_argument(
            '--requests', dest='requests', type=int, default=200,
            help='number of urls to fetch')
    arg_parser.add_argument(
            '--payload-size', dest='payload_size', type=int, default=100000,
            help='size of each response payload, in bytes')
    arg_parser.add_argument(
            '--skip-baseline', dest='skip_baseline', action='store_true',
            help='skip the baseline benchmarks')

    # filter out options that are not configurable for the benchmarks
    filtered = []
    for action in arg_parser._action_groups[1]._group_actions:
        if action.dest not in (
                'port', 'address', 'cacert', 'certs_dir', 'directory'):
            filtered.append(action)
    arg_parser._action_groups[1]._group_actions = filtered

    return arg_parser

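# example invocation, using only the flags defined in build_arg_parser() above
# (most other warcprox command-line options also apply, per the description):
#
#   ./run-benchmarks.py --requests 500 --payload-size 1000000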
if __name__ == '__main__':
    # see https://github.com/pyca/cryptography/issues/2911
    cryptography.hazmat.backends.openssl.backend.activate_builtin_random()

    # with tempfile.TemporaryDirectory() as tmpdir:
    tmpdir = tempfile.mkdtemp()
    if True:
        arg_parser = build_arg_parser(tmpdir)
        args = arg_parser.parse_args(args=sys.argv[1:])

        if args.trace:
            loglevel = logging.TRACE
        elif args.verbose:
            loglevel = logging.DEBUG
        else:
            loglevel = logging.INFO

        logging.basicConfig(
                stream=sys.stdout, level=loglevel, format=(
                    '%(asctime)s %(process)d %(levelname)s %(threadName)s '
                    '%(name)s.%(funcName)s(%(filename)s:%(lineno)d) '
                    '%(message)s'))
        logging.getLogger('warcprox').setLevel(loglevel + 5)

        logging.info('using temp dir %s', tmpdir)

        args.playback_port = None
        args.address = '127.0.0.1'
        args.port = 0
        args.cacert = os.path.join(tmpdir, 'benchmark-warcprox-ca.pem')
        args.certs_dir = os.path.join(tmpdir, 'benchmark-warcprox-ca')
        args.directory = os.path.join(tmpdir, 'warcs')
        # if args.rethinkdb_servers:
        #     args.rethinkdb_db = 'benchmarks_{:%Y%m%d%H%M%S}' % (
        #             datetime.datetime.utcnow())

        start_servers()
        logging.info(
                'servers running at http://127.0.0.1:4080 and '
                'https://127.0.0.1:4443')

        loop = asyncio.get_event_loop()

        logging.info('===== baseline benchmark starting (no proxy) =====')
        if not args.skip_baseline:
            n_urls, n_bytes, elapsed = loop.run_until_complete(
                    benchmarking_client(
                        'http://127.0.0.1:4080', args.requests,
                        args.payload_size))
            logging.info(
                    'http baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '
                    'sec', n_urls, n_bytes, elapsed)

            n_urls, n_bytes, elapsed = loop.run_until_complete(
                    benchmarking_client(
                        'https://127.0.0.1:4443', args.requests,
                        args.payload_size))
            logging.info(
                    'https baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '
                    'sec', n_urls, n_bytes, elapsed)
        else:
            logging.info('SKIPPED')
        logging.info('===== baseline benchmark finished =====')

        options = warcprox.Options(**vars(args))
        warcprox_controller = warcprox.controller.WarcproxController(options)

        warcprox_controller_thread = threading.Thread(
                target=warcprox_controller.run_until_shutdown)
        warcprox_controller_thread.start()

        proxy = 'http://%s:%s' % (
                warcprox_controller.proxy.server_address[0],
                warcprox_controller.proxy.server_address[1])
        logging.info('===== warcprox benchmark starting =====')
        n_urls, n_bytes, elapsed = loop.run_until_complete(
                benchmarking_client(
                    'http://127.0.0.1:4080', args.requests, args.payload_size,
                    proxy))
        logging.info(
                'http: n_urls=%s n_bytes=%s in %.1f sec',
                n_urls, n_bytes, elapsed)

        n_urls, n_bytes, elapsed = loop.run_until_complete(
                benchmarking_client(
                    'https://127.0.0.1:4443', args.requests, args.payload_size,
                    proxy))
        logging.info(
                'https: n_urls=%s n_bytes=%s in %.1f sec',
                n_urls, n_bytes, elapsed)

        start = time.time()
        warcprox_controller.stop.set()
        warcprox_controller_thread.join()
        logging.info(
                'waited %.1f sec for warcprox to finish', time.time() - start)
        logging.info('===== warcprox benchmark finished =====')