warcprox/benchmarks/run-benchmarks.py

#!/usr/bin/env python
'''
run-benchmarks.py - some benchmarking code for warcprox

Copyright (C) 2015-2017 Internet Archive

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA.
'''

import aiohttp.web
import asyncio
import ssl
import OpenSSL.crypto
import OpenSSL.SSL
import tempfile
import random
import os
import logging
import sys
import time
import argparse
import hashlib
import datetime
import cryptography.hazmat.backends.openssl
import warcprox
import warcprox.main
import threading

# https://medium.com/@generativist/a-simple-streaming-http-server-in-aiohttp-4233dbc173c7
async def do_get(request):
    '''
    Stream a synthetic text/plain response body of exactly `n` bytes, where
    `n` is the integer taken from the `/{n}` url path.

    The body is emitted as lines of up to 80 bytes (newline included), one
    write per line. The first line starts with up to 30 random lowercase
    letters so that successive responses differ (to avoid deduplication); the
    rest of the body is filled with b'x'.

    The random prefix is applied to the first line whatever its length, so
    payloads shorter than 80 bytes are randomized too. A 1-byte payload is
    just the newline and cannot carry any random bytes.

    Raises ValueError if the path component is not an integer.
    '''
    n = int(request.match_info.get('n'))
    response = aiohttp.web.StreamResponse(
            status=200, reason='OK', headers={
                'Content-Type': 'text/plain', 'Content-Length': str(n)})
    await response.prepare(request)

    remaining = n
    first_line = True
    while remaining > 0:
        # full 80-byte lines, then one shorter line for whatever is left
        line_len = min(80, remaining)
        if first_line:
            # some random bytes at the beginning to avoid deduplication;
            # leave room for the trailing newline
            prefix_len = min(30, line_len - 1)
            rando = bytes(
                    random.choice(b'abcdefghijlkmopqrstuvwxyz')
                    for _ in range(prefix_len))
            bs = rando + b'x' * (line_len - 1 - prefix_len) + b'\n'
            first_line = False
        else:
            bs = b'x' * (line_len - 1) + b'\n'
        response.write(bs)
        await response.drain()
        remaining -= line_len

    return response

def self_signed_cert():
    '''
    Generate a throwaway self-signed certificate for the local https
    benchmark server.

    Returns:
        (key, cert) tuple: a freshly generated 2048-bit RSA
        `OpenSSL.crypto.PKey`, and an `OpenSSL.crypto.X509` certificate with
        subject and issuer CN 127.0.0.1, a random serial number, valid from
        now for ten years, signed with that same key.
    '''
    key = OpenSSL.crypto.PKey()
    key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

    cert = OpenSSL.crypto.X509()
    cert.set_serial_number(random.randint(0, 2 ** 64 - 1))
    cert.get_subject().CN = '127.0.0.1'

    # the version field is zero-based, so 2 selects X.509 v3
    cert.set_version(2)
    cert.gmtime_adj_notBefore(0)
    cert.gmtime_adj_notAfter(10 * 365 * 24 * 60 * 60)

    # self-signed: issuer is the subject, and the cert is signed with its own
    # key
    cert.set_issuer(cert.get_subject())
    cert.set_pubkey(key)
    # sha256 instead of the deprecated sha1 digest
    cert.sign(key, 'sha256')

    return key, cert

def ssl_context():
    '''
    Build the server-side `ssl.SSLContext` for the https benchmark server,
    loaded with a newly generated self-signed key and certificate.

    `SSLContext.load_cert_chain()` only accepts a file path, so the PEM
    encoded private key and certificate are written to a temporary file,
    loaded from there, and the file is removed again before returning, even
    if writing or loading fails.
    '''
    sslc = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
    key, cert = self_signed_cert()

    # delete=False because the file has to be closed (flushed) before it is
    # read back by name; it is removed explicitly in the finally clause
    certfile = tempfile.NamedTemporaryFile(delete=False)
    try:
        with certfile:
            certfile.write(
                    OpenSSL.crypto.dump_privatekey(
                        OpenSSL.SSL.FILETYPE_PEM, key))
            certfile.write(
                    OpenSSL.crypto.dump_certificate(
                        OpenSSL.SSL.FILETYPE_PEM, cert))
        sslc.load_cert_chain(certfile.name)
    finally:
        # never leave the private key lying around in the temp dir
        os.remove(certfile.name)
    return sslc

def start_servers():
    '''
    Start the two local origin servers that the benchmarks fetch from: plain
    http on 127.0.0.1:4080 and https on 127.0.0.1:4443.

    Both serve the same aiohttp application, whose only route `/{n}` is
    handled by `do_get`. The listeners are created on the default asyncio
    event loop, so they only serve requests while that loop is running.
    '''
    webapp = aiohttp.web.Application()
    webapp.router.add_get('/{n}', do_get)

    event_loop = asyncio.get_event_loop()

    def _listen(port, **server_kwargs):
        # every listener gets its own handler, with access logging disabled
        handler = webapp.make_handler(access_log=None)
        event_loop.run_until_complete(event_loop.create_server(
            handler, '127.0.0.1', port, **server_kwargs))

    _listen(4080)
    _listen(4443, ssl=ssl_context())

async def fetch(session, url, proxy=None):
    '''
    GET `url` using `session`, optionally through `proxy`, read the response
    body to the end while discarding it, and return the number of body bytes
    received.

    Fails with AssertionError if the response status is not 200.
    '''
    total = 0
    async with session.get(url, proxy=proxy) as response:
        assert response.status == 200
        # read in pieces of up to 64 KiB; an empty read signals end of body
        chunk = await response.content.read(2**16)
        while chunk:
            total += len(chunk)
            chunk = await response.content.read(2**16)
    return total

async def benchmarking_client(
        base_url, requests=200, payload_size=100000, proxy=None):
    '''
    Fetch `base_url`/`payload_size` `requests` times, all scheduled up front
    so that they run concurrently, and measure how long it takes.

    Args:
        base_url: scheme://host:port of the server to hit
        requests: number of fetches to run; 0 is allowed and finishes
            immediately
        payload_size: number of bytes to ask the server for in each response
        proxy: url of the proxy to send the requests through, or None to
            connect to the server directly

    Returns:
        (n_urls, n_bytes, elapsed) tuple: number of completed fetches, total
        body bytes received, and wall clock seconds from start until the last
        fetch completed.

    Any exception raised by a fetch propagates to the caller.
    '''
    start = time.time()
    # certificate verification is off because the https test server uses a
    # self-signed certificate
    connector = aiohttp.TCPConnector(verify_ssl=False)
    n_urls = 0
    n_bytes = 0
    url = '%s/%s' % (base_url, payload_size)
    async with aiohttp.ClientSession(connector=connector) as session:
        outstanding_requests = {
                asyncio.ensure_future(fetch(session, url, proxy))
                for _ in range(requests)}
        # asyncio.wait() raises ValueError on an empty set, so only wait
        # while there is something left to wait for
        while outstanding_requests:
            done, outstanding_requests = await asyncio.wait(
                    outstanding_requests, return_when=asyncio.FIRST_COMPLETED)
            for future in done:
                n_urls += 1
                n_bytes += future.result()
        # measured before the session is closed
        elapsed = time.time() - start
    return n_urls, n_bytes, elapsed

def build_arg_parser(tmpdir, prog=os.path.basename(sys.argv[0])):
    '''
    Build the command line parser for the benchmark script.

    Starts from warcprox's own parser (`warcprox.main._build_arg_parser()`),
    replaces its description, adds the benchmark-specific options
    `--requests`, `--payload-size` and `--skip-baseline`, and drops the
    options the benchmark sets itself from the second action group.

    Args:
        tmpdir: accepted for the caller's convenience; not used here
        prog: accepted for compatibility; not used here

    Returns:
        the configured argument parser
    '''
    desc = '''
Warcprox benchmarker. Runs simple http and https servers and uses them to
benchmark warcprox. Runs 4 benchmarks:

    1. baseline http (no warcprox)
    2. baseline https (no warcprox)
    3. http with warcprox
    4. https with warcprox

Uses a temporary directory for warcs and other files. Otherwise, most warcprox
options can be specified on the command line. Useful for comparing performance
with different options.

Benchmarking code uses asyncio/aiohttp and requires python 3.5 or later.
'''
    arg_parser = warcprox.main._build_arg_parser()
    arg_parser.description = desc

    arg_parser.add_argument(
            '--requests', dest='requests', type=int, default=200,
            help='number of urls to fetch')
    arg_parser.add_argument(
            '--payload-size', dest='payload_size', type=int, default=100000,
            help='size of each response payload, in bytes')
    arg_parser.add_argument(
            '--skip-baseline', dest='skip_baseline', action='store_true',
            help='skip the baseline benchmarks')

    # filter out options that are not configurable for the benchmarks
    # NOTE(review): this relies on argparse internals; group 1 is presumably
    # the optional arguments group, and trimming its action list appears to
    # hide the options from --help only, not stop them being parsed - confirm
    options_group = arg_parser._action_groups[1]
    options_group._group_actions = [
            action for action in options_group._group_actions
            if action.dest not in (
                'port', 'address', 'cacert', 'certs_dir', 'directory')]

    return arg_parser

if __name__ == '__main__':
    # Script entry point: start the local test servers, optionally run the
    # baseline (no proxy) benchmarks, then start warcprox in a background
    # thread and run the same benchmarks through it.

    # see https://github.com/pyca/cryptography/issues/2911
    cryptography.hazmat.backends.openssl.backend.activate_builtin_random()

    # scratch directory for the warcprox CA, generated certs and warcs; it is
    # not removed when the script exits
    tmpdir = tempfile.mkdtemp()

    arg_parser = build_arg_parser(tmpdir)
    args = arg_parser.parse_args(args=sys.argv[1:])

    if args.trace:
        loglevel = warcprox.TRACE
    elif args.verbose:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO

    logging.basicConfig(
            stream=sys.stdout, level=loglevel, format=(
                '%(asctime)s %(process)d %(levelname)s %(threadName)s '
                '%(name)s.%(funcName)s(%(filename)s:%(lineno)d) '
                '%(message)s'))
    # warcprox's own loggers are set 5 above the chosen level, i.e. slightly
    # less verbose than the benchmark's logging
    logging.getLogger('warcprox').setLevel(loglevel + 5)

    logging.info('using temp dir %s', tmpdir)

    # settings the benchmark controls itself, regardless of the command line;
    # port 0 lets the OS pick a free port for the proxy
    args.playback_port = None
    args.address = '127.0.0.1'
    args.port = 0
    args.cacert = os.path.join(tmpdir, 'benchmark-warcprox-ca.pem')
    args.certs_dir = os.path.join(tmpdir, 'benchmark-warcprox-ca')
    args.directory = os.path.join(tmpdir, 'warcs')
    # if args.rethinkdb_servers:
    #     args.rethinkdb_db = 'benchmarks_{:%Y%m%d%H%M%S}' % (
    #             datetime.datetime.utcnow())

    start_servers()
    logging.info(
            'servers running at http://127.0.0.1:4080 and '
            'https://127.0.0.1:4443')

    loop = asyncio.get_event_loop()

    logging.info('===== baseline benchmark starting (no proxy) =====')
    if not args.skip_baseline:
        n_urls, n_bytes, elapsed = loop.run_until_complete(
                benchmarking_client(
                    'http://127.0.0.1:4080', args.requests,
                    args.payload_size))
        logging.info(
                'http baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '
                'sec', n_urls, n_bytes, elapsed)

        n_urls, n_bytes, elapsed = loop.run_until_complete(
                benchmarking_client(
                    'https://127.0.0.1:4443', args.requests,
                    args.payload_size))
        logging.info(
                'https baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '
                'sec', n_urls, n_bytes, elapsed)
    else:
        logging.info('SKIPPED')
    logging.info('===== baseline benchmark finished =====')

    options = warcprox.Options(**vars(args))
    warcprox_controller = warcprox.controller.WarcproxController(options)

    warcprox_controller_thread = threading.Thread(
            target=warcprox_controller.run_until_shutdown)
    warcprox_controller_thread.start()

    try:
        proxy = 'http://%s:%s' % (
                warcprox_controller.proxy.server_address[0],
                warcprox_controller.proxy.server_address[1])
        logging.info('===== warcprox benchmark starting =====')
        n_urls, n_bytes, elapsed = loop.run_until_complete(
                benchmarking_client(
                    'http://127.0.0.1:4080', args.requests, args.payload_size,
                    proxy))
        logging.info(
                'http: n_urls=%s n_bytes=%s in %.1f sec',
                n_urls, n_bytes, elapsed)

        n_urls, n_bytes, elapsed = loop.run_until_complete(
                benchmarking_client(
                    'https://127.0.0.1:4443', args.requests, args.payload_size,
                    proxy))
        logging.info(
                'https: n_urls=%s n_bytes=%s in %.1f sec',
                n_urls, n_bytes, elapsed)
    finally:
        # always ask warcprox to shut down: the controller runs in a
        # non-daemon thread, so if a benchmark raised and the stop event was
        # never set the process would hang instead of exiting
        start = time.time()
        warcprox_controller.stop.set()
        warcprox_controller_thread.join()
        logging.info(
                'waited %.1f sec for warcprox to finish', time.time() - start)
    logging.info('===== warcprox benchmark finished =====')
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00			`#!/usr/bin/env python`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`'''`
			`run-benchmarks.py - some benchmarking code for warcprox`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`Copyright (C) 2015-2017 Internet Archive`

			`This program is free software; you can redistribute it and/or`
			`modify it under the terms of the GNU General Public License`
			`as published by the Free Software Foundation; either version 2`
			`of the License, or (at your option) any later version.`

			`This program is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with this program; if not, write to the Free Software`
			`Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,`
			`USA.`
			`'''`

			`import aiohttp.web`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00			`import asyncio`
			`import ssl`
			`import OpenSSL.crypto`
			`import OpenSSL.SSL`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`import tempfile`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00			`import random`
			`import os`
			`import logging`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`import sys`
			`import time`
			`import argparse`
			`import hashlib`
			`import datetime`
			`import cryptography.hazmat.backends.openssl`
			`import warcprox`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00			`import warcprox.main`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`import threading`

			`# https://medium.com/@generativist/a-simple-streaming-http-server-in-aiohttp-4233dbc173c7`
			`async def do_get(request):`
			`n = int(request.match_info.get('n'))`
			`response = aiohttp.web.StreamResponse(`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`status=200, reason='OK', headers={`
			`'Content-Type': 'text/plain', 'Content-Length': str(n)})`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`await response.prepare(request)`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`for i in range(n // 80):`
use request count and payload size to specify length of benchmark run 2017-05-10 18:58:19 +00:00			`# some random bytes at the beginning to avoid deduplication`
			`# XXX doesn't work for n < 80`
			`if i == 0:`
			`rando = bytes([random.choice(`
			`b'abcdefghijlkmopqrstuvwxyz') for i in range(30)])`
			`bs = rando + b'x' * 49 + b'\n'`
			`else:`
			`bs = b'x' * 79 + b'\n'`
			`response.write(bs)`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`await response.drain()`
			`if n % 80 > 0:`
			`response.write(b'x' * (n % 80 - 1) + b'\n')`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`await response.drain()`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`return response`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
			`def self_signed_cert():`
			`key = OpenSSL.crypto.PKey()`
			`key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)`

			`cert = OpenSSL.crypto.X509()`
			`cert.set_serial_number(random.randint(0, 2 ** 64 - 1))`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`cert.get_subject().CN = '127.0.0.1'`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
			`cert.set_version(2)`
			`cert.gmtime_adj_notBefore(0)`
			`cert.gmtime_adj_notAfter(10 * 365 * 24 * 60 * 60)`

			`cert.set_issuer(cert.get_subject())`
			`cert.set_pubkey(key)`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`cert.sign(key, 'sha1')`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
			`return key, cert`

rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`def ssl_context():`
			`sslc = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)`
			`with tempfile.NamedTemporaryFile(delete=False) as certfile:`
			`key, cert = self_signed_cert()`
			`certfile.write(`
			`OpenSSL.crypto.dump_privatekey(OpenSSL.SSL.FILETYPE_PEM, key))`
			`certfile.write(`
			`OpenSSL.crypto.dump_certificate(OpenSSL.SSL.FILETYPE_PEM, cert))`
			`sslc.load_cert_chain(certfile.name)`
			`os.remove(certfile.name)`
			`return sslc`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
			`def start_servers():`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`app = aiohttp.web.Application()`
			`app.router.add_get('/{n}', do_get)`

working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00			`loop = asyncio.get_event_loop()`

rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`http = loop.create_server(`
			`app.make_handler(access_log=None), '127.0.0.1', 4080)`
			`loop.run_until_complete(http)`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`sslc = ssl_context()`
			`https = loop.create_server(`
			`app.make_handler(access_log=None), '127.0.0.1', 4443, ssl=sslc)`
			`loop.run_until_complete(https)`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`async def fetch(session, url, proxy=None):`
			`# logging.info('sending request to %s', url)`
			`n_bytes = 0`
			`async with session.get(url, proxy=proxy) as response:`
			`assert response.status == 200`
			`while True:`
			`chunk = await response.content.read(2**16)`
			`n_bytes += len(chunk)`
			`if not chunk:`
			`break`
			`# logging.info('finished receiving response from %s', url)`
			`return n_bytes`

use request count and payload size to specify length of benchmark run 2017-05-10 18:58:19 +00:00			`async def benchmarking_client(`
			`base_url, requests=200, payload_size=100000, proxy=None):`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`start = time.time()`
			`connector = aiohttp.TCPConnector(verify_ssl=False)`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`n_urls = 0`
			`n_bytes = 0`
use request count and payload size to specify length of benchmark run 2017-05-10 18:58:19 +00:00			`url = '%s/%s' % (base_url, payload_size)`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`outstanding_requests = set()`
			`async with aiohttp.ClientSession(connector=connector) as session:`
use request count and payload size to specify length of benchmark run 2017-05-10 18:58:19 +00:00			`for i in range(requests):`
			`future = asyncio.ensure_future(fetch(session, url, proxy))`
			`outstanding_requests.add(future)`
			`# logging.info('scheduled future fetch of %s', url)`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`while True:`
use request count and payload size to specify length of benchmark run 2017-05-10 18:58:19 +00:00			`done, pending = await asyncio.wait(`
			`outstanding_requests, return_when=asyncio.FIRST_COMPLETED)`
			`for future in done:`
			`outstanding_requests.remove(future)`
			`n_urls += 1`
			`n_bytes += future.result()`
			`if not pending:`
			`return n_urls, n_bytes, time.time() - start`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`def build_arg_parser(tmpdir, prog=os.path.basename(sys.argv[0])):`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`desc = '''`
			`Warcprox benchmarker. Runs simple http and https servers and uses them to`
			`benchmark warcprox. Runs 4 benchmarks:`

			`1. baseline http (no warcprox)`
			`2. baseline https (no warcprox)`
			`3. http with warcprox`
			`4. https with warcprox`

			`Uses a temporary directory for warcs and other files. Otherwise, most warcprox`
			`options can be specified on the command line. Useful for comparing performance`
			`with different options.`

			`Benchmarking code uses asyncio/aiohttp and requires python 3.5 or later.`
			`'''`
hacky way to fix problem of benchmarks arguments getting stale 2017-11-14 14:40:50 -08:00			`arg_parser = warcprox.main._build_arg_parser()`
			`arg_parser.description = desc`
fix benchmarks (update command line args) 2017-10-23 12:49:32 -07:00
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`arg_parser.add_argument(`
use request count and payload size to specify length of benchmark run 2017-05-10 18:58:19 +00:00			`'--requests', dest='requests', type=int, default=200,`
			`help='number of urls to fetch')`
			`arg_parser.add_argument(`
			`'--payload-size', dest='payload_size', type=int, default=100000,`
			`help='size of each response payload, in bytes')`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`arg_parser.add_argument(`
			`'--skip-baseline', dest='skip_baseline', action='store_true',`
			`help='skip the baseline bechmarks')`
hacky way to fix problem of benchmarks arguments getting stale 2017-11-14 14:40:50 -08:00
			`# filter out options that are not configurable for the benchmarks`
			`filtered = []`
			`for action in arg_parser._action_groups[1]._group_actions:`
			`if action.dest not in (`
			`'port', 'address', 'cacert', 'certs_dir', 'directory'):`
			`filtered.append(action)`
			`arg_parser._action_groups[1]._group_actions = filtered`

rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`return arg_parser`

			`if __name__ == '__main__':`
			`# see https://github.com/pyca/cryptography/issues/2911`
			`cryptography.hazmat.backends.openssl.backend.activate_builtin_random()`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`# with tempfile.TemporaryDirectory() as tmpdir:`
			`tmpdir = tempfile.mkdtemp()`
			`if True:`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`arg_parser = build_arg_parser(tmpdir)`
			`args = arg_parser.parse_args(args=sys.argv[1:])`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`if args.trace:`
			`loglevel = warcprox.TRACE`
			`elif args.verbose:`
			`loglevel = logging.DEBUG`
			`else:`
			`loglevel = logging.INFO`

			`logging.basicConfig(`
			`stream=sys.stdout, level=loglevel, format=(`
			`'%(asctime)s %(process)d %(levelname)s %(threadName)s '`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`'%(name)s.%(funcName)s(%(filename)s:%(lineno)d) '`
			`'%(message)s'))`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`logging.getLogger('warcprox').setLevel(loglevel + 5)`

improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`logging.info('using temp dir %s', tmpdir)`

rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`args.playback_port = None`
			`args.address = '127.0.0.1'`
			`args.port = 0`
			`args.cacert = os.path.join(tmpdir, 'benchmark-warcprox-ca.pem')`
			`args.certs_dir = os.path.join(tmpdir, 'benchmark-warcprox-ca')`
			`args.directory = os.path.join(tmpdir, 'warcs')`
make run-benchmarks.py work (with no args) 2018-01-15 17:15:36 -08:00			`# if args.rethinkdb_servers:`
			`# args.rethinkdb_db = 'benchmarks_{:%Y%m%d%H%M%S}' % (`
			`# datetime.datetime.utcnow())`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`start_servers()`
			`logging.info(`
			`'servers running at http://127.0.0.1:4080 and '`
			`'https://127.0.0.1:4443')`

			`loop = asyncio.get_event_loop()`

improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`logging.info('===== baseline benchmark starting (no proxy) =====')`
			`if not args.skip_baseline:`
			`n_urls, n_bytes, elapsed = loop.run_until_complete(`
			`benchmarking_client(`
use request count and payload size to specify length of benchmark run 2017-05-10 18:58:19 +00:00			`'http://127.0.0.1:4080', args.requests,`
			`args.payload_size))`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`logging.info(`
			`'http baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '`
			`'sec', n_urls, n_bytes, elapsed)`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`n_urls, n_bytes, elapsed = loop.run_until_complete(`
			`benchmarking_client(`
use request count and payload size to specify length of benchmark run 2017-05-10 18:58:19 +00:00			`'https://127.0.0.1:4443', args.requests,`
			`args.payload_size))`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`logging.info(`
			`'https baseline (no proxy): n_urls=%s n_bytes=%s in %.1f '`
			`'sec', n_urls, n_bytes, elapsed)`
			`else:`
			`logging.info('SKIPPED')`
			`logging.info('===== baseline benchmark finished =====')`

make run-benchmarks.py work (with no args) 2018-01-15 17:15:36 -08:00			`options = warcprox.Options(**vars(args))`
			`warcprox_controller = warcprox.controller.WarcproxController(options)`

improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`warcprox_controller_thread = threading.Thread(`
			`target=warcprox_controller.run_until_shutdown)`
			`warcprox_controller_thread.start()`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`proxy = 'http://%s:%s' % (`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`warcprox_controller.proxy.server_address[0],`
			`warcprox_controller.proxy.server_address[1])`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`logging.info('===== warcprox benchmark starting =====')`
			`n_urls, n_bytes, elapsed = loop.run_until_complete(`
use request count and payload size to specify length of benchmark run 2017-05-10 18:58:19 +00:00			`benchmarking_client(`
			`'http://127.0.0.1:4080', args.requests, args.payload_size,`
			`proxy))`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`logging.info(`
			`'http: n_urls=%s n_bytes=%s in %.1f sec',`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`n_urls, n_bytes, elapsed)`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`n_urls, n_bytes, elapsed = loop.run_until_complete(`
use request count and payload size to specify length of benchmark run 2017-05-10 18:58:19 +00:00			`benchmarking_client(`
			`'https://127.0.0.1:4443', args.requests, args.payload_size,`
			`proxy))`
rewrite run-benchmarks.py for aiohttp2 2017-05-08 20:56:32 -07:00			`logging.info(`
			`'https: n_urls=%s n_bytes=%s in %.1f sec',`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`n_urls, n_bytes, elapsed)`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`start = time.time()`
working on benchmarking code... so far they seem to reveal that warcprox behaves poorly under load (perhaps timeouts are configured too short?) 2015-10-28 21:34:34 +00:00			`warcprox_controller.stop.set()`
			`warcprox_controller_thread.join()`
improvements to run-benchmark.py, primarily to actually make multiple requests in parallel 2017-05-10 18:01:56 +00:00			`logging.info(`
			`'waited %.1f sec for warcprox to finish', time.time() - start)`
			`logging.info('===== warcprox benchmark finished =====')`