From 9b8043d3a2b268b6708251811223da3c22b1d1ae Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Fri, 6 Oct 2017 17:00:35 -0700 Subject: [PATCH] greatly simplify automated test setup by reusing initialization code from the command line executable; this also has the benefit of testing that initialization code --- setup.py | 2 +- tests/test_warcprox.py | 200 ++++++++--------------------------------- 2 files changed, 40 insertions(+), 162 deletions(-) diff --git a/setup.py b/setup.py index 766f1b6..89f37dc 100755 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ except: setuptools.setup( name='warcprox', - version='2.2b1.dev104', + version='2.2b1.dev105', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py index 216b4af..b24a5c8 100755 --- a/tests/test_warcprox.py +++ b/tests/test_warcprox.py @@ -60,6 +60,7 @@ except ImportError: import certauth.certauth import warcprox +import warcprox.main try: import http.client as http_client @@ -241,166 +242,44 @@ def https_daemon(request, cert): return https_daemon @pytest.fixture(scope="module") -def captures_db(request, rethinkdb_servers, rethinkdb_big_table): - captures_db = None +def warcprox_(request, rethinkdb_servers, rethinkdb_big_table): + orig_dir = os.getcwd() + work_dir = tempfile.mkdtemp() + logging.info('changing to working directory %r', work_dir) + os.chdir(work_dir) + + argv = ['warcprox', + '--method-filter=GET', + '--method-filter=POST', + '--port=0', + '--playback-port=0', + '--onion-tor-socks-proxy=localhost:9050'] if rethinkdb_servers: - servers = rethinkdb_servers.split(",") - if rethinkdb_big_table: - db = 'warcprox_test_captures_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8)) - rr = doublethink.Rethinker(servers, db) - captures_db = warcprox.bigtable.RethinkCaptures(rr) - captures_db.start() + rethinkdb_db = 'warcprox_test_%s' % ''.join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8)) + argv.append('--rethinkdb-servers=%s' % rethinkdb_servers) + argv.append('--rethinkdb-db=%s' % rethinkdb_db) + if rethinkdb_big_table: + argv.append('--rethinkdb-big-table') - def fin(): - if captures_db: - captures_db.close() - # logging.info('dropping rethinkdb database {}'.format(db)) - # result = captures_db.rr.db_drop(db).run() - # logging.info("result=%s", result) - request.addfinalizer(fin) + args = warcprox.main.parse_args(argv) + warcprox_ = warcprox.main.init_controller(args) - return captures_db - -@pytest.fixture(scope="module") -def rethink_dedup_db(request, rethinkdb_servers, captures_db): - ddb = None - if rethinkdb_servers: - if captures_db: - ddb = warcprox.bigtable.RethinkCapturesDedup(captures_db) - else: - servers = rethinkdb_servers.split(",") - db = 'warcprox_test_dedup_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8)) - rr = doublethink.Rethinker(servers, db) - ddb = warcprox.dedup.RethinkDedupDb(rr) - - def fin(): - if rethinkdb_servers and not captures_db: - logging.info('dropping rethinkdb database {}'.format(db)) - result = ddb.rr.db_drop(db).run() - logging.info("result=%s", result) - request.addfinalizer(fin) - - return ddb - -@pytest.fixture(scope="module") -def dedup_db(request, rethink_dedup_db): - dedup_db_file = None - ddb = rethink_dedup_db - if not ddb: - f = tempfile.NamedTemporaryFile(prefix='warcprox-test-dedup-', suffix='.db', delete=False) - f.close() - dedup_db_file = f.name - ddb = warcprox.dedup.DedupDb(dedup_db_file) - - def fin(): - if dedup_db_file: - logging.info('deleting file {}'.format(dedup_db_file)) - os.unlink(dedup_db_file) - request.addfinalizer(fin) - - return ddb - -@pytest.fixture(scope="module") -def stats_db(request, rethinkdb_servers): - if rethinkdb_servers: - servers = rethinkdb_servers.split(",") - db = 'warcprox_test_stats_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8)) - rr = doublethink.Rethinker(servers, db) - sdb = warcprox.stats.RethinkStatsDb(rr) - sdb.start() - else: - f = tempfile.NamedTemporaryFile(prefix='warcprox-test-stats-', suffix='.db', delete=False) - f.close() - stats_db_file = f.name - sdb = warcprox.stats.StatsDb(stats_db_file) - - def fin(): - sdb.close() - if rethinkdb_servers: - logging.info('dropping rethinkdb database {}'.format(db)) - result = sdb.rr.db_drop(db).run() - logging.info("result=%s", result) - # else: - # logging.info('deleting file {}'.format(stats_db_file)) - # os.unlink(stats_db_file) - request.addfinalizer(fin) - - return sdb - -@pytest.fixture(scope="module") -def service_registry(request, rethinkdb_servers): - if rethinkdb_servers: - servers = rethinkdb_servers.split(",") - db = 'warcprox_test_services_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8)) - rr = doublethink.Rethinker(servers, db) - - def fin(): - logging.info('dropping rethinkdb database {}'.format(db)) - result = rr.db_drop(db).run() - logging.info("result=%s", result) - request.addfinalizer(fin) - - return doublethink.ServiceRegistry(rr) - else: - return None - -@pytest.fixture(scope="module") -def warcprox_(request, captures_db, dedup_db, stats_db, service_registry): - f = tempfile.NamedTemporaryFile(prefix='warcprox-test-ca-', suffix='.pem', delete=True) - f.close() # delete it, or CertificateAuthority will try to read it - ca_file = f.name - ca_dir = tempfile.mkdtemp(prefix='warcprox-test-', suffix='-ca') - ca = certauth.certauth.CertificateAuthority(ca_file, ca_dir, 'warcprox-test') - - recorded_url_q = warcprox.TimestampedQueue() - - options = warcprox.Options(port=0, playback_port=0, - onion_tor_socks_proxy='localhost:9050') - proxy = warcprox.warcproxy.WarcProxy(ca=ca, recorded_url_q=recorded_url_q, - stats_db=stats_db, options=options) - options.port = proxy.server_port - - options.directory = tempfile.mkdtemp(prefix='warcprox-test-warcs-') - - f = tempfile.NamedTemporaryFile(prefix='warcprox-test-playback-index-', suffix='.db', delete=False) - f.close() - playback_index_db_file = f.name - playback_index_db = warcprox.playback.PlaybackIndexDb(playback_index_db_file) - playback_proxy = warcprox.playback.PlaybackProxy(ca=ca, - playback_index_db=playback_index_db, options=options) - options.playback_proxy = playback_proxy.server_port - - options.method_filter = ['GET','POST'] - - writer_pool = warcprox.writer.WarcWriterPool(options) - warc_writer_threads = [ - warcprox.writerthread.WarcWriterThread( - recorded_url_q=recorded_url_q, writer_pool=writer_pool, - dedup_db=dedup_db, listeners=[ - captures_db or dedup_db, playback_index_db, stats_db], - options=options) - for i in range(int(proxy.max_threads ** 0.5))] - - warcprox_ = warcprox.controller.WarcproxController( - proxy=proxy, warc_writer_threads=warc_writer_threads, - playback_proxy=playback_proxy, service_registry=service_registry, - options=options) logging.info('starting warcprox') - warcprox_thread = threading.Thread(name='WarcproxThread', - target=warcprox_.run_until_shutdown) + warcprox_thread = threading.Thread( + name='WarcproxThread', target=warcprox_.run_until_shutdown) warcprox_thread.start() def fin(): - logging.info('stopping warcprox') warcprox_.stop.set() warcprox_thread.join() - for f in (ca_file, ca_dir, options.directory, playback_index_db_file): - if os.path.isdir(f): - logging.info('deleting directory {}'.format(f)) - shutil.rmtree(f) - else: - logging.info('deleting file {}'.format(f)) - os.unlink(f) + if rethinkdb_servers: + logging.info('dropping rethinkdb database %r', rethinkdb_db) + rr = doublethink.Rethinker(rethinkdb_servers) + result = rr.db_drop(rethinkdb_db).run() + logging.info('deleting working directory %r', work_dir) + os.chdir(orig_dir) + shutil.rmtree(work_dir) + request.addfinalizer(fin) return warcprox_ @@ -1226,9 +1105,8 @@ def test_method_filter( assert response.content == payload def test_dedup_ok_flag( - https_daemon, http_daemon, warcprox_, archiving_proxies, - rethinkdb_big_table): - if not rethinkdb_big_table: + https_daemon, http_daemon, warcprox_, archiving_proxies): + if not warcprox_.options.rethinkdb_big_table: # this feature is n/a unless using rethinkdb big table return @@ -1312,11 +1190,11 @@ def test_status_api(warcprox_): assert status['pid'] == os.getpid() assert status['threads'] == warcprox_.proxy.pool._max_workers -def test_svcreg_status(warcprox_, service_registry): - if service_registry: +def test_svcreg_status(warcprox_): + if warcprox_.service_registry: start = time.time() while time.time() - start < 15: - status = service_registry.available_service('warcprox') + status = warcprox_.service_registry.available_service('warcprox') if status: break time.sleep(0.5) @@ -1373,11 +1251,11 @@ def test_controller_with_defaults(): assert not wwt.writer_pool.default_warc_writer.record_builder.base32 assert wwt.writer_pool.default_warc_writer.record_builder.digest_algorithm == 'sha1' -def test_choose_a_port_for_me(service_registry): +def test_choose_a_port_for_me(warcprox_): options = warcprox.Options() options.port = 0 controller = warcprox.controller.WarcproxController( - service_registry=service_registry, options=options) + service_registry=warcprox_.service_registry, options=options) assert controller.proxy.server_port != 0 assert controller.proxy.server_port != 8000 assert controller.proxy.server_address == ( @@ -1393,12 +1271,12 @@ def test_choose_a_port_for_me(service_registry): status = json.loads(response.content.decode('ascii')) assert status['port'] == controller.proxy.server_port - if service_registry: + if warcprox_.service_registry: # check that service registry entry lists the correct port start = time.time() ports = [] while time.time() - start < 30: - svcs = service_registry.available_services('warcprox') + svcs = warcprox_.service_registry.available_services('warcprox') ports = [svc['port'] for svc in svcs] if controller.proxy.server_port in ports: break