mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
greatly simplify automated test setup by reusing initialization code from the command line executable; this also has the benefit of testing that initialization code
This commit is contained in:
parent
0cc68dd428
commit
9b8043d3a2
2
setup.py
2
setup.py
@ -49,7 +49,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.2b1.dev104',
|
||||
version='2.2b1.dev105',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
|
@ -60,6 +60,7 @@ except ImportError:
|
||||
import certauth.certauth
|
||||
|
||||
import warcprox
|
||||
import warcprox.main
|
||||
|
||||
try:
|
||||
import http.client as http_client
|
||||
@ -241,166 +242,44 @@ def https_daemon(request, cert):
|
||||
return https_daemon
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def captures_db(request, rethinkdb_servers, rethinkdb_big_table):
|
||||
captures_db = None
|
||||
def warcprox_(request, rethinkdb_servers, rethinkdb_big_table):
|
||||
orig_dir = os.getcwd()
|
||||
work_dir = tempfile.mkdtemp()
|
||||
logging.info('changing to working directory %r', work_dir)
|
||||
os.chdir(work_dir)
|
||||
|
||||
argv = ['warcprox',
|
||||
'--method-filter=GET',
|
||||
'--method-filter=POST',
|
||||
'--port=0',
|
||||
'--playback-port=0',
|
||||
'--onion-tor-socks-proxy=localhost:9050']
|
||||
if rethinkdb_servers:
|
||||
servers = rethinkdb_servers.split(",")
|
||||
if rethinkdb_big_table:
|
||||
db = 'warcprox_test_captures_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
||||
rr = doublethink.Rethinker(servers, db)
|
||||
captures_db = warcprox.bigtable.RethinkCaptures(rr)
|
||||
captures_db.start()
|
||||
rethinkdb_db = 'warcprox_test_%s' % ''.join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
||||
argv.append('--rethinkdb-servers=%s' % rethinkdb_servers)
|
||||
argv.append('--rethinkdb-db=%s' % rethinkdb_db)
|
||||
if rethinkdb_big_table:
|
||||
argv.append('--rethinkdb-big-table')
|
||||
|
||||
def fin():
|
||||
if captures_db:
|
||||
captures_db.close()
|
||||
# logging.info('dropping rethinkdb database {}'.format(db))
|
||||
# result = captures_db.rr.db_drop(db).run()
|
||||
# logging.info("result=%s", result)
|
||||
request.addfinalizer(fin)
|
||||
args = warcprox.main.parse_args(argv)
|
||||
warcprox_ = warcprox.main.init_controller(args)
|
||||
|
||||
return captures_db
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def rethink_dedup_db(request, rethinkdb_servers, captures_db):
|
||||
ddb = None
|
||||
if rethinkdb_servers:
|
||||
if captures_db:
|
||||
ddb = warcprox.bigtable.RethinkCapturesDedup(captures_db)
|
||||
else:
|
||||
servers = rethinkdb_servers.split(",")
|
||||
db = 'warcprox_test_dedup_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
||||
rr = doublethink.Rethinker(servers, db)
|
||||
ddb = warcprox.dedup.RethinkDedupDb(rr)
|
||||
|
||||
def fin():
|
||||
if rethinkdb_servers and not captures_db:
|
||||
logging.info('dropping rethinkdb database {}'.format(db))
|
||||
result = ddb.rr.db_drop(db).run()
|
||||
logging.info("result=%s", result)
|
||||
request.addfinalizer(fin)
|
||||
|
||||
return ddb
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def dedup_db(request, rethink_dedup_db):
|
||||
dedup_db_file = None
|
||||
ddb = rethink_dedup_db
|
||||
if not ddb:
|
||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-dedup-', suffix='.db', delete=False)
|
||||
f.close()
|
||||
dedup_db_file = f.name
|
||||
ddb = warcprox.dedup.DedupDb(dedup_db_file)
|
||||
|
||||
def fin():
|
||||
if dedup_db_file:
|
||||
logging.info('deleting file {}'.format(dedup_db_file))
|
||||
os.unlink(dedup_db_file)
|
||||
request.addfinalizer(fin)
|
||||
|
||||
return ddb
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def stats_db(request, rethinkdb_servers):
|
||||
if rethinkdb_servers:
|
||||
servers = rethinkdb_servers.split(",")
|
||||
db = 'warcprox_test_stats_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
||||
rr = doublethink.Rethinker(servers, db)
|
||||
sdb = warcprox.stats.RethinkStatsDb(rr)
|
||||
sdb.start()
|
||||
else:
|
||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-stats-', suffix='.db', delete=False)
|
||||
f.close()
|
||||
stats_db_file = f.name
|
||||
sdb = warcprox.stats.StatsDb(stats_db_file)
|
||||
|
||||
def fin():
|
||||
sdb.close()
|
||||
if rethinkdb_servers:
|
||||
logging.info('dropping rethinkdb database {}'.format(db))
|
||||
result = sdb.rr.db_drop(db).run()
|
||||
logging.info("result=%s", result)
|
||||
# else:
|
||||
# logging.info('deleting file {}'.format(stats_db_file))
|
||||
# os.unlink(stats_db_file)
|
||||
request.addfinalizer(fin)
|
||||
|
||||
return sdb
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def service_registry(request, rethinkdb_servers):
|
||||
if rethinkdb_servers:
|
||||
servers = rethinkdb_servers.split(",")
|
||||
db = 'warcprox_test_services_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
||||
rr = doublethink.Rethinker(servers, db)
|
||||
|
||||
def fin():
|
||||
logging.info('dropping rethinkdb database {}'.format(db))
|
||||
result = rr.db_drop(db).run()
|
||||
logging.info("result=%s", result)
|
||||
request.addfinalizer(fin)
|
||||
|
||||
return doublethink.ServiceRegistry(rr)
|
||||
else:
|
||||
return None
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def warcprox_(request, captures_db, dedup_db, stats_db, service_registry):
|
||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-ca-', suffix='.pem', delete=True)
|
||||
f.close() # delete it, or CertificateAuthority will try to read it
|
||||
ca_file = f.name
|
||||
ca_dir = tempfile.mkdtemp(prefix='warcprox-test-', suffix='-ca')
|
||||
ca = certauth.certauth.CertificateAuthority(ca_file, ca_dir, 'warcprox-test')
|
||||
|
||||
recorded_url_q = warcprox.TimestampedQueue()
|
||||
|
||||
options = warcprox.Options(port=0, playback_port=0,
|
||||
onion_tor_socks_proxy='localhost:9050')
|
||||
proxy = warcprox.warcproxy.WarcProxy(ca=ca, recorded_url_q=recorded_url_q,
|
||||
stats_db=stats_db, options=options)
|
||||
options.port = proxy.server_port
|
||||
|
||||
options.directory = tempfile.mkdtemp(prefix='warcprox-test-warcs-')
|
||||
|
||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-playback-index-', suffix='.db', delete=False)
|
||||
f.close()
|
||||
playback_index_db_file = f.name
|
||||
playback_index_db = warcprox.playback.PlaybackIndexDb(playback_index_db_file)
|
||||
playback_proxy = warcprox.playback.PlaybackProxy(ca=ca,
|
||||
playback_index_db=playback_index_db, options=options)
|
||||
options.playback_proxy = playback_proxy.server_port
|
||||
|
||||
options.method_filter = ['GET','POST']
|
||||
|
||||
writer_pool = warcprox.writer.WarcWriterPool(options)
|
||||
warc_writer_threads = [
|
||||
warcprox.writerthread.WarcWriterThread(
|
||||
recorded_url_q=recorded_url_q, writer_pool=writer_pool,
|
||||
dedup_db=dedup_db, listeners=[
|
||||
captures_db or dedup_db, playback_index_db, stats_db],
|
||||
options=options)
|
||||
for i in range(int(proxy.max_threads ** 0.5))]
|
||||
|
||||
warcprox_ = warcprox.controller.WarcproxController(
|
||||
proxy=proxy, warc_writer_threads=warc_writer_threads,
|
||||
playback_proxy=playback_proxy, service_registry=service_registry,
|
||||
options=options)
|
||||
logging.info('starting warcprox')
|
||||
warcprox_thread = threading.Thread(name='WarcproxThread',
|
||||
target=warcprox_.run_until_shutdown)
|
||||
warcprox_thread = threading.Thread(
|
||||
name='WarcproxThread', target=warcprox_.run_until_shutdown)
|
||||
warcprox_thread.start()
|
||||
|
||||
def fin():
|
||||
logging.info('stopping warcprox')
|
||||
warcprox_.stop.set()
|
||||
warcprox_thread.join()
|
||||
for f in (ca_file, ca_dir, options.directory, playback_index_db_file):
|
||||
if os.path.isdir(f):
|
||||
logging.info('deleting directory {}'.format(f))
|
||||
shutil.rmtree(f)
|
||||
else:
|
||||
logging.info('deleting file {}'.format(f))
|
||||
os.unlink(f)
|
||||
if rethinkdb_servers:
|
||||
logging.info('dropping rethinkdb database %r', rethinkdb_db)
|
||||
rr = doublethink.Rethinker(rethinkdb_servers)
|
||||
result = rr.db_drop(rethinkdb_db).run()
|
||||
logging.info('deleting working directory %r', work_dir)
|
||||
os.chdir(orig_dir)
|
||||
shutil.rmtree(work_dir)
|
||||
|
||||
request.addfinalizer(fin)
|
||||
|
||||
return warcprox_
|
||||
@ -1226,9 +1105,8 @@ def test_method_filter(
|
||||
assert response.content == payload
|
||||
|
||||
def test_dedup_ok_flag(
|
||||
https_daemon, http_daemon, warcprox_, archiving_proxies,
|
||||
rethinkdb_big_table):
|
||||
if not rethinkdb_big_table:
|
||||
https_daemon, http_daemon, warcprox_, archiving_proxies):
|
||||
if not warcprox_.options.rethinkdb_big_table:
|
||||
# this feature is n/a unless using rethinkdb big table
|
||||
return
|
||||
|
||||
@ -1312,11 +1190,11 @@ def test_status_api(warcprox_):
|
||||
assert status['pid'] == os.getpid()
|
||||
assert status['threads'] == warcprox_.proxy.pool._max_workers
|
||||
|
||||
def test_svcreg_status(warcprox_, service_registry):
|
||||
if service_registry:
|
||||
def test_svcreg_status(warcprox_):
|
||||
if warcprox_.service_registry:
|
||||
start = time.time()
|
||||
while time.time() - start < 15:
|
||||
status = service_registry.available_service('warcprox')
|
||||
status = warcprox_.service_registry.available_service('warcprox')
|
||||
if status:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
@ -1373,11 +1251,11 @@ def test_controller_with_defaults():
|
||||
assert not wwt.writer_pool.default_warc_writer.record_builder.base32
|
||||
assert wwt.writer_pool.default_warc_writer.record_builder.digest_algorithm == 'sha1'
|
||||
|
||||
def test_choose_a_port_for_me(service_registry):
|
||||
def test_choose_a_port_for_me(warcprox_):
|
||||
options = warcprox.Options()
|
||||
options.port = 0
|
||||
controller = warcprox.controller.WarcproxController(
|
||||
service_registry=service_registry, options=options)
|
||||
service_registry=warcprox_.service_registry, options=options)
|
||||
assert controller.proxy.server_port != 0
|
||||
assert controller.proxy.server_port != 8000
|
||||
assert controller.proxy.server_address == (
|
||||
@ -1393,12 +1271,12 @@ def test_choose_a_port_for_me(service_registry):
|
||||
status = json.loads(response.content.decode('ascii'))
|
||||
assert status['port'] == controller.proxy.server_port
|
||||
|
||||
if service_registry:
|
||||
if warcprox_.service_registry:
|
||||
# check that service registry entry lists the correct port
|
||||
start = time.time()
|
||||
ports = []
|
||||
while time.time() - start < 30:
|
||||
svcs = service_registry.available_services('warcprox')
|
||||
svcs = warcprox_.service_registry.available_services('warcprox')
|
||||
ports = [svc['port'] for svc in svcs]
|
||||
if controller.proxy.server_port in ports:
|
||||
break
|
||||
|
Loading…
x
Reference in New Issue
Block a user