mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
greatly simplify automated test setup by reusing initialization code from the command line executable; this also has the benefit of testing that initialization code
This commit is contained in:
parent
0cc68dd428
commit
9b8043d3a2
2
setup.py
2
setup.py
@ -49,7 +49,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.2b1.dev104',
|
version='2.2b1.dev105',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@ -60,6 +60,7 @@ except ImportError:
|
|||||||
import certauth.certauth
|
import certauth.certauth
|
||||||
|
|
||||||
import warcprox
|
import warcprox
|
||||||
|
import warcprox.main
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import http.client as http_client
|
import http.client as http_client
|
||||||
@ -241,166 +242,44 @@ def https_daemon(request, cert):
|
|||||||
return https_daemon
|
return https_daemon
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def captures_db(request, rethinkdb_servers, rethinkdb_big_table):
|
def warcprox_(request, rethinkdb_servers, rethinkdb_big_table):
|
||||||
captures_db = None
|
orig_dir = os.getcwd()
|
||||||
|
work_dir = tempfile.mkdtemp()
|
||||||
|
logging.info('changing to working directory %r', work_dir)
|
||||||
|
os.chdir(work_dir)
|
||||||
|
|
||||||
|
argv = ['warcprox',
|
||||||
|
'--method-filter=GET',
|
||||||
|
'--method-filter=POST',
|
||||||
|
'--port=0',
|
||||||
|
'--playback-port=0',
|
||||||
|
'--onion-tor-socks-proxy=localhost:9050']
|
||||||
if rethinkdb_servers:
|
if rethinkdb_servers:
|
||||||
servers = rethinkdb_servers.split(",")
|
rethinkdb_db = 'warcprox_test_%s' % ''.join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
||||||
if rethinkdb_big_table:
|
argv.append('--rethinkdb-servers=%s' % rethinkdb_servers)
|
||||||
db = 'warcprox_test_captures_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
argv.append('--rethinkdb-db=%s' % rethinkdb_db)
|
||||||
rr = doublethink.Rethinker(servers, db)
|
if rethinkdb_big_table:
|
||||||
captures_db = warcprox.bigtable.RethinkCaptures(rr)
|
argv.append('--rethinkdb-big-table')
|
||||||
captures_db.start()
|
|
||||||
|
|
||||||
def fin():
|
args = warcprox.main.parse_args(argv)
|
||||||
if captures_db:
|
warcprox_ = warcprox.main.init_controller(args)
|
||||||
captures_db.close()
|
|
||||||
# logging.info('dropping rethinkdb database {}'.format(db))
|
|
||||||
# result = captures_db.rr.db_drop(db).run()
|
|
||||||
# logging.info("result=%s", result)
|
|
||||||
request.addfinalizer(fin)
|
|
||||||
|
|
||||||
return captures_db
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
|
||||||
def rethink_dedup_db(request, rethinkdb_servers, captures_db):
|
|
||||||
ddb = None
|
|
||||||
if rethinkdb_servers:
|
|
||||||
if captures_db:
|
|
||||||
ddb = warcprox.bigtable.RethinkCapturesDedup(captures_db)
|
|
||||||
else:
|
|
||||||
servers = rethinkdb_servers.split(",")
|
|
||||||
db = 'warcprox_test_dedup_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
|
||||||
rr = doublethink.Rethinker(servers, db)
|
|
||||||
ddb = warcprox.dedup.RethinkDedupDb(rr)
|
|
||||||
|
|
||||||
def fin():
|
|
||||||
if rethinkdb_servers and not captures_db:
|
|
||||||
logging.info('dropping rethinkdb database {}'.format(db))
|
|
||||||
result = ddb.rr.db_drop(db).run()
|
|
||||||
logging.info("result=%s", result)
|
|
||||||
request.addfinalizer(fin)
|
|
||||||
|
|
||||||
return ddb
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
|
||||||
def dedup_db(request, rethink_dedup_db):
|
|
||||||
dedup_db_file = None
|
|
||||||
ddb = rethink_dedup_db
|
|
||||||
if not ddb:
|
|
||||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-dedup-', suffix='.db', delete=False)
|
|
||||||
f.close()
|
|
||||||
dedup_db_file = f.name
|
|
||||||
ddb = warcprox.dedup.DedupDb(dedup_db_file)
|
|
||||||
|
|
||||||
def fin():
|
|
||||||
if dedup_db_file:
|
|
||||||
logging.info('deleting file {}'.format(dedup_db_file))
|
|
||||||
os.unlink(dedup_db_file)
|
|
||||||
request.addfinalizer(fin)
|
|
||||||
|
|
||||||
return ddb
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
|
||||||
def stats_db(request, rethinkdb_servers):
|
|
||||||
if rethinkdb_servers:
|
|
||||||
servers = rethinkdb_servers.split(",")
|
|
||||||
db = 'warcprox_test_stats_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
|
||||||
rr = doublethink.Rethinker(servers, db)
|
|
||||||
sdb = warcprox.stats.RethinkStatsDb(rr)
|
|
||||||
sdb.start()
|
|
||||||
else:
|
|
||||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-stats-', suffix='.db', delete=False)
|
|
||||||
f.close()
|
|
||||||
stats_db_file = f.name
|
|
||||||
sdb = warcprox.stats.StatsDb(stats_db_file)
|
|
||||||
|
|
||||||
def fin():
|
|
||||||
sdb.close()
|
|
||||||
if rethinkdb_servers:
|
|
||||||
logging.info('dropping rethinkdb database {}'.format(db))
|
|
||||||
result = sdb.rr.db_drop(db).run()
|
|
||||||
logging.info("result=%s", result)
|
|
||||||
# else:
|
|
||||||
# logging.info('deleting file {}'.format(stats_db_file))
|
|
||||||
# os.unlink(stats_db_file)
|
|
||||||
request.addfinalizer(fin)
|
|
||||||
|
|
||||||
return sdb
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
|
||||||
def service_registry(request, rethinkdb_servers):
|
|
||||||
if rethinkdb_servers:
|
|
||||||
servers = rethinkdb_servers.split(",")
|
|
||||||
db = 'warcprox_test_services_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
|
||||||
rr = doublethink.Rethinker(servers, db)
|
|
||||||
|
|
||||||
def fin():
|
|
||||||
logging.info('dropping rethinkdb database {}'.format(db))
|
|
||||||
result = rr.db_drop(db).run()
|
|
||||||
logging.info("result=%s", result)
|
|
||||||
request.addfinalizer(fin)
|
|
||||||
|
|
||||||
return doublethink.ServiceRegistry(rr)
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
|
||||||
def warcprox_(request, captures_db, dedup_db, stats_db, service_registry):
|
|
||||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-ca-', suffix='.pem', delete=True)
|
|
||||||
f.close() # delete it, or CertificateAuthority will try to read it
|
|
||||||
ca_file = f.name
|
|
||||||
ca_dir = tempfile.mkdtemp(prefix='warcprox-test-', suffix='-ca')
|
|
||||||
ca = certauth.certauth.CertificateAuthority(ca_file, ca_dir, 'warcprox-test')
|
|
||||||
|
|
||||||
recorded_url_q = warcprox.TimestampedQueue()
|
|
||||||
|
|
||||||
options = warcprox.Options(port=0, playback_port=0,
|
|
||||||
onion_tor_socks_proxy='localhost:9050')
|
|
||||||
proxy = warcprox.warcproxy.WarcProxy(ca=ca, recorded_url_q=recorded_url_q,
|
|
||||||
stats_db=stats_db, options=options)
|
|
||||||
options.port = proxy.server_port
|
|
||||||
|
|
||||||
options.directory = tempfile.mkdtemp(prefix='warcprox-test-warcs-')
|
|
||||||
|
|
||||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-playback-index-', suffix='.db', delete=False)
|
|
||||||
f.close()
|
|
||||||
playback_index_db_file = f.name
|
|
||||||
playback_index_db = warcprox.playback.PlaybackIndexDb(playback_index_db_file)
|
|
||||||
playback_proxy = warcprox.playback.PlaybackProxy(ca=ca,
|
|
||||||
playback_index_db=playback_index_db, options=options)
|
|
||||||
options.playback_proxy = playback_proxy.server_port
|
|
||||||
|
|
||||||
options.method_filter = ['GET','POST']
|
|
||||||
|
|
||||||
writer_pool = warcprox.writer.WarcWriterPool(options)
|
|
||||||
warc_writer_threads = [
|
|
||||||
warcprox.writerthread.WarcWriterThread(
|
|
||||||
recorded_url_q=recorded_url_q, writer_pool=writer_pool,
|
|
||||||
dedup_db=dedup_db, listeners=[
|
|
||||||
captures_db or dedup_db, playback_index_db, stats_db],
|
|
||||||
options=options)
|
|
||||||
for i in range(int(proxy.max_threads ** 0.5))]
|
|
||||||
|
|
||||||
warcprox_ = warcprox.controller.WarcproxController(
|
|
||||||
proxy=proxy, warc_writer_threads=warc_writer_threads,
|
|
||||||
playback_proxy=playback_proxy, service_registry=service_registry,
|
|
||||||
options=options)
|
|
||||||
logging.info('starting warcprox')
|
logging.info('starting warcprox')
|
||||||
warcprox_thread = threading.Thread(name='WarcproxThread',
|
warcprox_thread = threading.Thread(
|
||||||
target=warcprox_.run_until_shutdown)
|
name='WarcproxThread', target=warcprox_.run_until_shutdown)
|
||||||
warcprox_thread.start()
|
warcprox_thread.start()
|
||||||
|
|
||||||
def fin():
|
def fin():
|
||||||
logging.info('stopping warcprox')
|
|
||||||
warcprox_.stop.set()
|
warcprox_.stop.set()
|
||||||
warcprox_thread.join()
|
warcprox_thread.join()
|
||||||
for f in (ca_file, ca_dir, options.directory, playback_index_db_file):
|
if rethinkdb_servers:
|
||||||
if os.path.isdir(f):
|
logging.info('dropping rethinkdb database %r', rethinkdb_db)
|
||||||
logging.info('deleting directory {}'.format(f))
|
rr = doublethink.Rethinker(rethinkdb_servers)
|
||||||
shutil.rmtree(f)
|
result = rr.db_drop(rethinkdb_db).run()
|
||||||
else:
|
logging.info('deleting working directory %r', work_dir)
|
||||||
logging.info('deleting file {}'.format(f))
|
os.chdir(orig_dir)
|
||||||
os.unlink(f)
|
shutil.rmtree(work_dir)
|
||||||
|
|
||||||
request.addfinalizer(fin)
|
request.addfinalizer(fin)
|
||||||
|
|
||||||
return warcprox_
|
return warcprox_
|
||||||
@ -1226,9 +1105,8 @@ def test_method_filter(
|
|||||||
assert response.content == payload
|
assert response.content == payload
|
||||||
|
|
||||||
def test_dedup_ok_flag(
|
def test_dedup_ok_flag(
|
||||||
https_daemon, http_daemon, warcprox_, archiving_proxies,
|
https_daemon, http_daemon, warcprox_, archiving_proxies):
|
||||||
rethinkdb_big_table):
|
if not warcprox_.options.rethinkdb_big_table:
|
||||||
if not rethinkdb_big_table:
|
|
||||||
# this feature is n/a unless using rethinkdb big table
|
# this feature is n/a unless using rethinkdb big table
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -1312,11 +1190,11 @@ def test_status_api(warcprox_):
|
|||||||
assert status['pid'] == os.getpid()
|
assert status['pid'] == os.getpid()
|
||||||
assert status['threads'] == warcprox_.proxy.pool._max_workers
|
assert status['threads'] == warcprox_.proxy.pool._max_workers
|
||||||
|
|
||||||
def test_svcreg_status(warcprox_, service_registry):
|
def test_svcreg_status(warcprox_):
|
||||||
if service_registry:
|
if warcprox_.service_registry:
|
||||||
start = time.time()
|
start = time.time()
|
||||||
while time.time() - start < 15:
|
while time.time() - start < 15:
|
||||||
status = service_registry.available_service('warcprox')
|
status = warcprox_.service_registry.available_service('warcprox')
|
||||||
if status:
|
if status:
|
||||||
break
|
break
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
@ -1373,11 +1251,11 @@ def test_controller_with_defaults():
|
|||||||
assert not wwt.writer_pool.default_warc_writer.record_builder.base32
|
assert not wwt.writer_pool.default_warc_writer.record_builder.base32
|
||||||
assert wwt.writer_pool.default_warc_writer.record_builder.digest_algorithm == 'sha1'
|
assert wwt.writer_pool.default_warc_writer.record_builder.digest_algorithm == 'sha1'
|
||||||
|
|
||||||
def test_choose_a_port_for_me(service_registry):
|
def test_choose_a_port_for_me(warcprox_):
|
||||||
options = warcprox.Options()
|
options = warcprox.Options()
|
||||||
options.port = 0
|
options.port = 0
|
||||||
controller = warcprox.controller.WarcproxController(
|
controller = warcprox.controller.WarcproxController(
|
||||||
service_registry=service_registry, options=options)
|
service_registry=warcprox_.service_registry, options=options)
|
||||||
assert controller.proxy.server_port != 0
|
assert controller.proxy.server_port != 0
|
||||||
assert controller.proxy.server_port != 8000
|
assert controller.proxy.server_port != 8000
|
||||||
assert controller.proxy.server_address == (
|
assert controller.proxy.server_address == (
|
||||||
@ -1393,12 +1271,12 @@ def test_choose_a_port_for_me(service_registry):
|
|||||||
status = json.loads(response.content.decode('ascii'))
|
status = json.loads(response.content.decode('ascii'))
|
||||||
assert status['port'] == controller.proxy.server_port
|
assert status['port'] == controller.proxy.server_port
|
||||||
|
|
||||||
if service_registry:
|
if warcprox_.service_registry:
|
||||||
# check that service registry entry lists the correct port
|
# check that service registry entry lists the correct port
|
||||||
start = time.time()
|
start = time.time()
|
||||||
ports = []
|
ports = []
|
||||||
while time.time() - start < 30:
|
while time.time() - start < 30:
|
||||||
svcs = service_registry.available_services('warcprox')
|
svcs = warcprox_.service_registry.available_services('warcprox')
|
||||||
ports = [svc['port'] for svc in svcs]
|
ports = [svc['port'] for svc in svcs]
|
||||||
if controller.proxy.server_port in ports:
|
if controller.proxy.server_port in ports:
|
||||||
break
|
break
|
||||||
|
Loading…
x
Reference in New Issue
Block a user