From 3d90b9c2e9543472038da2c5898f196340051114 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 20 Aug 2015 22:51:29 +0000 Subject: [PATCH] py.test option --rethinkdb-servers to run tests using rethinkdb --- warcprox/tests/conftest.py | 11 +++++ warcprox/tests/test_warcprox.py | 73 ++++++++++++++++++++++----------- 2 files changed, 59 insertions(+), 25 deletions(-) create mode 100644 warcprox/tests/conftest.py diff --git a/warcprox/tests/conftest.py b/warcprox/tests/conftest.py new file mode 100644 index 0000000..7d42917 --- /dev/null +++ b/warcprox/tests/conftest.py @@ -0,0 +1,11 @@ +# vim:set sw=4 et: +import pytest + +def pytest_addoption(parser): + parser.addoption('--rethinkdb-servers', dest='rethinkdb_servers', + help='rethink db servers for dedup, e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org') + +@pytest.fixture +def rethinkdb_servers(request): + return request.config.getoption("--rethinkdb-servers") + diff --git a/warcprox/tests/test_warcprox.py b/warcprox/tests/test_warcprox.py index 62b3553..88e0e6a 100755 --- a/warcprox/tests/test_warcprox.py +++ b/warcprox/tests/test_warcprox.py @@ -15,6 +15,9 @@ import shutil import requests import re import json +import rethinkdb +r = rethinkdb +import random try: import http.server as http_server @@ -129,7 +132,32 @@ def https_daemon(request, cert): return https_daemon @pytest.fixture() -def warcprox_(request): +def dedup_db(request, rethinkdb_servers): + if rethinkdb_servers: + servers = rethinkdb_servers.split(",") + db = 'warcprox_test_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8)) + ddb = warcprox.dedup.RethinkDedupDb(servers, db) + else: + f = tempfile.NamedTemporaryFile(prefix='warcprox-test-dedup-', suffix='.db', delete=False) + f.close() + dedup_db_file = f.name + ddb = warcprox.dedup.DedupDb(dedup_db_file) + + def fin(): + if rethinkdb_servers: + logging.info('dropping rethinkdb database {}'.format(db)) + with ddb._random_server_connection() as conn: + result = r.db_drop(db).run(conn) + logging.info("result=%s", result) + else: + logging.info('deleting file {}'.format(dedup_db_file)) + os.unlink(dedup_db_file) + request.addfinalizer(fin) + + return ddb + +@pytest.fixture() +def warcprox_(request, dedup_db): f = tempfile.NamedTemporaryFile(prefix='warcprox-test-ca-', suffix='.pem', delete=True) f.close() # delete it, or CertificateAuthority will try to read it ca_file = f.name @@ -155,11 +183,6 @@ def warcprox_(request): playback_proxy = warcprox.playback.PlaybackProxy(server_address=('localhost', 0), ca=ca, playback_index_db=playback_index_db, warcs_dir=warcs_dir) - f = tempfile.NamedTemporaryFile(prefix='warcprox-test-dedup-', suffix='.db', delete=False) - f.close() - dedup_db_file = f.name - dedup_db = warcprox.dedup.DedupDb(dedup_db_file) - default_warc_writer = warcprox.writer.WarcWriter(directory=warcs_dir, port=proxy.server_port) writer_pool = warcprox.writer.WarcWriterPool(default_warc_writer) @@ -178,7 +201,7 @@ def warcprox_(request): logging.info('stopping warcprox') warcprox_.stop.set() warcprox_thread.join() - for f in (ca_file, ca_dir, warcs_dir, playback_index_db_file, dedup_db_file, stats_db_file): + for f in (ca_file, ca_dir, warcs_dir, playback_index_db_file, stats_db_file): if os.path.isdir(f): logging.info('deleting directory {}'.format(f)) shutil.rmtree(f) @@ -297,13 +320,13 @@ def test_dedup_http(http_daemon, warcprox_, archiving_proxies, playback_proxies) assert response.content == b'I am the warcprox test payload! ffffffffff!\n' # check in dedup db - # {u'i': u'', u'u': u'https://localhost:62841/c/d', u'd': u'2013-11-22T00:14:37Z'} + # {u'id': u'', u'url': u'https://localhost:62841/c/d', u'date': u'2013-11-22T00:14:37Z'} dedup_lookup = warcprox_.warc_writer_thread.dedup_db.lookup(b'sha1:65e1216acfd220f0292715e74bd7a1ec35c99dfc') - assert dedup_lookup['u'] == url.encode('ascii') - assert re.match(br'^$', dedup_lookup['i']) - assert re.match(br'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$', dedup_lookup['d']) - record_id = dedup_lookup['i'] - dedup_date = dedup_lookup['d'] + assert dedup_lookup['url'] == url.encode('ascii') + assert re.match(br'^$', dedup_lookup['id']) + assert re.match(br'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$', dedup_lookup['date']) + record_id = dedup_lookup['id'] + dedup_date = dedup_lookup['date'] # need revisit to have a later timestamp than original, else playing # back the latest record might not hit the revisit @@ -320,9 +343,9 @@ def test_dedup_http(http_daemon, warcprox_, archiving_proxies, playback_proxies) # check in dedup db (no change from prev) dedup_lookup = warcprox_.warc_writer_thread.dedup_db.lookup(b'sha1:65e1216acfd220f0292715e74bd7a1ec35c99dfc') - assert dedup_lookup['u'] == url.encode('ascii') - assert dedup_lookup['i'] == record_id - assert dedup_lookup['d'] == dedup_date + assert dedup_lookup['url'] == url.encode('ascii') + assert dedup_lookup['id'] == record_id + assert dedup_lookup['date'] == dedup_date # test playback logging.debug('testing playback of revisit of {}'.format(url)) @@ -358,13 +381,13 @@ def test_dedup_https(https_daemon, warcprox_, archiving_proxies, playback_proxie assert response.content == b'I am the warcprox test payload! hhhhhhhhhh!\n' # check in dedup db - # {u'i': u'', u'u': u'https://localhost:62841/c/d', u'd': u'2013-11-22T00:14:37Z'} + # {u'id': u'', u'url': u'https://localhost:62841/c/d', u'date': u'2013-11-22T00:14:37Z'} dedup_lookup = warcprox_.warc_writer_thread.dedup_db.lookup(b'sha1:5b4efa64fdb308ec06ae56a9beba155a6f734b89') - assert dedup_lookup['u'] == url.encode('ascii') - assert re.match(br'^$', dedup_lookup['i']) - assert re.match(br'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$', dedup_lookup['d']) - record_id = dedup_lookup['i'] - dedup_date = dedup_lookup['d'] + assert dedup_lookup['url'] == url.encode('ascii') + assert re.match(br'^$', dedup_lookup['id']) + assert re.match(br'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$', dedup_lookup['date']) + record_id = dedup_lookup['id'] + dedup_date = dedup_lookup['date'] # need revisit to have a later timestamp than original, else playing # back the latest record might not hit the revisit @@ -381,9 +404,9 @@ def test_dedup_https(https_daemon, warcprox_, archiving_proxies, playback_proxie # check in dedup db (no change from prev) dedup_lookup = warcprox_.warc_writer_thread.dedup_db.lookup(b'sha1:5b4efa64fdb308ec06ae56a9beba155a6f734b89') - assert dedup_lookup['u'] == url.encode('ascii') - assert dedup_lookup['i'] == record_id - assert dedup_lookup['d'] == dedup_date + assert dedup_lookup['url'] == url.encode('ascii') + assert dedup_lookup['id'] == record_id + assert dedup_lookup['date'] == dedup_date # test playback logging.debug('testing playback of revisit of {}'.format(url))