mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
py.test option --rethinkdb-servers to run tests using rethinkdb
This commit is contained in:
parent
e66dc3a9fb
commit
3d90b9c2e9
11
warcprox/tests/conftest.py
Normal file
11
warcprox/tests/conftest.py
Normal file
@ -0,0 +1,11 @@
|
||||
# vim:set sw=4 et:
|
||||
import pytest
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption('--rethinkdb-servers', dest='rethinkdb_servers',
|
||||
help='rethink db servers for dedup, e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org')
|
||||
|
||||
@pytest.fixture
|
||||
def rethinkdb_servers(request):
|
||||
return request.config.getoption("--rethinkdb-servers")
|
||||
|
@ -15,6 +15,9 @@ import shutil
|
||||
import requests
|
||||
import re
|
||||
import json
|
||||
import rethinkdb
|
||||
r = rethinkdb
|
||||
import random
|
||||
|
||||
try:
|
||||
import http.server as http_server
|
||||
@ -129,7 +132,32 @@ def https_daemon(request, cert):
|
||||
return https_daemon
|
||||
|
||||
@pytest.fixture()
|
||||
def warcprox_(request):
|
||||
def dedup_db(request, rethinkdb_servers):
|
||||
if rethinkdb_servers:
|
||||
servers = rethinkdb_servers.split(",")
|
||||
db = 'warcprox_test_' + "".join(random.sample("abcdefghijklmnopqrstuvwxyz0123456789_",8))
|
||||
ddb = warcprox.dedup.RethinkDedupDb(servers, db)
|
||||
else:
|
||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-dedup-', suffix='.db', delete=False)
|
||||
f.close()
|
||||
dedup_db_file = f.name
|
||||
ddb = warcprox.dedup.DedupDb(dedup_db_file)
|
||||
|
||||
def fin():
|
||||
if rethinkdb_servers:
|
||||
logging.info('dropping rethinkdb database {}'.format(db))
|
||||
with ddb._random_server_connection() as conn:
|
||||
result = r.db_drop(db).run(conn)
|
||||
logging.info("result=%s", result)
|
||||
else:
|
||||
logging.info('deleting file {}'.format(dedup_db_file))
|
||||
os.unlink(dedup_db_file)
|
||||
request.addfinalizer(fin)
|
||||
|
||||
return ddb
|
||||
|
||||
@pytest.fixture()
|
||||
def warcprox_(request, dedup_db):
|
||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-ca-', suffix='.pem', delete=True)
|
||||
f.close() # delete it, or CertificateAuthority will try to read it
|
||||
ca_file = f.name
|
||||
@ -155,11 +183,6 @@ def warcprox_(request):
|
||||
playback_proxy = warcprox.playback.PlaybackProxy(server_address=('localhost', 0), ca=ca,
|
||||
playback_index_db=playback_index_db, warcs_dir=warcs_dir)
|
||||
|
||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-dedup-', suffix='.db', delete=False)
|
||||
f.close()
|
||||
dedup_db_file = f.name
|
||||
dedup_db = warcprox.dedup.DedupDb(dedup_db_file)
|
||||
|
||||
default_warc_writer = warcprox.writer.WarcWriter(directory=warcs_dir,
|
||||
port=proxy.server_port)
|
||||
writer_pool = warcprox.writer.WarcWriterPool(default_warc_writer)
|
||||
@ -178,7 +201,7 @@ def warcprox_(request):
|
||||
logging.info('stopping warcprox')
|
||||
warcprox_.stop.set()
|
||||
warcprox_thread.join()
|
||||
for f in (ca_file, ca_dir, warcs_dir, playback_index_db_file, dedup_db_file, stats_db_file):
|
||||
for f in (ca_file, ca_dir, warcs_dir, playback_index_db_file, stats_db_file):
|
||||
if os.path.isdir(f):
|
||||
logging.info('deleting directory {}'.format(f))
|
||||
shutil.rmtree(f)
|
||||
@ -297,13 +320,13 @@ def test_dedup_http(http_daemon, warcprox_, archiving_proxies, playback_proxies)
|
||||
assert response.content == b'I am the warcprox test payload! ffffffffff!\n'
|
||||
|
||||
# check in dedup db
|
||||
# {u'i': u'<urn:uuid:e691dc0f-4bb9-4ad8-9afb-2af836aa05e4>', u'u': u'https://localhost:62841/c/d', u'd': u'2013-11-22T00:14:37Z'}
|
||||
# {u'id': u'<urn:uuid:e691dc0f-4bb9-4ad8-9afb-2af836aa05e4>', u'url': u'https://localhost:62841/c/d', u'date': u'2013-11-22T00:14:37Z'}
|
||||
dedup_lookup = warcprox_.warc_writer_thread.dedup_db.lookup(b'sha1:65e1216acfd220f0292715e74bd7a1ec35c99dfc')
|
||||
assert dedup_lookup['u'] == url.encode('ascii')
|
||||
assert re.match(br'^<urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}>$', dedup_lookup['i'])
|
||||
assert re.match(br'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$', dedup_lookup['d'])
|
||||
record_id = dedup_lookup['i']
|
||||
dedup_date = dedup_lookup['d']
|
||||
assert dedup_lookup['url'] == url.encode('ascii')
|
||||
assert re.match(br'^<urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}>$', dedup_lookup['id'])
|
||||
assert re.match(br'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$', dedup_lookup['date'])
|
||||
record_id = dedup_lookup['id']
|
||||
dedup_date = dedup_lookup['date']
|
||||
|
||||
# need revisit to have a later timestamp than original, else playing
|
||||
# back the latest record might not hit the revisit
|
||||
@ -320,9 +343,9 @@ def test_dedup_http(http_daemon, warcprox_, archiving_proxies, playback_proxies)
|
||||
|
||||
# check in dedup db (no change from prev)
|
||||
dedup_lookup = warcprox_.warc_writer_thread.dedup_db.lookup(b'sha1:65e1216acfd220f0292715e74bd7a1ec35c99dfc')
|
||||
assert dedup_lookup['u'] == url.encode('ascii')
|
||||
assert dedup_lookup['i'] == record_id
|
||||
assert dedup_lookup['d'] == dedup_date
|
||||
assert dedup_lookup['url'] == url.encode('ascii')
|
||||
assert dedup_lookup['id'] == record_id
|
||||
assert dedup_lookup['date'] == dedup_date
|
||||
|
||||
# test playback
|
||||
logging.debug('testing playback of revisit of {}'.format(url))
|
||||
@ -358,13 +381,13 @@ def test_dedup_https(https_daemon, warcprox_, archiving_proxies, playback_proxie
|
||||
assert response.content == b'I am the warcprox test payload! hhhhhhhhhh!\n'
|
||||
|
||||
# check in dedup db
|
||||
# {u'i': u'<urn:uuid:e691dc0f-4bb9-4ad8-9afb-2af836aa05e4>', u'u': u'https://localhost:62841/c/d', u'd': u'2013-11-22T00:14:37Z'}
|
||||
# {u'id': u'<urn:uuid:e691dc0f-4bb9-4ad8-9afb-2af836aa05e4>', u'url': u'https://localhost:62841/c/d', u'date': u'2013-11-22T00:14:37Z'}
|
||||
dedup_lookup = warcprox_.warc_writer_thread.dedup_db.lookup(b'sha1:5b4efa64fdb308ec06ae56a9beba155a6f734b89')
|
||||
assert dedup_lookup['u'] == url.encode('ascii')
|
||||
assert re.match(br'^<urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}>$', dedup_lookup['i'])
|
||||
assert re.match(br'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$', dedup_lookup['d'])
|
||||
record_id = dedup_lookup['i']
|
||||
dedup_date = dedup_lookup['d']
|
||||
assert dedup_lookup['url'] == url.encode('ascii')
|
||||
assert re.match(br'^<urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}>$', dedup_lookup['id'])
|
||||
assert re.match(br'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$', dedup_lookup['date'])
|
||||
record_id = dedup_lookup['id']
|
||||
dedup_date = dedup_lookup['date']
|
||||
|
||||
# need revisit to have a later timestamp than original, else playing
|
||||
# back the latest record might not hit the revisit
|
||||
@ -381,9 +404,9 @@ def test_dedup_https(https_daemon, warcprox_, archiving_proxies, playback_proxie
|
||||
|
||||
# check in dedup db (no change from prev)
|
||||
dedup_lookup = warcprox_.warc_writer_thread.dedup_db.lookup(b'sha1:5b4efa64fdb308ec06ae56a9beba155a6f734b89')
|
||||
assert dedup_lookup['u'] == url.encode('ascii')
|
||||
assert dedup_lookup['i'] == record_id
|
||||
assert dedup_lookup['d'] == dedup_date
|
||||
assert dedup_lookup['url'] == url.encode('ascii')
|
||||
assert dedup_lookup['id'] == record_id
|
||||
assert dedup_lookup['date'] == dedup_date
|
||||
|
||||
# test playback
|
||||
logging.debug('testing playback of revisit of {}'.format(url))
|
||||
|
Loading…
x
Reference in New Issue
Block a user