fix warcprox-ensure-rethinkdb-tables and add tests

This commit is contained in:
Noah Levitt 2017-11-28 10:38:38 -08:00
parent ef590a2fec
commit 61a7c234e8
2 changed files with 165 additions and 24 deletions

View File

@ -0,0 +1,109 @@
#!/usr/bin/env python
# vim: set fileencoding=utf-8:
'''
tests/test_ensure_rethinkdb_tables.py - automated tests of
ensure-rethinkdb-tables utility
Copyright (C) 2017 Internet Archive
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA.
'''
import warcprox.main
import pytest
import socket
import doublethink
import logging
import sys
# Configure the root logger once at import time: log records go to stdout at
# level warcprox.TRACE, and each line carries the timestamp, process id, level,
# thread name and the logger/function/file/line that emitted it.
logging.basicConfig(
stream=sys.stdout, level=warcprox.TRACE,
format='%(asctime)s %(process)d %(levelname)s %(threadName)s '
'%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
def rethinkdb_is_running(host='127.0.0.1', port=28015):
    '''
    Reports whether something accepts TCP connections at `host`:`port`.

    Called with no arguments it probes 127.0.0.1:28015, the address the
    tests in this module use for rethinkdb.

    Args:
        host: hostname or ip address to probe
        port: tcp port to probe

    Returns:
        True if the connection attempt succeeds, False if it fails with a
        socket error (connection refused, unreachable, bad hostname, ...).
    '''
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.connect((host, port))
        return True
    except socket.error:
        # Only network-level failures mean "not running"; anything else
        # (e.g. KeyboardInterrupt) is allowed to propagate.
        return False
    finally:
        # The socket is just a probe, always release its descriptor.
        sock.close()
# Decorator for tests that need a live rethinkdb: the probe runs once, when
# this module is imported, and decorated tests are skipped if it fails.
if_rethinkdb = pytest.mark.skipif(
not rethinkdb_is_running(),
reason='rethinkdb not listening at 127.0.0.1:28015')
@if_rethinkdb
def test_individual_options():
    '''
    Runs ensure_rethinkdb_tables once for each --rethinkdb-* option on its
    own and checks the table list of the database named in that option's
    url. Every database is dropped again before the next option is tried,
    whether or not the check passed.
    '''
    rr = doublethink.Rethinker(['127.0.0.1'])

    # (command line option, database to inspect, expected table_list())
    scenarios = (
        ('--rethinkdb-stats-url=rethinkdb://127.0.0.1/db0/stats',
            'db0', ['stats']),
        ('--rethinkdb-services-url=rethinkdb://127.0.0.1/db1/services',
            'db1', ['services']),
        ('--rethinkdb-dedup-url=rethinkdb://127.0.0.1/db2/dedup',
            'db2', ['dedup']),
        ('--rethinkdb-big-table-url=rethinkdb://127.0.0.1/db3/captures',
            'db3', ['captures']),
        # original author's note for the trough case:
        # ['assignment', 'lock', 'schema', 'services']
        ('--rethinkdb-trough-db-url=rethinkdb://127.0.0.1/db4',
            'db4', ['services']),
    )

    for option, db_name, expected_tables in scenarios:
        try:
            warcprox.main.ensure_rethinkdb_tables(
                    ['warcprox-ensure-rethinkdb-tables', option])
            assert rr.db(db_name).table_list().run() == expected_tables
        finally:
            rr.db_drop(db_name).run()
@if_rethinkdb
def test_combos():
    '''
    Supplies a stats table url and a trough db url in one invocation of
    ensure_rethinkdb_tables and checks the table list of each of the two
    databases, then drops both databases.
    '''
    rr = doublethink.Rethinker(['127.0.0.1'])
    argv = [
        'warcprox-ensure-rethinkdb-tables',
        '--rethinkdb-stats-url=rethinkdb://127.0.0.1/db00/stats',
        '--rethinkdb-trough-db-url=rethinkdb://127.0.0.1/db01',
    ]
    try:
        warcprox.main.ensure_rethinkdb_tables(argv)

        stats_db_tables = rr.db('db00').table_list().run()
        assert stats_db_tables == ['stats']

        # original author's note for the trough database:
        # ['assignment', 'lock', 'schema', 'services']
        trough_db_tables = rr.db('db01').table_list().run()
        assert trough_db_tables == ['services']
    finally:
        for db_name in ('db00', 'db01'):
            rr.db_drop(db_name).run()

View File

@ -43,7 +43,6 @@ import warcprox
import doublethink
import cryptography.hazmat.backends.openssl
import importlib
import doublethink
class BetterArgumentDefaultsHelpFormatter(
argparse.ArgumentDefaultsHelpFormatter,
@ -62,7 +61,7 @@ class BetterArgumentDefaultsHelpFormatter(
else:
return argparse.ArgumentDefaultsHelpFormatter._get_help_string(self, action)
def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
def _build_arg_parser(prog):
arg_parser = argparse.ArgumentParser(prog=prog,
description='warcprox - WARC writing MITM HTTP/S proxy',
formatter_class=BetterArgumentDefaultsHelpFormatter)
@ -294,7 +293,7 @@ def init_controller(args):
return controller
def parse_args(argv=sys.argv):
def parse_args(argv):
'''
Parses command line arguments with argparse.
'''
@ -302,11 +301,11 @@ def parse_args(argv=sys.argv):
args = arg_parser.parse_args(args=argv[1:])
return args
def main(argv=sys.argv):
def main(argv=None):
'''
Main method, entry point of warcprox command.
'''
args = parse_args(argv)
args = parse_args(argv or sys.argv)
if args.trace:
loglevel = warcprox.TRACE
@ -337,7 +336,7 @@ def main(argv=sys.argv):
controller.run_until_shutdown()
def ensure_rethinkdb_tables():
def ensure_rethinkdb_tables(argv=None):
'''
Creates rethinkdb tables if they don't already exist. Warcprox normally
creates the tables it needs on demand at startup, but if multiple instances
@ -345,41 +344,74 @@ def ensure_rethinkdb_tables():
tables. So it's a good idea to use this utility at an early step when
spinning up a cluster.
'''
raise Exception('adjust my args')
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]),
prog=os.path.basename(argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument(
'--rethinkdb-servers', dest='rethinkdb_servers', default='localhost',
help='rethinkdb servers e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org')
'--rethinkdb-stats-url', dest='rethinkdb_stats_url', help=(
'rethinkdb stats table url, e.g. rethinkdb://db0.foo.org,'
'db1.foo.org:38015/my_warcprox_db/my_stats_table'))
group = arg_parser.add_mutually_exclusive_group()
group.add_argument(
'--rethinkdb-dedup-url', dest='rethinkdb_dedup_url', help=(
'rethinkdb dedup url, e.g. rethinkdb://db0.foo.org,'
'db1.foo.org:38015/my_warcprox_db/my_dedup_table'))
group.add_argument(
'--rethinkdb-big-table-url', dest='rethinkdb_big_table_url', help=(
'rethinkdb big table url (table will be populated with '
'various capture information and is suitable for use as '
'index for playback), e.g. rethinkdb://db0.foo.org,'
'db1.foo.org:38015/my_warcprox_db/captures'))
group.add_argument(
'--rethinkdb-trough-db-url', dest='rethinkdb_trough_db_url', help=(
'🐷   url pointing to trough configuration rethinkdb database, '
'e.g. rethinkdb://db0.foo.org,db1.foo.org:38015'
'/trough_configuration'))
arg_parser.add_argument(
'--rethinkdb-db', dest='rethinkdb_db', default='warcprox',
help='rethinkdb database name')
'--rethinkdb-services-url', dest='rethinkdb_services_url', help=(
'rethinkdb service registry table url; if provided, warcprox '
'will create and heartbeat entry for itself'))
arg_parser.add_argument(
'-q', '--quiet', dest='log_level',
action='store_const', default=logging.INFO, const=logging.WARN)
arg_parser.add_argument(
'-v', '--verbose', dest='log_level',
action='store_const', default=logging.INFO, const=logging.DEBUG)
args = arg_parser.parse_args(args=sys.argv[1:])
args = arg_parser.parse_args(args=argv[1:])
logging.basicConfig(
stream=sys.stdout, level=args.log_level,
format=(
stream=sys.stdout, level=args.log_level, format=(
'%(asctime)s %(levelname)s %(name)s.%(funcName)s'
'(%(filename)s:%(lineno)d) %(message)s'))
rr = doublethink.Rethinker(
args.rethinkdb_servers.split(','), args.rethinkdb_db)
options = warcprox.Options(**vars(args))
# services table
doublethink.ServiceRegistry(rr)
did_something = False
if args.rethinkdb_services_url:
parsed = doublethink.parse_rethinkdb_url(
options.rethinkdb_services_url)
rr = doublethink.Rethinker(servers=parsed.hosts, db=parsed.database)
svcreg = doublethink.ServiceRegistry(rr, table=parsed.table)
did_something = True
if args.rethinkdb_stats_url:
stats_db = warcprox.stats.RethinkStatsDb(options=options)
did_something = True
if args.rethinkdb_dedup_url:
dedup_db = warcprox.dedup.RethinkDedupDb(options=options)
did_something = True
if args.rethinkdb_big_table_url:
dedup_db = warcprox.bigtable.RethinkCapturesDedup(options=options)
did_something = True
if args.rethinkdb_trough_db_url:
dedup_db = warcprox.dedup.TroughDedupDb(options)
logging.warn(
'trough it responsible for creating most of the rethinkdb '
'tables that it uses')
did_something = True
# stats table
warcprox.stats.RethinkStatsDb(rr)
# captures table
warcprox.bigtable.RethinkCaptures(rr)
if not did_something:
logging.error('nothing to do, no --rethinkdb-* options supplied')
if __name__ == '__main__':
main()