Merge branch 'trough-dedup' into qa

* trough-dedup:
  fix warcprox-ensure-rethinkdb-tables and add tests
This commit is contained in:
Noah Levitt 2017-11-28 13:41:05 -08:00
commit d1472ed63c
2 changed files with 165 additions and 24 deletions

View File

@ -0,0 +1,109 @@
#!/usr/bin/env python
# vim: set fileencoding=utf-8:
'''
tests/test_ensure_rethinkdb_tables.py - automated tests of
ensure-rethinkdb-tables utility
Copyright (C) 2017 Internet Archive
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA.
'''
import warcprox.main
import pytest
import socket
import doublethink
import logging
import sys
# Route log records emitted while the tests run to stdout, at level
# warcprox.TRACE, with process/thread/source-location details on each line.
logging.basicConfig(
        level=warcprox.TRACE, stream=sys.stdout,
        format='%(asctime)s %(process)d %(levelname)s %(threadName)s '
               '%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
def rethinkdb_is_running(host='127.0.0.1', port=28015):
    '''
    Reports whether something accepts TCP connections at `host`:`port`.

    The defaults are the address the tests in this module expect rethinkdb
    to be listening on. Only connectivity is probed; nothing is sent, so
    this does not verify that the listener really is rethinkdb.

    Returns True if the connection attempt succeeds, False if it fails
    with a socket error (connection refused, unreachable, ...).
    '''
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.connect((host, port))
        return True
    except socket.error:
        # Only connection failures mean "not running"; anything else
        # (e.g. KeyboardInterrupt) should propagate instead of being hidden.
        return False
    finally:
        # Release the descriptor whether or not the connect succeeded.
        sock.close()
# Marker for tests that need a live rethinkdb: decorated tests are skipped
# when rethinkdb_is_running() reports no listener. The probe is evaluated
# once, when this module is imported.
if_rethinkdb = pytest.mark.skipif(
        not rethinkdb_is_running(),
        reason='rethinkdb not listening at 127.0.0.1:28015')
@if_rethinkdb
def test_individual_options():
    '''
    Runs ensure_rethinkdb_tables once for each --rethinkdb-*-url option on
    its own and checks the tables present afterwards in the database named
    by that url. Each database is dropped again before the next option is
    tried, whether or not the check passed.
    '''
    # (command line option, database it names, tables expected afterwards)
    cases = (
        ('--rethinkdb-stats-url=rethinkdb://127.0.0.1/db0/stats',
            'db0', ['stats']),
        ('--rethinkdb-services-url=rethinkdb://127.0.0.1/db1/services',
            'db1', ['services']),
        ('--rethinkdb-dedup-url=rethinkdb://127.0.0.1/db2/dedup',
            'db2', ['dedup']),
        ('--rethinkdb-big-table-url=rethinkdb://127.0.0.1/db3/captures',
            'db3', ['captures']),
        # only 'services' is expected in the trough configuration db, not
        # ['assignment', 'lock', 'schema', 'services']
        ('--rethinkdb-trough-db-url=rethinkdb://127.0.0.1/db4',
            'db4', ['services']),
    )
    rethinker = doublethink.Rethinker(['127.0.0.1'])
    for option, db_name, expected_tables in cases:
        try:
            warcprox.main.ensure_rethinkdb_tables(
                    ['warcprox-ensure-rethinkdb-tables', option])
            assert rethinker.db(db_name).table_list().run() == expected_tables
        finally:
            # clean up so a rerun starts from an empty slate
            rethinker.db_drop(db_name).run()
@if_rethinkdb
def test_combos():
    '''
    Runs ensure_rethinkdb_tables with a stats url and a trough db url
    together and checks the tables present afterwards in each of the two
    databases named by those urls. Both databases are dropped at the end.
    '''
    rr = doublethink.Rethinker(['127.0.0.1'])
    try:
        warcprox.main.ensure_rethinkdb_tables([
            'warcprox-ensure-rethinkdb-tables',
            '--rethinkdb-stats-url=rethinkdb://127.0.0.1/db00/stats',
            '--rethinkdb-trough-db-url=rethinkdb://127.0.0.1/db01',
        ])
        assert rr.db('db00').table_list().run() == ['stats']
        # only 'services' is expected in the trough configuration db, not
        # ['assignment', 'lock', 'schema', 'services']
        assert rr.db('db01').table_list().run() == ['services']
    finally:
        # Nested so that a failure dropping db00 (e.g. it was never
        # created) cannot leave db01 behind for later test runs.
        try:
            rr.db_drop('db00').run()
        finally:
            rr.db_drop('db01').run()

View File

@ -43,7 +43,6 @@ import warcprox
import doublethink import doublethink
import cryptography.hazmat.backends.openssl import cryptography.hazmat.backends.openssl
import importlib import importlib
import doublethink
class BetterArgumentDefaultsHelpFormatter( class BetterArgumentDefaultsHelpFormatter(
argparse.ArgumentDefaultsHelpFormatter, argparse.ArgumentDefaultsHelpFormatter,
@ -62,7 +61,7 @@ class BetterArgumentDefaultsHelpFormatter(
else: else:
return argparse.ArgumentDefaultsHelpFormatter._get_help_string(self, action) return argparse.ArgumentDefaultsHelpFormatter._get_help_string(self, action)
def _build_arg_parser(prog=os.path.basename(sys.argv[0])): def _build_arg_parser(prog):
arg_parser = argparse.ArgumentParser(prog=prog, arg_parser = argparse.ArgumentParser(prog=prog,
description='warcprox - WARC writing MITM HTTP/S proxy', description='warcprox - WARC writing MITM HTTP/S proxy',
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
@ -299,7 +298,7 @@ def init_controller(args):
return controller return controller
def parse_args(argv=sys.argv): def parse_args(argv):
''' '''
Parses command line arguments with argparse. Parses command line arguments with argparse.
''' '''
@ -307,11 +306,11 @@ def parse_args(argv=sys.argv):
args = arg_parser.parse_args(args=argv[1:]) args = arg_parser.parse_args(args=argv[1:])
return args return args
def main(argv=sys.argv): def main(argv=None):
''' '''
Main method, entry point of warcprox command. Main method, entry point of warcprox command.
''' '''
args = parse_args(argv) args = parse_args(argv or sys.argv)
if args.trace: if args.trace:
loglevel = warcprox.TRACE loglevel = warcprox.TRACE
@ -342,7 +341,7 @@ def main(argv=sys.argv):
controller.run_until_shutdown() controller.run_until_shutdown()
def ensure_rethinkdb_tables(): def ensure_rethinkdb_tables(argv=None):
''' '''
Creates rethinkdb tables if they don't already exist. Warcprox normally Creates rethinkdb tables if they don't already exist. Warcprox normally
creates the tables it needs on demand at startup, but if multiple instances creates the tables it needs on demand at startup, but if multiple instances
@ -350,41 +349,74 @@ def ensure_rethinkdb_tables():
tables. So it's a good idea to use this utility at an early step when tables. So it's a good idea to use this utility at an early step when
spinning up a cluster. spinning up a cluster.
''' '''
raise Exception('adjust my args') argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
'--rethinkdb-servers', dest='rethinkdb_servers', default='localhost', '--rethinkdb-stats-url', dest='rethinkdb_stats_url', help=(
help='rethinkdb servers e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org') 'rethinkdb stats table url, e.g. rethinkdb://db0.foo.org,'
'db1.foo.org:38015/my_warcprox_db/my_stats_table'))
group = arg_parser.add_mutually_exclusive_group()
group.add_argument(
'--rethinkdb-dedup-url', dest='rethinkdb_dedup_url', help=(
'rethinkdb dedup url, e.g. rethinkdb://db0.foo.org,'
'db1.foo.org:38015/my_warcprox_db/my_dedup_table'))
group.add_argument(
'--rethinkdb-big-table-url', dest='rethinkdb_big_table_url', help=(
'rethinkdb big table url (table will be populated with '
'various capture information and is suitable for use as '
'index for playback), e.g. rethinkdb://db0.foo.org,'
'db1.foo.org:38015/my_warcprox_db/captures'))
group.add_argument(
'--rethinkdb-trough-db-url', dest='rethinkdb_trough_db_url', help=(
'🐷   url pointing to trough configuration rethinkdb database, '
'e.g. rethinkdb://db0.foo.org,db1.foo.org:38015'
'/trough_configuration'))
arg_parser.add_argument( arg_parser.add_argument(
'--rethinkdb-db', dest='rethinkdb_db', default='warcprox', '--rethinkdb-services-url', dest='rethinkdb_services_url', help=(
help='rethinkdb database name') 'rethinkdb service registry table url; if provided, warcprox '
'will create and heartbeat entry for itself'))
arg_parser.add_argument( arg_parser.add_argument(
'-q', '--quiet', dest='log_level', '-q', '--quiet', dest='log_level',
action='store_const', default=logging.INFO, const=logging.WARN) action='store_const', default=logging.INFO, const=logging.WARN)
arg_parser.add_argument( arg_parser.add_argument(
'-v', '--verbose', dest='log_level', '-v', '--verbose', dest='log_level',
action='store_const', default=logging.INFO, const=logging.DEBUG) action='store_const', default=logging.INFO, const=logging.DEBUG)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
logging.basicConfig( logging.basicConfig(
stream=sys.stdout, level=args.log_level, stream=sys.stdout, level=args.log_level, format=(
format=(
'%(asctime)s %(levelname)s %(name)s.%(funcName)s' '%(asctime)s %(levelname)s %(name)s.%(funcName)s'
'(%(filename)s:%(lineno)d) %(message)s')) '(%(filename)s:%(lineno)d) %(message)s'))
rr = doublethink.Rethinker( options = warcprox.Options(**vars(args))
args.rethinkdb_servers.split(','), args.rethinkdb_db)
# services table did_something = False
doublethink.ServiceRegistry(rr) if args.rethinkdb_services_url:
parsed = doublethink.parse_rethinkdb_url(
options.rethinkdb_services_url)
rr = doublethink.Rethinker(servers=parsed.hosts, db=parsed.database)
svcreg = doublethink.ServiceRegistry(rr, table=parsed.table)
did_something = True
if args.rethinkdb_stats_url:
stats_db = warcprox.stats.RethinkStatsDb(options=options)
did_something = True
if args.rethinkdb_dedup_url:
dedup_db = warcprox.dedup.RethinkDedupDb(options=options)
did_something = True
if args.rethinkdb_big_table_url:
dedup_db = warcprox.bigtable.RethinkCapturesDedup(options=options)
did_something = True
if args.rethinkdb_trough_db_url:
dedup_db = warcprox.dedup.TroughDedupDb(options)
logging.warn(
'trough it responsible for creating most of the rethinkdb '
'tables that it uses')
did_something = True
# stats table if not did_something:
warcprox.stats.RethinkStatsDb(rr) logging.error('nothing to do, no --rethinkdb-* options supplied')
# captures table
warcprox.bigtable.RethinkCaptures(rr)
if __name__ == '__main__': if __name__ == '__main__':
main() main()