diff --git a/tests/test_ensure_rethinkdb_tables.py b/tests/test_ensure_rethinkdb_tables.py
new file mode 100644
index 0000000..030cddb
--- /dev/null
+++ b/tests/test_ensure_rethinkdb_tables.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8:
+'''
+tests/test_ensure_rethinkdb_tables.py - automated tests of
+ensure-rethinkdb-tables utility
+
+Copyright (C) 2017 Internet Archive
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+USA.
+'''
+
+import warcprox.main
+import pytest
+import socket
+import doublethink
+import logging
+import sys
+
+logging.basicConfig(
+        stream=sys.stdout, level=warcprox.TRACE,
+        format='%(asctime)s %(process)d %(levelname)s %(threadName)s '
+        '%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
+
+def rethinkdb_is_running():
+    '''Returns True if something accepts connections at 127.0.0.1:28015.'''
+    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        sock.connect(('127.0.0.1', 28015))
+        return True
+    except socket.error:
+        return False
+    finally:
+        # always release the probe socket, whether or not it connected
+        sock.close()
+
+if_rethinkdb = pytest.mark.skipif(
+        not rethinkdb_is_running(),
+        reason='rethinkdb not listening at 127.0.0.1:28015')
+
+@if_rethinkdb
+def test_individual_options():
+    '''Runs the utility with each --rethinkdb-*-url option on its own.'''
+    rr = doublethink.Rethinker(['127.0.0.1'])
+
+    try:
+        warcprox.main.ensure_rethinkdb_tables([
+            'warcprox-ensure-rethinkdb-tables',
+            '--rethinkdb-stats-url=rethinkdb://127.0.0.1/db0/stats'])
+        assert rr.db('db0').table_list().run() == ['stats']
+    finally:
+        rr.db_drop('db0').run()
+
+    try:
+        warcprox.main.ensure_rethinkdb_tables([
+            'warcprox-ensure-rethinkdb-tables',
+            '--rethinkdb-services-url=rethinkdb://127.0.0.1/db1/services'])
+        assert rr.db('db1').table_list().run() == ['services']
+    finally:
+        rr.db_drop('db1').run()
+
+    try:
+        warcprox.main.ensure_rethinkdb_tables([
+            'warcprox-ensure-rethinkdb-tables',
+            '--rethinkdb-dedup-url=rethinkdb://127.0.0.1/db2/dedup'])
+        assert rr.db('db2').table_list().run() == ['dedup']
+    finally:
+        rr.db_drop('db2').run()
+
+    try:
+        warcprox.main.ensure_rethinkdb_tables([
+            'warcprox-ensure-rethinkdb-tables',
+            '--rethinkdb-big-table-url=rethinkdb://127.0.0.1/db3/captures'])
+        assert rr.db('db3').table_list().run() == ['captures']
+    finally:
+        rr.db_drop('db3').run()
+
+    try:
+        warcprox.main.ensure_rethinkdb_tables([
+            'warcprox-ensure-rethinkdb-tables',
+            '--rethinkdb-trough-db-url=rethinkdb://127.0.0.1/db4'])
+        assert rr.db('db4').table_list().run() == ['services']
+        # ['assignment', 'lock', 'schema', 'services']
+    finally:
+        rr.db_drop('db4').run()
+
+@if_rethinkdb
+def test_combos():
+    '''Runs the utility with the stats and trough options given together.'''
+    rr = doublethink.Rethinker(['127.0.0.1'])
+
+    try:
+        warcprox.main.ensure_rethinkdb_tables([
+            'warcprox-ensure-rethinkdb-tables',
+            '--rethinkdb-stats-url=rethinkdb://127.0.0.1/db00/stats',
+            '--rethinkdb-trough-db-url=rethinkdb://127.0.0.1/db01',
+        ])
+        assert rr.db('db00').table_list().run() == ['stats']
+        assert rr.db('db01').table_list().run() == ['services']
+        # ['assignment', 'lock', 'schema', 'services']
+    finally:
+        rr.db_drop('db00').run()
+        rr.db_drop('db01').run()
diff --git a/warcprox/main.py b/warcprox/main.py
index a2fca9c..06cd176 100644
--- a/warcprox/main.py
+++ b/warcprox/main.py
@@ -43,7 +43,6 @@ import warcprox
 import doublethink
 import cryptography.hazmat.backends.openssl
 import importlib
-import doublethink
 
 class BetterArgumentDefaultsHelpFormatter(
                 argparse.ArgumentDefaultsHelpFormatter,
@@ -62,7 +61,7 @@ class BetterArgumentDefaultsHelpFormatter(
         else:
             return argparse.ArgumentDefaultsHelpFormatter._get_help_string(self, action)
 
-def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
+def _build_arg_parser(prog):
     arg_parser = argparse.ArgumentParser(prog=prog,
             description='warcprox - WARC writing MITM HTTP/S proxy',
             formatter_class=BetterArgumentDefaultsHelpFormatter)
@@ -294,7 +293,7 @@ def init_controller(args):
 
     return controller
 
-def parse_args(argv=sys.argv):
+def parse_args(argv):
     '''
     Parses command line arguments with argparse.
     '''
@@ -302,11 +301,11 @@ def parse_args(argv=sys.argv):
     args = arg_parser.parse_args(args=argv[1:])
     return args
 
-def main(argv=sys.argv):
+def main(argv=None):
     '''
     Main method, entry point of warcprox command.
     '''
-    args = parse_args(argv)
+    args = parse_args(argv or sys.argv)
 
     if args.trace:
         loglevel = warcprox.TRACE
@@ -337,7 +336,7 @@ def main(argv=sys.argv):
 
     controller.run_until_shutdown()
 
-def ensure_rethinkdb_tables():
+def ensure_rethinkdb_tables(argv=None):
     '''
     Creates rethinkdb tables if they don't already exist. Warcprox normally
     creates the tables it needs on demand at startup, but if multiple instances
@@ -345,41 +344,76 @@ def ensure_rethinkdb_tables():
     tables. So it's a good idea to use this utility at an early step when
     spinning up a cluster.
     '''
-    raise Exception('adjust my args')
+    argv = argv or sys.argv
     arg_parser = argparse.ArgumentParser(
-            prog=os.path.basename(sys.argv[0]),
+            prog=os.path.basename(argv[0]),
             formatter_class=BetterArgumentDefaultsHelpFormatter)
     arg_parser.add_argument(
-            '--rethinkdb-servers', dest='rethinkdb_servers', default='localhost',
-            help='rethinkdb servers e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org')
+            '--rethinkdb-stats-url', dest='rethinkdb_stats_url', help=(
+                'rethinkdb stats table url, e.g. rethinkdb://db0.foo.org,'
+                'db1.foo.org:38015/my_warcprox_db/my_stats_table'))
+    group = arg_parser.add_mutually_exclusive_group()
+    group.add_argument(
+            '--rethinkdb-dedup-url', dest='rethinkdb_dedup_url', help=(
+                'rethinkdb dedup url, e.g. rethinkdb://db0.foo.org,'
+                'db1.foo.org:38015/my_warcprox_db/my_dedup_table'))
+    group.add_argument(
+            '--rethinkdb-big-table-url', dest='rethinkdb_big_table_url', help=(
+                'rethinkdb big table url (table will be populated with '
+                'various capture information and is suitable for use as '
+                'index for playback), e.g. rethinkdb://db0.foo.org,'
+                'db1.foo.org:38015/my_warcprox_db/captures'))
+    group.add_argument(
+            '--rethinkdb-trough-db-url', dest='rethinkdb_trough_db_url', help=(
+                '🐷   url pointing to trough configuration rethinkdb database, '
+                'e.g. rethinkdb://db0.foo.org,db1.foo.org:38015'
+                '/trough_configuration'))
     arg_parser.add_argument(
-            '--rethinkdb-db', dest='rethinkdb_db', default='warcprox',
-            help='rethinkdb database name')
+            '--rethinkdb-services-url', dest='rethinkdb_services_url', help=(
+                'rethinkdb service registry table url; if provided, warcprox '
+                'will create and heartbeat entry for itself'))
     arg_parser.add_argument(
             '-q', '--quiet', dest='log_level',
             action='store_const', default=logging.INFO, const=logging.WARN)
     arg_parser.add_argument(
             '-v', '--verbose', dest='log_level',
             action='store_const', default=logging.INFO, const=logging.DEBUG)
-    args = arg_parser.parse_args(args=sys.argv[1:])
+    args = arg_parser.parse_args(args=argv[1:])
 
     logging.basicConfig(
-            stream=sys.stdout, level=args.log_level,
-            format=(
+            stream=sys.stdout, level=args.log_level, format=(
                 '%(asctime)s %(levelname)s %(name)s.%(funcName)s'
                 '(%(filename)s:%(lineno)d) %(message)s'))
 
-    rr = doublethink.Rethinker(
-            args.rethinkdb_servers.split(','), args.rethinkdb_db)
+    options = warcprox.Options(**vars(args))
 
-    # services table
-    doublethink.ServiceRegistry(rr)
+    # instantiating each backend below is presumably what creates its
+    # table(s); the resulting objects are not used afterwards
+    did_something = False
+    if args.rethinkdb_services_url:
+        parsed = doublethink.parse_rethinkdb_url(
+                options.rethinkdb_services_url)
+        rr = doublethink.Rethinker(servers=parsed.hosts, db=parsed.database)
+        svcreg = doublethink.ServiceRegistry(rr, table=parsed.table)
+        did_something = True
+    if args.rethinkdb_stats_url:
+        stats_db = warcprox.stats.RethinkStatsDb(options=options)
+        did_something = True
+    if args.rethinkdb_dedup_url:
+        dedup_db = warcprox.dedup.RethinkDedupDb(options=options)
+        did_something = True
+    if args.rethinkdb_big_table_url:
+        dedup_db = warcprox.bigtable.RethinkCapturesDedup(options=options)
+        did_something = True
+    if args.rethinkdb_trough_db_url:
+        dedup_db = warcprox.dedup.TroughDedupDb(options)
+        logging.warning(
+                'trough is responsible for creating most of the rethinkdb '
+                'tables that it uses')
+        did_something = True
 
-    # stats table
-    warcprox.stats.RethinkStatsDb(rr)
-
-    # captures table
-    warcprox.bigtable.RethinkCaptures(rr)
+    if not did_something:
+        logging.error('nothing to do, no --rethinkdb-* options supplied')
 
 if __name__ == '__main__':
     main()