From b82d82b5f111a5758f8143b5d0b63e13954ca2c7 Mon Sep 17 00:00:00 2001
From: Noah Levitt
Date: Thu, 30 Jun 2016 15:24:40 -0500
Subject: [PATCH] command line utility warcprox-ensure-rethinkdb-tables,
 creates rethinkdb tables if they don't already exist... warcprox normally
 creates them on demand at startup, but if multiple instances are starting up
 at the same time, you can end up with duplicate broken tables, so it's a good
 idea to use this utility when spinning up a cluster

---
Reviewer notes (not part of the commit):

* The line structure of this patch was restored from a copy whose line breaks
  had been collapsed. Indentation and blank context lines were reconstructed
  from the "+" markers and the hunk line counts; verify against commit
  b82d82b5 before applying.
* setup.py: bumps the version from 2.0.dev21 to 2.0.dev22 and registers a new
  console script, warcprox-ensure-rethinkdb-tables, bound to
  warcprox.main:ensure_rethinkdb_tables.
* warcprox/main.py: ensure_rethinkdb_tables() parses --rethinkdb-servers
  (comma-separated, default "localhost"), --rethinkdb-db (default "warcprox")
  and -q / -v (log level WARN / DEBUG, default INFO), sends logging to stdout,
  then builds a rethinkstuff.Rethinker and instantiates
  rethinkstuff.ServiceRegistry, warcprox.stats.RethinkStatsDb and
  warcprox.bigtable.RethinkCaptures with it. Presumably each constructor
  creates its table when it is missing; that code is not part of this patch,
  so confirm against those classes.
* Unlike main(argv=sys.argv), the new entry point takes no argv parameter and
  reads sys.argv directly.

 setup.py         |  4 +++-
 warcprox/main.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index cb26402..0062bac 100755
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ except:
 
 setuptools.setup(
         name='warcprox',
-        version='2.0.dev21',
+        version='2.0.dev22',
         description='WARC writing MITM HTTP/S proxy',
         url='https://github.com/internetarchive/warcprox',
         author='Noah Levitt',
@@ -66,6 +66,8 @@ setuptools.setup(
         entry_points={
             'console_scripts': [
                 'warcprox=warcprox.main:main',
+                ('warcprox-ensure-rethinkdb-tables='
+                    'warcprox.main:ensure_rethinkdb_tables'),
                 'dump-anydbm=warcprox.dump_anydbm:main',
             ],
         },
diff --git a/warcprox/main.py b/warcprox/main.py
index b203c1c..c4c3006 100644
--- a/warcprox/main.py
+++ b/warcprox/main.py
@@ -248,6 +248,49 @@ def main(argv=sys.argv):
 
     real_main(args)
 
+def ensure_rethinkdb_tables():
+    '''
+    Creates rethinkdb tables if they don't already exist. Warcprox normally
+    creates the tables it needs on demand at startup, but if multiple instances
+    are starting up at the same time, you can end up with duplicate broken
+    tables. So it's a good idea to use this utility at an early step when
+    spinning up a cluster.
+    '''
+    arg_parser = argparse.ArgumentParser(
+            prog=os.path.basename(sys.argv[0]),
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    arg_parser.add_argument(
+            '--rethinkdb-servers', dest='rethinkdb_servers', default='localhost',
+            help='rethinkdb servers e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org')
+    arg_parser.add_argument(
+            '--rethinkdb-db', dest='rethinkdb_db', default='warcprox',
+            help='rethinkdb database name')
+    arg_parser.add_argument(
+            '-q', '--quiet', dest='log_level',
+            action='store_const', default=logging.INFO, const=logging.WARN)
+    arg_parser.add_argument(
+            '-v', '--verbose', dest='log_level',
+            action='store_const', default=logging.INFO, const=logging.DEBUG)
+    args = arg_parser.parse_args(args=sys.argv[1:])
+
+    logging.basicConfig(
+            stream=sys.stdout, level=args.log_level,
+            format=(
+                '%(asctime)s %(levelname)s %(name)s.%(funcName)s'
+                '(%(filename)s:%(lineno)d) %(message)s'))
+
+    r = rethinkstuff.Rethinker(
+            args.rethinkdb_servers.split(','), args.rethinkdb_db)
+
+    # services table
+    rethinkstuff.ServiceRegistry(r)
+
+    # stats table
+    warcprox.stats.RethinkStatsDb(r)
+
+    # captures table
+    warcprox.bigtable.RethinkCaptures(r)
+
 if __name__ == '__main__':
     main()
 