diff --git a/setup.py b/setup.py
index cb26402..0062bac 100755
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ except:
 
 setuptools.setup(
         name='warcprox',
-        version='2.0.dev21',
+        version='2.0.dev22',
         description='WARC writing MITM HTTP/S proxy',
         url='https://github.com/internetarchive/warcprox',
         author='Noah Levitt',
@@ -66,6 +66,8 @@ setuptools.setup(
         entry_points={
             'console_scripts': [
                 'warcprox=warcprox.main:main',
+                ('warcprox-ensure-rethinkdb-tables='
+                    'warcprox.main:ensure_rethinkdb_tables'),
                 'dump-anydbm=warcprox.dump_anydbm:main',
             ],
         },
diff --git a/warcprox/main.py b/warcprox/main.py
index b203c1c..c4c3006 100644
--- a/warcprox/main.py
+++ b/warcprox/main.py
@@ -248,6 +248,49 @@ def main(argv=sys.argv):
 
     real_main(args)
 
+def ensure_rethinkdb_tables():
+    '''
+    Creates rethinkdb tables if they don't already exist. Warcprox normally
+    creates the tables it needs on demand at startup, but if multiple instances
+    are starting up at the same time, you can end up with duplicate broken
+    tables. So it's a good idea to use this utility at an early step when
+    spinning up a cluster.
+    '''
+    arg_parser = argparse.ArgumentParser(
+            prog=os.path.basename(sys.argv[0]),
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    arg_parser.add_argument(
+            '--rethinkdb-servers', dest='rethinkdb_servers', default='localhost',
+            help='rethinkdb servers e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org')
+    arg_parser.add_argument(
+            '--rethinkdb-db', dest='rethinkdb_db', default='warcprox',
+            help='rethinkdb database name')
+    arg_parser.add_argument(
+            '-q', '--quiet', dest='log_level',
+            action='store_const', default=logging.INFO, const=logging.WARN)
+    arg_parser.add_argument(
+            '-v', '--verbose', dest='log_level',
+            action='store_const', default=logging.INFO, const=logging.DEBUG)
+    args = arg_parser.parse_args(args=sys.argv[1:])
+
+    logging.basicConfig(
+            stream=sys.stdout, level=args.log_level,
+            format=(
+                '%(asctime)s %(levelname)s %(name)s.%(funcName)s'
+                '(%(filename)s:%(lineno)d) %(message)s'))
+
+    r = rethinkstuff.Rethinker(
+            args.rethinkdb_servers.split(','), args.rethinkdb_db)
+
+    # services table
+    rethinkstuff.ServiceRegistry(r)
+
+    # stats table
+    warcprox.stats.RethinkStatsDb(r)
+
+    # captures table
+    warcprox.bigtable.RethinkCaptures(r)
+
 if __name__ == '__main__':
     main()
 