mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'trough-dedup' into qa
* trough-dedup: fix warcprox-ensure-rethinkdb-tables and add tests
This commit is contained in:
commit
d1472ed63c
109
tests/test_ensure_rethinkdb_tables.py
Normal file
109
tests/test_ensure_rethinkdb_tables.py
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim: set fileencoding=utf-8:
|
||||||
|
'''
|
||||||
|
tests/test_ensure_rethinkdb_tables.py - automated tests of
|
||||||
|
ensure-rethinkdb-tables utility
|
||||||
|
|
||||||
|
Copyright (C) 2017 Internet Archive
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
USA.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import warcprox.main
|
||||||
|
import pytest
|
||||||
|
import socket
|
||||||
|
import doublethink
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
stream=sys.stdout, level=warcprox.TRACE,
|
||||||
|
format='%(asctime)s %(process)d %(levelname)s %(threadName)s '
|
||||||
|
'%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
|
||||||
|
|
||||||
|
def rethinkdb_is_running():
    '''
    Probes for a server accepting TCP connections on 127.0.0.1:28015.

    Returns:
        True if the connection attempt succeeds, False if it fails with a
        socket-level error (e.g. connection refused).
    '''
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.connect(('127.0.0.1', 28015))
        return True
    except socket.error:
        # socket.error rather than a bare except: a bare except would also
        # swallow KeyboardInterrupt/SystemExit; socket.error is the
        # connection-failure base class on python 2 and an alias of OSError
        # on python 3
        return False
    finally:
        # the probe socket is only needed for the connect attempt; close it
        # on both paths so it does not leak for the rest of the test session
        sock.close()
|
||||||
|
|
||||||
|
# Decorator for tests that need a live rethinkdb; the probe runs once, when
# this module is imported, and the decorated tests are skipped if it fails.
_rethinkdb_unavailable = not rethinkdb_is_running()
if_rethinkdb = pytest.mark.skipif(
        _rethinkdb_unavailable,
        reason='rethinkdb not listening at 127.0.0.1:28015')
|
||||||
|
|
||||||
|
@if_rethinkdb
def test_individual_options():
    '''
    Runs ensure_rethinkdb_tables with each --rethinkdb-*-url option on its
    own, one scratch database per option, and checks the table list of that
    database afterwards. Each scratch database is dropped again whether or
    not the check passed.
    '''
    rr = doublethink.Rethinker(['127.0.0.1'])

    # (command line option, scratch database, expected table_list() result)
    scenarios = (
        ('--rethinkdb-stats-url=rethinkdb://127.0.0.1/db0/stats',
            'db0', ['stats']),
        ('--rethinkdb-services-url=rethinkdb://127.0.0.1/db1/services',
            'db1', ['services']),
        ('--rethinkdb-dedup-url=rethinkdb://127.0.0.1/db2/dedup',
            'db2', ['dedup']),
        ('--rethinkdb-big-table-url=rethinkdb://127.0.0.1/db3/captures',
            'db3', ['captures']),
        # the original test carried the note
        # ['assignment', 'lock', 'schema', 'services'] next to this
        # expectation; presumably the full set of trough tables, of which
        # only 'services' is created here -- TODO confirm
        ('--rethinkdb-trough-db-url=rethinkdb://127.0.0.1/db4',
            'db4', ['services']),
    )

    for option, db_name, expected_tables in scenarios:
        try:
            warcprox.main.ensure_rethinkdb_tables(
                    ['warcprox-ensure-rethinkdb-tables', option])
            assert rr.db(db_name).table_list().run() == expected_tables
        finally:
            rr.db_drop(db_name).run()
|
||||||
|
|
||||||
|
@if_rethinkdb
def test_combos():
    '''
    Runs ensure_rethinkdb_tables with a stats url and a trough db url in the
    same invocation, each pointing at its own scratch database, and checks
    the table list of both databases. Both databases are dropped afterwards.
    '''
    rr = doublethink.Rethinker(['127.0.0.1'])
    argv = [
        'warcprox-ensure-rethinkdb-tables',
        '--rethinkdb-stats-url=rethinkdb://127.0.0.1/db00/stats',
        '--rethinkdb-trough-db-url=rethinkdb://127.0.0.1/db01',
    ]

    try:
        warcprox.main.ensure_rethinkdb_tables(argv)

        stats_tables = rr.db('db00').table_list().run()
        assert stats_tables == ['stats']

        # the original test carried the note
        # ['assignment', 'lock', 'schema', 'services'] next to this
        # expectation; presumably the full set of trough tables -- TODO
        # confirm
        trough_tables = rr.db('db01').table_list().run()
        assert trough_tables == ['services']
    finally:
        for scratch_db in ('db00', 'db01'):
            rr.db_drop(scratch_db).run()
|
@ -43,7 +43,6 @@ import warcprox
|
|||||||
import doublethink
|
import doublethink
|
||||||
import cryptography.hazmat.backends.openssl
|
import cryptography.hazmat.backends.openssl
|
||||||
import importlib
|
import importlib
|
||||||
import doublethink
|
|
||||||
|
|
||||||
class BetterArgumentDefaultsHelpFormatter(
|
class BetterArgumentDefaultsHelpFormatter(
|
||||||
argparse.ArgumentDefaultsHelpFormatter,
|
argparse.ArgumentDefaultsHelpFormatter,
|
||||||
@ -62,7 +61,7 @@ class BetterArgumentDefaultsHelpFormatter(
|
|||||||
else:
|
else:
|
||||||
return argparse.ArgumentDefaultsHelpFormatter._get_help_string(self, action)
|
return argparse.ArgumentDefaultsHelpFormatter._get_help_string(self, action)
|
||||||
|
|
||||||
def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
def _build_arg_parser(prog):
|
||||||
arg_parser = argparse.ArgumentParser(prog=prog,
|
arg_parser = argparse.ArgumentParser(prog=prog,
|
||||||
description='warcprox - WARC writing MITM HTTP/S proxy',
|
description='warcprox - WARC writing MITM HTTP/S proxy',
|
||||||
formatter_class=BetterArgumentDefaultsHelpFormatter)
|
formatter_class=BetterArgumentDefaultsHelpFormatter)
|
||||||
@ -299,7 +298,7 @@ def init_controller(args):
|
|||||||
|
|
||||||
return controller
|
return controller
|
||||||
|
|
||||||
def parse_args(argv=sys.argv):
|
def parse_args(argv):
|
||||||
'''
|
'''
|
||||||
Parses command line arguments with argparse.
|
Parses command line arguments with argparse.
|
||||||
'''
|
'''
|
||||||
@ -307,11 +306,11 @@ def parse_args(argv=sys.argv):
|
|||||||
args = arg_parser.parse_args(args=argv[1:])
|
args = arg_parser.parse_args(args=argv[1:])
|
||||||
return args
|
return args
|
||||||
|
|
||||||
def main(argv=sys.argv):
|
def main(argv=None):
|
||||||
'''
|
'''
|
||||||
Main method, entry point of warcprox command.
|
Main method, entry point of warcprox command.
|
||||||
'''
|
'''
|
||||||
args = parse_args(argv)
|
args = parse_args(argv or sys.argv)
|
||||||
|
|
||||||
if args.trace:
|
if args.trace:
|
||||||
loglevel = warcprox.TRACE
|
loglevel = warcprox.TRACE
|
||||||
@ -342,7 +341,7 @@ def main(argv=sys.argv):
|
|||||||
|
|
||||||
controller.run_until_shutdown()
|
controller.run_until_shutdown()
|
||||||
|
|
||||||
def ensure_rethinkdb_tables():
|
def ensure_rethinkdb_tables(argv=None):
|
||||||
'''
|
'''
|
||||||
Creates rethinkdb tables if they don't already exist. Warcprox normally
|
Creates rethinkdb tables if they don't already exist. Warcprox normally
|
||||||
creates the tables it needs on demand at startup, but if multiple instances
|
creates the tables it needs on demand at startup, but if multiple instances
|
||||||
@ -350,41 +349,74 @@ def ensure_rethinkdb_tables():
|
|||||||
tables. So it's a good idea to use this utility at an early step when
|
tables. So it's a good idea to use this utility at an early step when
|
||||||
spinning up a cluster.
|
spinning up a cluster.
|
||||||
'''
|
'''
|
||||||
raise Exception('adjust my args')
|
argv = argv or sys.argv
|
||||||
arg_parser = argparse.ArgumentParser(
|
arg_parser = argparse.ArgumentParser(
|
||||||
prog=os.path.basename(sys.argv[0]),
|
prog=os.path.basename(argv[0]),
|
||||||
formatter_class=BetterArgumentDefaultsHelpFormatter)
|
formatter_class=BetterArgumentDefaultsHelpFormatter)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--rethinkdb-servers', dest='rethinkdb_servers', default='localhost',
|
'--rethinkdb-stats-url', dest='rethinkdb_stats_url', help=(
|
||||||
help='rethinkdb servers e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org')
|
'rethinkdb stats table url, e.g. rethinkdb://db0.foo.org,'
|
||||||
|
'db1.foo.org:38015/my_warcprox_db/my_stats_table'))
|
||||||
|
group = arg_parser.add_mutually_exclusive_group()
|
||||||
|
group.add_argument(
|
||||||
|
'--rethinkdb-dedup-url', dest='rethinkdb_dedup_url', help=(
|
||||||
|
'rethinkdb dedup url, e.g. rethinkdb://db0.foo.org,'
|
||||||
|
'db1.foo.org:38015/my_warcprox_db/my_dedup_table'))
|
||||||
|
group.add_argument(
|
||||||
|
'--rethinkdb-big-table-url', dest='rethinkdb_big_table_url', help=(
|
||||||
|
'rethinkdb big table url (table will be populated with '
|
||||||
|
'various capture information and is suitable for use as '
|
||||||
|
'index for playback), e.g. rethinkdb://db0.foo.org,'
|
||||||
|
'db1.foo.org:38015/my_warcprox_db/captures'))
|
||||||
|
group.add_argument(
|
||||||
|
'--rethinkdb-trough-db-url', dest='rethinkdb_trough_db_url', help=(
|
||||||
|
'🐷 url pointing to trough configuration rethinkdb database, '
|
||||||
|
'e.g. rethinkdb://db0.foo.org,db1.foo.org:38015'
|
||||||
|
'/trough_configuration'))
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--rethinkdb-db', dest='rethinkdb_db', default='warcprox',
|
'--rethinkdb-services-url', dest='rethinkdb_services_url', help=(
|
||||||
help='rethinkdb database name')
|
'rethinkdb service registry table url; if provided, warcprox '
|
||||||
|
'will create and heartbeat entry for itself'))
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'-q', '--quiet', dest='log_level',
|
'-q', '--quiet', dest='log_level',
|
||||||
action='store_const', default=logging.INFO, const=logging.WARN)
|
action='store_const', default=logging.INFO, const=logging.WARN)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'-v', '--verbose', dest='log_level',
|
'-v', '--verbose', dest='log_level',
|
||||||
action='store_const', default=logging.INFO, const=logging.DEBUG)
|
action='store_const', default=logging.INFO, const=logging.DEBUG)
|
||||||
args = arg_parser.parse_args(args=sys.argv[1:])
|
args = arg_parser.parse_args(args=argv[1:])
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
stream=sys.stdout, level=args.log_level,
|
stream=sys.stdout, level=args.log_level, format=(
|
||||||
format=(
|
|
||||||
'%(asctime)s %(levelname)s %(name)s.%(funcName)s'
|
'%(asctime)s %(levelname)s %(name)s.%(funcName)s'
|
||||||
'(%(filename)s:%(lineno)d) %(message)s'))
|
'(%(filename)s:%(lineno)d) %(message)s'))
|
||||||
|
|
||||||
rr = doublethink.Rethinker(
|
options = warcprox.Options(**vars(args))
|
||||||
args.rethinkdb_servers.split(','), args.rethinkdb_db)
|
|
||||||
|
|
||||||
# services table
|
did_something = False
|
||||||
doublethink.ServiceRegistry(rr)
|
if args.rethinkdb_services_url:
|
||||||
|
parsed = doublethink.parse_rethinkdb_url(
|
||||||
|
options.rethinkdb_services_url)
|
||||||
|
rr = doublethink.Rethinker(servers=parsed.hosts, db=parsed.database)
|
||||||
|
svcreg = doublethink.ServiceRegistry(rr, table=parsed.table)
|
||||||
|
did_something = True
|
||||||
|
if args.rethinkdb_stats_url:
|
||||||
|
stats_db = warcprox.stats.RethinkStatsDb(options=options)
|
||||||
|
did_something = True
|
||||||
|
if args.rethinkdb_dedup_url:
|
||||||
|
dedup_db = warcprox.dedup.RethinkDedupDb(options=options)
|
||||||
|
did_something = True
|
||||||
|
if args.rethinkdb_big_table_url:
|
||||||
|
dedup_db = warcprox.bigtable.RethinkCapturesDedup(options=options)
|
||||||
|
did_something = True
|
||||||
|
if args.rethinkdb_trough_db_url:
|
||||||
|
dedup_db = warcprox.dedup.TroughDedupDb(options)
|
||||||
|
logging.warn(
|
||||||
|
'trough it responsible for creating most of the rethinkdb '
|
||||||
|
'tables that it uses')
|
||||||
|
did_something = True
|
||||||
|
|
||||||
# stats table
|
if not did_something:
|
||||||
warcprox.stats.RethinkStatsDb(rr)
|
logging.error('nothing to do, no --rethinkdb-* options supplied')
|
||||||
|
|
||||||
# captures table
|
|
||||||
warcprox.bigtable.RethinkCaptures(rr)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user