get all the tests to pass with ./tests/run-tests.sh

This commit is contained in:
Noah Levitt 2017-10-13 15:54:05 -07:00
parent 369dc5c124
commit 828a2c3dcf
5 changed files with 39 additions and 13 deletions

View File

@ -36,6 +36,7 @@ script:
- py.test -v tests
- py.test -v --rethinkdb-dedup-url=rethinkdb://localhost/test1/dedup tests
- py.test -v --rethinkdb-big-table-url=rethinkdb://localhost/test2/captures tests
- py.test -v --rethinkdb-trough-db-url=rethinkdb://localhost/trough_configuration tests
notifications:
slack:

View File

@ -86,22 +86,22 @@ RUN mkdir -vp /etc/service/trough-sync-local \
&& chmod a+x /etc/service/trough-sync-local/run
RUN mkdir -vp /etc/service/trough-sync-server \
&& echo '#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec sync.py --server >>/tmp/trough-sync-server.out 2>&1' > /etc/service/trough-sync-server/run \
&& echo '#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec sync.py --server >>/tmp/trough-sync-server.out 2>&1' > /etc/service/trough-sync-server/run \
&& chmod a+x /etc/service/trough-sync-server/run
RUN mkdir -vp /etc/service/trough-read \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6444 --master --processes=2 --harakiri=3200 --socket-timeout=3200 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/reader.py >>/tmp/trough-read.out 2>&1' > /etc/service/trough-read/run \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6444 --master --processes=2 --harakiri=3200 --socket-timeout=3200 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/reader.py >>/tmp/trough-read.out 2>&1' > /etc/service/trough-read/run \
&& chmod a+x /etc/service/trough-read/run
RUN mkdir -vp /etc/service/trough-write \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6222 --master --processes=2 --harakiri=240 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/writer.py >>/tmp/trough-write.out 2>&1' > /etc/service/trough-write/run \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6222 --master --processes=2 --harakiri=240 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/writer.py >>/tmp/trough-write.out 2>&1' > /etc/service/trough-write/run \
&& chmod a+x /etc/service/trough-write/run
RUN mkdir -vp /etc/service/trough-write-provisioner-local \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6112 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_local.py >>/tmp/trough-write-provisioner-local.out 2>&1' > /etc/service/trough-write-provisioner-local/run \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6112 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_local.py >>/tmp/trough-write-provisioner-local.out 2>&1' > /etc/service/trough-write-provisioner-local/run \
&& chmod a+x /etc/service/trough-write-provisioner-local/run
RUN mkdir -vp /etc/service/trough-write-provisioner-server \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6111 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_server.py >>/tmp/trough-write-provisioner-server.out 2>&1' > /etc/service/trough-write-provisioner-server/run \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6111 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_server.py >>/tmp/trough-write-provisioner-server.out 2>&1' > /etc/service/trough-write-provisioner-server/run \
&& chmod a+x /etc/service/trough-write-provisioner-server/run

View File

@ -31,7 +31,7 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
docker build -t internetarchive/warcprox-tests $script_dir
for python in python2.7 python3
for python in python3 python2.7
do
docker run --rm --volume="$script_dir/..:/warcprox" internetarchive/warcprox-tests /sbin/my_init -- \
bash -x -c "cd /tmp && git clone /warcprox && cd /tmp/warcprox \

View File

@ -30,6 +30,7 @@ import sqlite3
import requests
import doublethink
import rethinkdb as r
import datetime
class DedupDb(object):
logger = logging.getLogger("warcprox.dedup.DedupDb")
@ -219,11 +220,33 @@ class TroughDedupDb(object):
else:
return None
def sql_value(self, x):
    """
    Render a Python value as an SQL literal for interpolation into a
    statement with '%s'.

    Supported inputs:
      - None              -> 'null'
      - datetime.datetime -> "datetime('<isoformat>')"
      - bool              -> 0 or 1 (returned as an int, as before; '%s'
                             formatting renders it correctly)
      - bytes / text      -> single-quoted SQL string literal, with any
                             embedded single quote doubled

    Raises Exception for any other type.

    String literals are built explicitly rather than with repr(): repr()
    switches to double quotes when the value contains a single quote, and
    uses Python backslash escapes, neither of which yields a valid SQL
    string literal.
    """
    if x is None:
        return 'null'
    elif isinstance(x, datetime.datetime):
        # isoformat() contains no quote characters, so no escaping needed
        return "datetime('%s')" % x.isoformat()
    elif isinstance(x, bool):
        return int(x)
    elif isinstance(x, (bytes, type(u''))):
        # type(u'') is `unicode` on py2 and `str` on py3, so text is
        # accepted on both.
        if isinstance(x, bytes) and str is not bytes:
            # py3 only: '%s' % b'abc' would render "b'abc'", so convert to
            # text first. Undecodable bytes are replaced rather than
            # raising. On py2 bytes is the native str and is left as is.
            x = x.decode('utf-8', 'replace')
        # In SQL the only character needing escape inside '...' is the
        # single quote itself, escaped by doubling it.
        return "'%s'" % x.replace("'", "''")
    else:
        raise Exception("don't know how to make an sql value from %r" % x)
def save(self, digest_key, response_record, bucket='__unspecified__'):
write_url = self._write_url(bucket)
record_id = response_record.get_header(warctools.WarcRecord.ID).decode('ascii')
url = response_record.get_header(warctools.WarcRecord.URL).decode('ascii')
warc_date = response_record.get_header(warctools.WarcRecord.DATE).decode('ascii')
record_id = response_record.get_header(warctools.WarcRecord.ID)
url = response_record.get_header(warctools.WarcRecord.URL)
warc_date = response_record.get_header(warctools.WarcRecord.DATE)
# XXX create table statement here is a temporary hack,
# see https://webarchive.jira.com/browse/AITFIVE-1465
@ -233,8 +256,9 @@ class TroughDedupDb(object):
' date datetime not null,\n'
' id varchar(100));\n' # warc record id
'insert into dedup (digest_key, url, date, id) '
'values (%r, %r, %r, %r);') % (
digest_key.decode('ascii'), url, warc_date, record_id)
'values (%s, %s, %s, %s);') % (
self.sql_value(digest_key), self.sql_value(url),
self.sql_value(warc_date), self.sql_value(record_id))
response = requests.post(write_url, sql)
if response.status_code != 200:
logging.warn(
@ -245,7 +269,8 @@ class TroughDedupDb(object):
read_url = self._read_url(bucket)
if not read_url:
return None
sql = 'select * from dedup where digest_key=%r;' % digest_key.decode('ascii')
sql = 'select * from dedup where digest_key=%s;' % (
self.sql_value(digest_key))
response = requests.post(read_url, sql)
if response.status_code != 200:
logging.warn(

View File

@ -60,7 +60,7 @@ class BetterArgumentDefaultsHelpFormatter(
if isinstance(action, argparse._StoreConstAction):
return action.help
else:
return super()._get_help_string(action)
return argparse.ArgumentDefaultsHelpFormatter._get_help_string(self, action)
def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
arg_parser = argparse.ArgumentParser(prog=prog,