mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
get all the tests to pass with ./tests/run-tests.sh
This commit is contained in:
parent
369dc5c124
commit
828a2c3dcf
@ -36,6 +36,7 @@ script:
|
||||
- py.test -v tests
|
||||
- py.test -v --rethinkdb-dedup-url=rethinkdb://localhost/test1/dedup tests
|
||||
- py.test -v --rethinkdb-big-table-url=rethinkdb://localhost/test2/captures tests
|
||||
- py.test -v --rethinkdb-trough-db-url=rethinkdb://localhost/trough_configuration tests
|
||||
|
||||
notifications:
|
||||
slack:
|
||||
|
@ -86,22 +86,22 @@ RUN mkdir -vp /etc/service/trough-sync-local \
|
||||
&& chmod a+x /etc/service/trough-sync-local/run
|
||||
|
||||
RUN mkdir -vp /etc/service/trough-sync-server \
|
||||
&& echo '#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec sync.py --server >>/tmp/trough-sync-server.out 2>&1' > /etc/service/trough-sync-server/run \
|
||||
&& echo '#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec sync.py --server >>/tmp/trough-sync-server.out 2>&1' > /etc/service/trough-sync-server/run \
|
||||
&& chmod a+x /etc/service/trough-sync-server/run
|
||||
|
||||
RUN mkdir -vp /etc/service/trough-read \
|
||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6444 --master --processes=2 --harakiri=3200 --socket-timeout=3200 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/reader.py >>/tmp/trough-read.out 2>&1' > /etc/service/trough-read/run \
|
||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6444 --master --processes=2 --harakiri=3200 --socket-timeout=3200 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/reader.py >>/tmp/trough-read.out 2>&1' > /etc/service/trough-read/run \
|
||||
&& chmod a+x /etc/service/trough-read/run
|
||||
|
||||
RUN mkdir -vp /etc/service/trough-write \
|
||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6222 --master --processes=2 --harakiri=240 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/writer.py >>/tmp/trough-write.out 2>&1' > /etc/service/trough-write/run \
|
||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6222 --master --processes=2 --harakiri=240 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/writer.py >>/tmp/trough-write.out 2>&1' > /etc/service/trough-write/run \
|
||||
&& chmod a+x /etc/service/trough-write/run
|
||||
|
||||
RUN mkdir -vp /etc/service/trough-write-provisioner-local \
|
||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6112 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_local.py >>/tmp/trough-write-provisioner-local.out 2>&1' > /etc/service/trough-write-provisioner-local/run \
|
||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6112 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_local.py >>/tmp/trough-write-provisioner-local.out 2>&1' > /etc/service/trough-write-provisioner-local/run \
|
||||
&& chmod a+x /etc/service/trough-write-provisioner-local/run
|
||||
|
||||
RUN mkdir -vp /etc/service/trough-write-provisioner-server \
|
||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6111 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_server.py >>/tmp/trough-write-provisioner-server.out 2>&1' > /etc/service/trough-write-provisioner-server/run \
|
||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6111 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_server.py >>/tmp/trough-write-provisioner-server.out 2>&1' > /etc/service/trough-write-provisioner-server/run \
|
||||
&& chmod a+x /etc/service/trough-write-provisioner-server/run
|
||||
|
||||
|
@ -31,7 +31,7 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
docker build -t internetarchive/warcprox-tests $script_dir
|
||||
|
||||
for python in python2.7 python3
|
||||
for python in python3 python2.7
|
||||
do
|
||||
docker run --rm --volume="$script_dir/..:/warcprox" internetarchive/warcprox-tests /sbin/my_init -- \
|
||||
bash -x -c "cd /tmp && git clone /warcprox && cd /tmp/warcprox \
|
||||
|
@ -30,6 +30,7 @@ import sqlite3
|
||||
import requests
|
||||
import doublethink
|
||||
import rethinkdb as r
|
||||
import datetime
|
||||
|
||||
class DedupDb(object):
|
||||
logger = logging.getLogger("warcprox.dedup.DedupDb")
|
||||
@ -219,11 +220,33 @@ class TroughDedupDb(object):
|
||||
else:
|
||||
return None
|
||||
|
||||
def sql_value(self, x):
    """Render a Python value as an SQL literal for interpolation into a query.

    Supported inputs:
      * None              -> the string 'null'
      * datetime.datetime -> "datetime('<isoformat>')"
      * bool              -> the int 0 or 1 (callers interpolate it with %s)
      * str / bytes       -> a single-quoted SQL string literal

    String literals are built with SQL quoting rules, not Python's repr():
    an embedded single quote is doubled, and nothing else is escaped.
    repr() switches to double quotes (an SQL *identifier*) when the value
    contains a single quote, and emits backslash escapes that SQL does not
    understand, which produced malformed or injectable statements.

    Raises:
        Exception: if x is of any other type.
    """
    if x is None:
        return 'null'
    elif isinstance(x, datetime.datetime):
        # isoformat() output contains no quote characters, so plain
        # quoting via %r is safe here.
        return 'datetime(%r)' % x.isoformat()
    elif isinstance(x, bool):
        return int(x)
    elif isinstance(x, str) or isinstance(x, bytes):
        if not isinstance(x, str):
            # Only reachable on py3 (on py2, bytes is str). Turn the bytes
            # into text so they can be embedded in the statement; bytes
            # that are not valid utf-8 are kept visible as \xNN rather
            # than raising.
            x = x.decode('utf-8', 'backslashreplace')
        # Standard SQL escaping: double every embedded single quote.
        return "'%s'" % x.replace("'", "''")
    else:
        raise Exception("don't know how to make an sql value from %r" % x)
|
||||
|
||||
def save(self, digest_key, response_record, bucket='__unspecified__'):
|
||||
write_url = self._write_url(bucket)
|
||||
record_id = response_record.get_header(warctools.WarcRecord.ID).decode('ascii')
|
||||
url = response_record.get_header(warctools.WarcRecord.URL).decode('ascii')
|
||||
warc_date = response_record.get_header(warctools.WarcRecord.DATE).decode('ascii')
|
||||
record_id = response_record.get_header(warctools.WarcRecord.ID)
|
||||
url = response_record.get_header(warctools.WarcRecord.URL)
|
||||
warc_date = response_record.get_header(warctools.WarcRecord.DATE)
|
||||
|
||||
# XXX create table statement here is a temporary hack,
|
||||
# see https://webarchive.jira.com/browse/AITFIVE-1465
|
||||
@ -233,8 +256,9 @@ class TroughDedupDb(object):
|
||||
' date datetime not null,\n'
|
||||
' id varchar(100));\n' # warc record id
|
||||
'insert into dedup (digest_key, url, date, id) '
|
||||
'values (%r, %r, %r, %r);') % (
|
||||
digest_key.decode('ascii'), url, warc_date, record_id)
|
||||
'values (%s, %s, %s, %s);') % (
|
||||
self.sql_value(digest_key), self.sql_value(url),
|
||||
self.sql_value(warc_date), self.sql_value(record_id))
|
||||
response = requests.post(write_url, sql)
|
||||
if response.status_code != 200:
|
||||
logging.warn(
|
||||
@ -245,7 +269,8 @@ class TroughDedupDb(object):
|
||||
read_url = self._read_url(bucket)
|
||||
if not read_url:
|
||||
return None
|
||||
sql = 'select * from dedup where digest_key=%r;' % digest_key.decode('ascii')
|
||||
sql = 'select * from dedup where digest_key=%s;' % (
|
||||
self.sql_value(digest_key))
|
||||
response = requests.post(read_url, sql)
|
||||
if response.status_code != 200:
|
||||
logging.warn(
|
||||
|
@ -60,7 +60,7 @@ class BetterArgumentDefaultsHelpFormatter(
|
||||
if isinstance(action, argparse._StoreConstAction):
|
||||
return action.help
|
||||
else:
|
||||
return super()._get_help_string(action)
|
||||
return argparse.ArgumentDefaultsHelpFormatter._get_help_string(self, action)
|
||||
|
||||
def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
||||
arg_parser = argparse.ArgumentParser(prog=prog,
|
||||
|
Loading…
x
Reference in New Issue
Block a user