mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
get all the tests to pass with ./tests/run-tests.sh
This commit is contained in:
parent
369dc5c124
commit
828a2c3dcf
@ -36,6 +36,7 @@ script:
|
|||||||
- py.test -v tests
|
- py.test -v tests
|
||||||
- py.test -v --rethinkdb-dedup-url=rethinkdb://localhost/test1/dedup tests
|
- py.test -v --rethinkdb-dedup-url=rethinkdb://localhost/test1/dedup tests
|
||||||
- py.test -v --rethinkdb-big-table-url=rethinkdb://localhost/test2/captures tests
|
- py.test -v --rethinkdb-big-table-url=rethinkdb://localhost/test2/captures tests
|
||||||
|
- py.test -v --rethinkdb-trough-db-url=rethinkdb://localhost/trough_configuration tests
|
||||||
|
|
||||||
notifications:
|
notifications:
|
||||||
slack:
|
slack:
|
||||||
|
@ -86,22 +86,22 @@ RUN mkdir -vp /etc/service/trough-sync-local \
|
|||||||
&& chmod a+x /etc/service/trough-sync-local/run
|
&& chmod a+x /etc/service/trough-sync-local/run
|
||||||
|
|
||||||
RUN mkdir -vp /etc/service/trough-sync-server \
|
RUN mkdir -vp /etc/service/trough-sync-server \
|
||||||
&& echo '#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec sync.py --server >>/tmp/trough-sync-server.out 2>&1' > /etc/service/trough-sync-server/run \
|
&& echo '#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec sync.py --server >>/tmp/trough-sync-server.out 2>&1' > /etc/service/trough-sync-server/run \
|
||||||
&& chmod a+x /etc/service/trough-sync-server/run
|
&& chmod a+x /etc/service/trough-sync-server/run
|
||||||
|
|
||||||
RUN mkdir -vp /etc/service/trough-read \
|
RUN mkdir -vp /etc/service/trough-read \
|
||||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6444 --master --processes=2 --harakiri=3200 --socket-timeout=3200 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/reader.py >>/tmp/trough-read.out 2>&1' > /etc/service/trough-read/run \
|
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6444 --master --processes=2 --harakiri=3200 --socket-timeout=3200 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/reader.py >>/tmp/trough-read.out 2>&1' > /etc/service/trough-read/run \
|
||||||
&& chmod a+x /etc/service/trough-read/run
|
&& chmod a+x /etc/service/trough-read/run
|
||||||
|
|
||||||
RUN mkdir -vp /etc/service/trough-write \
|
RUN mkdir -vp /etc/service/trough-write \
|
||||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6222 --master --processes=2 --harakiri=240 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/writer.py >>/tmp/trough-write.out 2>&1' > /etc/service/trough-write/run \
|
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6222 --master --processes=2 --harakiri=240 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/writer.py >>/tmp/trough-write.out 2>&1' > /etc/service/trough-write/run \
|
||||||
&& chmod a+x /etc/service/trough-write/run
|
&& chmod a+x /etc/service/trough-write/run
|
||||||
|
|
||||||
RUN mkdir -vp /etc/service/trough-write-provisioner-local \
|
RUN mkdir -vp /etc/service/trough-write-provisioner-local \
|
||||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6112 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_local.py >>/tmp/trough-write-provisioner-local.out 2>&1' > /etc/service/trough-write-provisioner-local/run \
|
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6112 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_local.py >>/tmp/trough-write-provisioner-local.out 2>&1' > /etc/service/trough-write-provisioner-local/run \
|
||||||
&& chmod a+x /etc/service/trough-write-provisioner-local/run
|
&& chmod a+x /etc/service/trough-write-provisioner-local/run
|
||||||
|
|
||||||
RUN mkdir -vp /etc/service/trough-write-provisioner-server \
|
RUN mkdir -vp /etc/service/trough-write-provisioner-server \
|
||||||
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6111 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_server.py >>/tmp/trough-write-provisioner-server.out 2>&1' > /etc/service/trough-write-provisioner-server/run \
|
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6111 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_server.py >>/tmp/trough-write-provisioner-server.out 2>&1' > /etc/service/trough-write-provisioner-server/run \
|
||||||
&& chmod a+x /etc/service/trough-write-provisioner-server/run
|
&& chmod a+x /etc/service/trough-write-provisioner-server/run
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|||||||
|
|
||||||
docker build -t internetarchive/warcprox-tests $script_dir
|
docker build -t internetarchive/warcprox-tests $script_dir
|
||||||
|
|
||||||
for python in python2.7 python3
|
for python in python3 python2.7
|
||||||
do
|
do
|
||||||
docker run --rm --volume="$script_dir/..:/warcprox" internetarchive/warcprox-tests /sbin/my_init -- \
|
docker run --rm --volume="$script_dir/..:/warcprox" internetarchive/warcprox-tests /sbin/my_init -- \
|
||||||
bash -x -c "cd /tmp && git clone /warcprox && cd /tmp/warcprox \
|
bash -x -c "cd /tmp && git clone /warcprox && cd /tmp/warcprox \
|
||||||
|
@ -30,6 +30,7 @@ import sqlite3
|
|||||||
import requests
|
import requests
|
||||||
import doublethink
|
import doublethink
|
||||||
import rethinkdb as r
|
import rethinkdb as r
|
||||||
|
import datetime
|
||||||
|
|
||||||
class DedupDb(object):
|
class DedupDb(object):
|
||||||
logger = logging.getLogger("warcprox.dedup.DedupDb")
|
logger = logging.getLogger("warcprox.dedup.DedupDb")
|
||||||
@ -219,11 +220,33 @@ class TroughDedupDb(object):
|
|||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def sql_value(self, x):
    """
    Render the python value `x` as a literal that can be interpolated
    into an sql statement with `%s`.

    Supported values:
      - None              => the string 'null'
      - datetime.datetime => "datetime('<isoformat>')"
      - bool              => the int 0 or 1 (formats as 0/1 under `%s`)
      - text or bytes     => a single-quoted sql string literal

    Raises TypeError (a subclass of Exception, as raised previously) for
    any other type.
    """
    if x is None:
        return 'null'
    elif isinstance(x, datetime.datetime):
        # isoformat() output never contains a quote character, so it is
        # safe to wrap it in single quotes directly
        return "datetime('%s')" % x.isoformat()
    elif isinstance(x, bool):
        return int(x)
    elif isinstance(x, (bytes, type(u''))):
        # (bytes, type(u'')) covers str and bytes on py3 as well as str
        # and unicode on py2.
        #
        # Do not use repr() to build the literal: it switches to double
        # quotes when the value contains a single quote, and it emits
        # python backslash escapes, neither of which is valid sql string
        # syntax. The sql way to escape a single quote inside a string
        # literal is to double it.
        if isinstance(x, bytes):
            # best effort: undecodable bytes become U+FFFD rather than
            # making the caller fail
            x = x.decode('utf-8', 'replace')
        return "'%s'" % x.replace("'", "''")
    else:
        raise TypeError(
                "don't know how to make an sql value from %r" % (x,))
|
||||||
|
|
||||||
def save(self, digest_key, response_record, bucket='__unspecified__'):
|
def save(self, digest_key, response_record, bucket='__unspecified__'):
|
||||||
write_url = self._write_url(bucket)
|
write_url = self._write_url(bucket)
|
||||||
record_id = response_record.get_header(warctools.WarcRecord.ID).decode('ascii')
|
record_id = response_record.get_header(warctools.WarcRecord.ID)
|
||||||
url = response_record.get_header(warctools.WarcRecord.URL).decode('ascii')
|
url = response_record.get_header(warctools.WarcRecord.URL)
|
||||||
warc_date = response_record.get_header(warctools.WarcRecord.DATE).decode('ascii')
|
warc_date = response_record.get_header(warctools.WarcRecord.DATE)
|
||||||
|
|
||||||
# XXX create table statement here is a temporary hack,
|
# XXX create table statement here is a temporary hack,
|
||||||
# see https://webarchive.jira.com/browse/AITFIVE-1465
|
# see https://webarchive.jira.com/browse/AITFIVE-1465
|
||||||
@ -233,8 +256,9 @@ class TroughDedupDb(object):
|
|||||||
' date datetime not null,\n'
|
' date datetime not null,\n'
|
||||||
' id varchar(100));\n' # warc record id
|
' id varchar(100));\n' # warc record id
|
||||||
'insert into dedup (digest_key, url, date, id) '
|
'insert into dedup (digest_key, url, date, id) '
|
||||||
'values (%r, %r, %r, %r);') % (
|
'values (%s, %s, %s, %s);') % (
|
||||||
digest_key.decode('ascii'), url, warc_date, record_id)
|
self.sql_value(digest_key), self.sql_value(url),
|
||||||
|
self.sql_value(warc_date), self.sql_value(record_id))
|
||||||
response = requests.post(write_url, sql)
|
response = requests.post(write_url, sql)
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
logging.warn(
|
logging.warn(
|
||||||
@ -245,7 +269,8 @@ class TroughDedupDb(object):
|
|||||||
read_url = self._read_url(bucket)
|
read_url = self._read_url(bucket)
|
||||||
if not read_url:
|
if not read_url:
|
||||||
return None
|
return None
|
||||||
sql = 'select * from dedup where digest_key=%r;' % digest_key.decode('ascii')
|
sql = 'select * from dedup where digest_key=%s;' % (
|
||||||
|
self.sql_value(digest_key))
|
||||||
response = requests.post(read_url, sql)
|
response = requests.post(read_url, sql)
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
logging.warn(
|
logging.warn(
|
||||||
|
@ -60,7 +60,7 @@ class BetterArgumentDefaultsHelpFormatter(
|
|||||||
if isinstance(action, argparse._StoreConstAction):
|
if isinstance(action, argparse._StoreConstAction):
|
||||||
return action.help
|
return action.help
|
||||||
else:
|
else:
|
||||||
return super()._get_help_string(action)
|
return argparse.ArgumentDefaultsHelpFormatter._get_help_string(self, action)
|
||||||
|
|
||||||
def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
||||||
arg_parser = argparse.ArgumentParser(prog=prog,
|
arg_parser = argparse.ArgumentParser(prog=prog,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user