From 828a2c3dcf12aae498c1210dba3296e55e87795d Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Fri, 13 Oct 2017 15:54:05 -0700 Subject: [PATCH] get all the tests to pass with ./tests/run-tests.sh --- .travis.yml | 1 + tests/Dockerfile | 10 +++++----- tests/run-tests.sh | 2 +- warcprox/dedup.py | 37 +++++++++++++++++++++++++++++++------ warcprox/main.py | 2 +- 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index a1848b7..a9e844f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,6 +36,7 @@ script: - py.test -v tests - py.test -v --rethinkdb-dedup-url=rethinkdb://localhost/test1/dedup tests - py.test -v --rethinkdb-big-table-url=rethinkdb://localhost/test2/captures tests +- py.test -v --rethinkdb-trough-db-url=rethinkdb://localhost/trough_configuration tests notifications: slack: diff --git a/tests/Dockerfile b/tests/Dockerfile index 2bb46b0..5e380d8 100644 --- a/tests/Dockerfile +++ b/tests/Dockerfile @@ -86,22 +86,22 @@ RUN mkdir -vp /etc/service/trough-sync-local \ && chmod a+x /etc/service/trough-sync-local/run RUN mkdir -vp /etc/service/trough-sync-server \ - && echo '#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec sync.py --server >>/tmp/trough-sync-server.out 2>&1' > /etc/service/trough-sync-server/run \ + && echo '#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec sync.py --server >>/tmp/trough-sync-server.out 2>&1' > /etc/service/trough-sync-server/run \ && chmod a+x /etc/service/trough-sync-server/run RUN mkdir -vp /etc/service/trough-read \ - && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c 
$"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6444 --master --processes=2 --harakiri=3200 --socket-timeout=3200 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/reader.py >>/tmp/trough-read.out 2>&1' > /etc/service/trough-read/run \ + && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6444 --master --processes=2 --harakiri=3200 --socket-timeout=3200 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/reader.py >>/tmp/trough-read.out 2>&1' > /etc/service/trough-read/run \ && chmod a+x /etc/service/trough-read/run RUN mkdir -vp /etc/service/trough-write \ - && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6222 --master --processes=2 --harakiri=240 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/writer.py >>/tmp/trough-write.out 2>&1' > /etc/service/trough-write/run \ + && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6222 --master --processes=2 --harakiri=240 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/writer.py >>/tmp/trough-write.out 2>&1' > /etc/service/trough-write/run \ && chmod a+x /etc/service/trough-write/run RUN mkdir -vp 
/etc/service/trough-write-provisioner-local \ - && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6112 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_local.py >>/tmp/trough-write-provisioner-local.out 2>&1' > /etc/service/trough-write-provisioner-local/run \ + && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6112 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_local.py >>/tmp/trough-write-provisioner-local.out 2>&1' > /etc/service/trough-write-provisioner-local/run \ && chmod a+x /etc/service/trough-write-provisioner-local/run RUN mkdir -vp /etc/service/trough-write-provisioner-server \ - && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6111 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_server.py >>/tmp/trough-write-provisioner-server.out 2>&1' > /etc/service/trough-write-provisioner-server/run \ + && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 5\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; 
def sql_value(self, x):
    """Render the Python value ``x`` as an SQL literal.

    The result is meant to be interpolated with ``%s`` into an SQL
    statement that is sent as plain text, so string values must be quoted
    using SQL rules rather than Python ``repr()`` rules.

    Supported values:

    * ``None`` -> ``null``
    * ``datetime.datetime`` -> ``datetime('<isoformat>')``
    * ``bool`` -> ``0`` or ``1`` (returned as an ``int``, as before)
    * text and bytes -> a single-quoted SQL string literal in which every
      embedded single quote is doubled

    Args:
        x: the value to render.

    Returns:
        The SQL literal (an ``int`` for booleans, a string otherwise).

    Raises:
        TypeError: if ``x`` is of a type this method does not know how to
            render. ``TypeError`` is a subclass of ``Exception``, so
            callers catching ``Exception`` keep working.
    """
    if x is None:
        return 'null'
    if isinstance(x, datetime.datetime):
        # isoformat() output never contains a quote, so plain quoting is
        # safe here.
        return "datetime('%s')" % x.isoformat()
    if isinstance(x, bool):
        return int(x)

    # `unicode` on python 2, `str` on python 3.
    text_type = type(u'')
    if isinstance(x, (text_type, bytes)):
        if not isinstance(x, (text_type, str)):
            # Only python 3 `bytes` gets here (python 2 `bytes` is `str`).
            # Decode so the literal carries the characters themselves
            # instead of the b'...' repr with \x escapes. latin-1 maps
            # every byte to a code point, so the fallback cannot fail.
            try:
                x = x.decode('utf-8')
            except UnicodeDecodeError:
                x = x.decode('latin-1')
        # SQL escapes a single quote inside a string literal by doubling
        # it; backslashes have no special meaning and are left untouched.
        return "'" + x.replace("'", "''") + "'"

    raise TypeError("don't know how to make an sql value from %r" % (x,))
argparse.ArgumentDefaultsHelpFormatter._get_help_string(self, action) def _build_arg_parser(prog=os.path.basename(sys.argv[0])): arg_parser = argparse.ArgumentParser(prog=prog,