install and run trough in docker container for testing

This commit is contained in:
Noah Levitt 2017-10-11 17:28:47 -07:00
parent d177b3b80d
commit 369dc5c124
2 changed files with 55 additions and 8 deletions

View File

@ -1,7 +1,7 @@
#
# Dockerfile for warcprox tests
#
# Copyright (C) 2015-2016 Internet Archive
# Copyright (C) 2015-2017 Internet Archive
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@ -23,19 +23,19 @@ FROM phusion/baseimage
# Image author metadata. The MAINTAINER instruction is deprecated, so the same
# information is recorded as a "maintainer" label instead.
LABEL maintainer="Noah Levitt <nlevitt@archive.org>"
# see https://github.com/stuartpb/rethinkdb-dockerfiles/blob/master/trusty/2.1.3/Dockerfile
# and https://github.com/chali/hadoop-cdh-pseudo-docker/blob/master/Dockerfile
# Set LANG to C.UTF-8 for the remaining build steps and for the running
# container.
ENV LANG=C.UTF-8
# Refresh the apt package index and upgrade every installed package;
# --auto-remove also removes packages that are no longer needed.
RUN apt-get update && apt-get --auto-remove -y dist-upgrade
# Add the RethinkDB repository and public key
# "RethinkDB Packaging <packaging@rethinkdb.com>" http://download.rethinkdb.com/apt/pubkey.gpg
RUN apt-key adv --keyserver pgp.mit.edu --recv-keys 1614552E5765227AEC39EFCFA7E00EF33A8F2399 \
RUN curl -s https://download.rethinkdb.com/apt/pubkey.gpg | apt-key add - \
&& echo "deb http://download.rethinkdb.com/apt xenial main" > /etc/apt/sources.list.d/rethinkdb.list \
&& apt-get update && apt-get -y install rethinkdb
# Service definition for rethinkdb under /etc/service: the run script starts
# rethinkdb with --bind 0.0.0.0, its data directory in /tmp/rethink-data, and
# --runuser/--rungroup set to rethinkdb. The script is then made executable
# for all users.
RUN mkdir -vp /etc/service/rethinkdb \
&& echo "#!/bin/sh\nrethinkdb --bind 0.0.0.0 --directory /tmp/rethink-data --runuser rethinkdb --rungroup rethinkdb\n" > /etc/service/rethinkdb/run \
&& echo "#!/bin/bash\nexec rethinkdb --bind 0.0.0.0 --directory /tmp/rethink-data --runuser rethinkdb --rungroup rethinkdb\n" > /etc/service/rethinkdb/run \
&& chmod a+x /etc/service/rethinkdb/run
# Install git; it is used at least by the "pip install git+https://..." steps
# further down in this file.
RUN apt-get -y install git
@ -53,6 +53,55 @@ RUN pip install virtualenv
# Install tor and define a service for it under /etc/service: the run script
# only launches tor, and is made executable for all users.
RUN apt-get -y install tor
RUN mkdir -vp /etc/service/tor \
&& echo "#!/bin/sh\ntor\n" > /etc/service/tor/run \
&& echo "#!/bin/sh\nexec tor\n" > /etc/service/tor/run \
&& chmod a+x /etc/service/tor/run
# hadoop hdfs for trough
#
# Trust Cloudera's archive signing key and register the CDH5 apt repository
# for this Ubuntu release (DISTRIB_CODENAME is read from /etc/lsb-release).
# curl -f turns an HTTP error into a failed download instead of piping an
# error page into apt-key; -S keeps the error message visible despite -s.
RUN curl -fsS https://archive.cloudera.com/cdh5/ubuntu/xenial/amd64/cdh/archive.key | apt-key add - \
    && . /etc/lsb-release \
    && echo "deb [arch=amd64] http://archive.cloudera.com/cdh5/ubuntu/$DISTRIB_CODENAME/amd64/cdh $DISTRIB_CODENAME-cdh5 contrib" >> /etc/apt/sources.list.d/cloudera.list
# Refresh the package index in the same layer as the install, so that a cached
# "apt-get update" layer can never be combined with a later, changed install
# command and leave it working from a stale index.
RUN apt-get update \
    && apt-get install -y openjdk-8-jdk hadoop-conf-pseudo
# Format the HDFS namenode, running the command as the hdfs user.
RUN su hdfs -c 'hdfs namenode -format'
# Keep the packaged core-site.xml as core-site.xml.orig and regenerate the
# live file from it with localhost:8020 replaced by 0.0.0.0:8020.
RUN conf=/etc/hadoop/conf \
    && mv -v "$conf/core-site.xml" "$conf/core-site.xml.orig" \
    && sed 's,localhost:8020,0.0.0.0:8020,' "$conf/core-site.xml.orig" > "$conf/core-site.xml"
# Same approach for hdfs-site.xml: the closing </configuration> line is
# replaced by a dfs.permissions.enabled=false property followed by the
# closing tag.
RUN conf=/etc/hadoop/conf \
    && mv -v "$conf/hdfs-site.xml" "$conf/hdfs-site.xml.orig" \
    && sed 's,^</configuration>$, <property>\n <name>dfs.permissions.enabled</name>\n <value>false</value>\n </property>\n</configuration>,' "$conf/hdfs-site.xml.orig" > "$conf/hdfs-site.xml"
# Install /etc/my_init.d/50_start_hdfs.sh, an executable script that starts
# the hadoop-hdfs-namenode service and then the hadoop-hdfs-datanode service.
RUN init_script=/etc/my_init.d/50_start_hdfs.sh \
    && echo '#!/bin/bash\nservice hadoop-hdfs-namenode start\nservice hadoop-hdfs-datanode start' > "$init_script" \
    && chmod a+x "$init_script"
# trough itself
#
# Create a python3 virtualenv at /opt/trough-ve3, activate it, and install
# into it first the snakebite fork (branch feature/python3-version-string) and
# then trough (branch toward-warcprox-dedup), both straight from GitHub.
RUN venv=/opt/trough-ve3 \
    && virtualenv -p python3 "$venv" \
    && . "$venv/bin/activate" \
    && pip install git+https://github.com/jkafader/snakebite@feature/python3-version-string \
    && pip install git+https://github.com/nlevitt/trough.git@toward-warcprox-dedup
# Service trough-sync-local: the run script activates the /opt/trough-ve3
# virtualenv and execs sync.py, appending stdout and stderr to
# /tmp/trough-sync-local.out.
RUN mkdir -vp /etc/service/trough-sync-local \
&& echo "#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nexec sync.py >>/tmp/trough-sync-local.out 2>&1" > /etc/service/trough-sync-local/run \
&& chmod a+x /etc/service/trough-sync-local/run
# Service trough-sync-server: the run script activates the /opt/trough-ve3
# virtualenv, sleeps one second, and runs a python one-liner that connects to
# the RethinkDB hosts from trough's settings and calls
# rr.db("trough_configuration").wait().run() -- presumably blocking until that
# database is available. It then execs "sync.py --server", appending stdout
# and stderr to /tmp/trough-sync-server.out.
RUN mkdir -vp /etc/service/trough-sync-server \
&& echo '#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec sync.py --server >>/tmp/trough-sync-server.out 2>&1' > /etc/service/trough-sync-server/run \
&& chmod a+x /etc/service/trough-sync-server/run
# Service trough-read: the run script activates the /opt/trough-ve3
# virtualenv, sleeps one second, and runs a python one-liner that calls
# rr.db("trough_configuration").wait().run() against the RethinkDB hosts from
# trough's settings -- presumably blocking until that database is available.
# It then execs uwsgi serving $venv/bin/reader.py over HTTP on port 6444,
# appending stdout and stderr to /tmp/trough-read.out.
RUN mkdir -vp /etc/service/trough-read \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6444 --master --processes=2 --harakiri=3200 --socket-timeout=3200 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/reader.py >>/tmp/trough-read.out 2>&1' > /etc/service/trough-read/run \
&& chmod a+x /etc/service/trough-read/run
# Service trough-write: the run script activates the /opt/trough-ve3
# virtualenv, sleeps one second, and runs a python one-liner that calls
# rr.db("trough_configuration").wait().run() against the RethinkDB hosts from
# trough's settings -- presumably blocking until that database is available.
# It then execs uwsgi serving $venv/bin/writer.py over HTTP on port 6222,
# appending stdout and stderr to /tmp/trough-write.out.
RUN mkdir -vp /etc/service/trough-write \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6222 --master --processes=2 --harakiri=240 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/writer.py >>/tmp/trough-write.out 2>&1' > /etc/service/trough-write/run \
&& chmod a+x /etc/service/trough-write/run
# Service trough-write-provisioner-local: the run script activates the
# /opt/trough-ve3 virtualenv, sleeps one second, and runs a python one-liner
# that calls rr.db("trough_configuration").wait().run() against the RethinkDB
# hosts from trough's settings -- presumably blocking until that database is
# available. It then execs uwsgi serving $venv/bin/write_provisioner_local.py
# over HTTP on port 6112, appending stdout and stderr to
# /tmp/trough-write-provisioner-local.out.
RUN mkdir -vp /etc/service/trough-write-provisioner-local \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6112 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_local.py >>/tmp/trough-write-provisioner-local.out 2>&1' > /etc/service/trough-write-provisioner-local/run \
&& chmod a+x /etc/service/trough-write-provisioner-local/run
# Service trough-write-provisioner-server: the run script activates the
# /opt/trough-ve3 virtualenv, sleeps one second, and runs a python one-liner
# that calls rr.db("trough_configuration").wait().run() against the RethinkDB
# hosts from trough's settings -- presumably blocking until that database is
# available. It then execs uwsgi serving $venv/bin/write_provisioner_server.py
# over HTTP on port 6111, appending stdout and stderr to
# /tmp/trough-write-provisioner-server.out.
RUN mkdir -vp /etc/service/trough-write-provisioner-server \
&& echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6111 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_server.py >>/tmp/trough-write-provisioner-server.out 2>&1' > /etc/service/trough-write-provisioner-server/run \
&& chmod a+x /etc/service/trough-write-provisioner-server/run

View File

@ -5,8 +5,6 @@
# features enabled, against that instance of rethinkdb, and also run without
# rethinkdb features enabled. With python 2.7 and 3.4.
#
# tests/conftest.py - command line options for warcprox tests
#
# Copyright (C) 2015-2017 Internet Archive
#
# This program is free software; you can redistribute it and/or
@ -44,7 +42,7 @@ do
&& py.test -v tests \
&& py.test -v --rethinkdb-dedup-url=rethinkdb://localhost/test1/dedup tests \
&& py.test -v --rethinkdb-big-table-url=rethinkdb://localhost/test2/captures tests \
&& py.test -v --rethinkdb-trough-db-url=rethinkdb://localhost/test3 tests \
&& py.test -v --rethinkdb-trough-db-url=rethinkdb://localhost/trough_configuration tests \
"
done