From 369dc5c124fac8fba5fb532e51205a68010f4f71 Mon Sep 17 00:00:00 2001
From: Noah Levitt
Date: Wed, 11 Oct 2017 17:28:47 -0700
Subject: [PATCH] install and run trough in docker container for testing

---
Reviewer notes (this area is ignored by `git am`; not part of the commit):
  * This copy of the patch arrived with its line breaks collapsed. Line
    breaks, blank context lines and continuation-line indentation below were
    reconstructed so that every hunk matches its @@ header counts and the
    diffstat; they are assumptions. Verify against commit 369dc5c upstream
    before applying (or apply with --ignore-whitespace).
  * The sed expression that patches hdfs-site.xml had lost its XML tags
    (it read 's,^$, \n dfs.permissions.enabled\n false\n \n,'). The tags are
    restored on the assumption that it inserts a dfs.permissions.enabled
    property before the closing configuration tag. TODO confirm.
  * Angle-bracketed addresses (From:, MAINTAINER, the removed RethinkDB
    packaging comment) appear to have been stripped as well and are NOT
    restored here.

 tests/Dockerfile   | 59 ++++++++++++++++++++++++++++++++++++++++++----
 tests/run-tests.sh |  4 +---
 2 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/tests/Dockerfile b/tests/Dockerfile
index 04c6d72..2bb46b0 100644
--- a/tests/Dockerfile
+++ b/tests/Dockerfile
@@ -1,7 +1,7 @@
 #
 # Dockerfile for warcprox tests
 #
-# Copyright (C) 2015-2016 Internet Archive
+# Copyright (C) 2015-2017 Internet Archive
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -23,19 +23,19 @@ FROM phusion/baseimage
 MAINTAINER Noah Levitt

 # see https://github.com/stuartpb/rethinkdb-dockerfiles/blob/master/trusty/2.1.3/Dockerfile
+# and https://github.com/chali/hadoop-cdh-pseudo-docker/blob/master/Dockerfile

 ENV LANG=C.UTF-8

 RUN apt-get update && apt-get --auto-remove -y dist-upgrade

 # Add the RethinkDB repository and public key
-# "RethinkDB Packaging " http://download.rethinkdb.com/apt/pubkey.gpg
-RUN apt-key adv --keyserver pgp.mit.edu --recv-keys 1614552E5765227AEC39EFCFA7E00EF33A8F2399 \
+RUN curl -s https://download.rethinkdb.com/apt/pubkey.gpg | apt-key add - \
     && echo "deb http://download.rethinkdb.com/apt xenial main" > /etc/apt/sources.list.d/rethinkdb.list \
     && apt-get update && apt-get -y install rethinkdb

 RUN mkdir -vp /etc/service/rethinkdb \
-    && echo "#!/bin/sh\nrethinkdb --bind 0.0.0.0 --directory /tmp/rethink-data --runuser rethinkdb --rungroup rethinkdb\n" > /etc/service/rethinkdb/run \
+    && echo "#!/bin/bash\nexec rethinkdb --bind 0.0.0.0 --directory /tmp/rethink-data --runuser rethinkdb --rungroup rethinkdb\n" > /etc/service/rethinkdb/run \
     && chmod a+x /etc/service/rethinkdb/run

 RUN apt-get -y install git
@@ -53,6 +53,55 @@ RUN pip install virtualenv

 RUN apt-get -y install tor
 RUN mkdir -vp /etc/service/tor \
-    && echo "#!/bin/sh\ntor\n" > /etc/service/tor/run \
+    && echo "#!/bin/sh\nexec tor\n" > /etc/service/tor/run \
     && chmod a+x /etc/service/tor/run

+# hadoop hdfs for trough
+RUN curl -s https://archive.cloudera.com/cdh5/ubuntu/xenial/amd64/cdh/archive.key | apt-key add - \
+    && . /etc/lsb-release \
+    && echo "deb [arch=amd64] http://archive.cloudera.com/cdh5/ubuntu/$DISTRIB_CODENAME/amd64/cdh $DISTRIB_CODENAME-cdh5 contrib" >> /etc/apt/sources.list.d/cloudera.list
+
+RUN apt-get update
+RUN apt-get install -y openjdk-8-jdk hadoop-conf-pseudo
+
+RUN su hdfs -c 'hdfs namenode -format'
+
+RUN mv -v /etc/hadoop/conf/core-site.xml /etc/hadoop/conf/core-site.xml.orig \
+    && cat /etc/hadoop/conf/core-site.xml.orig | sed 's,localhost:8020,0.0.0.0:8020,' > /etc/hadoop/conf/core-site.xml
+
+RUN mv -v /etc/hadoop/conf/hdfs-site.xml /etc/hadoop/conf/hdfs-site.xml.orig \
+    && cat /etc/hadoop/conf/hdfs-site.xml.orig | sed 's,^</configuration>$,  <property>\n    <name>dfs.permissions.enabled</name>\n    <value>false</value>\n  </property>\n</configuration>,' > /etc/hadoop/conf/hdfs-site.xml
+
+RUN echo '#!/bin/bash\nservice hadoop-hdfs-namenode start\nservice hadoop-hdfs-datanode start' > /etc/my_init.d/50_start_hdfs.sh \
+    && chmod a+x /etc/my_init.d/50_start_hdfs.sh
+
+# trough itself
+RUN virtualenv -p python3 /opt/trough-ve3 \
+    && . /opt/trough-ve3/bin/activate \
+    && pip install git+https://github.com/jkafader/snakebite@feature/python3-version-string \
+    && pip install git+https://github.com/nlevitt/trough.git@toward-warcprox-dedup
+
+RUN mkdir -vp /etc/service/trough-sync-local \
+    && echo "#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nexec sync.py >>/tmp/trough-sync-local.out 2>&1" > /etc/service/trough-sync-local/run \
+    && chmod a+x /etc/service/trough-sync-local/run
+
+RUN mkdir -vp /etc/service/trough-sync-server \
+    && echo '#!/bin/bash\nsource /opt/trough-ve3/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec sync.py --server >>/tmp/trough-sync-server.out 2>&1' > /etc/service/trough-sync-server/run \
+    && chmod a+x /etc/service/trough-sync-server/run
+
+RUN mkdir -vp /etc/service/trough-read \
+    && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6444 --master --processes=2 --harakiri=3200 --socket-timeout=3200 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/reader.py >>/tmp/trough-read.out 2>&1' > /etc/service/trough-read/run \
+    && chmod a+x /etc/service/trough-read/run
+
+RUN mkdir -vp /etc/service/trough-write \
+    && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6222 --master --processes=2 --harakiri=240 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/writer.py >>/tmp/trough-write.out 2>&1' > /etc/service/trough-write/run \
+    && chmod a+x /etc/service/trough-write/run
+
+RUN mkdir -vp /etc/service/trough-write-provisioner-local \
+    && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6112 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_local.py >>/tmp/trough-write-provisioner-local.out 2>&1' > /etc/service/trough-write-provisioner-local/run \
+    && chmod a+x /etc/service/trough-write-provisioner-local/run
+
+RUN mkdir -vp /etc/service/trough-write-provisioner-server \
+    && echo '#!/bin/bash\nvenv=/opt/trough-ve3\nsource $venv/bin/activate\nsleep 1\npython -c $"import doublethink ; from trough.settings import settings ; rr = doublethink.Rethinker(settings[\"RETHINKDB_HOSTS\"]) ; rr.db(\"trough_configuration\").wait().run()"\nexec uwsgi --venv=$venv --http :6111 --master --processes=2 --harakiri=20 --max-requests=50000 --vacuum --die-on-term --wsgi-file $venv/bin/write_provisioner_server.py >>/tmp/trough-write-provisioner-server.out 2>&1' > /etc/service/trough-write-provisioner-server/run \
+    && chmod a+x /etc/service/trough-write-provisioner-server/run
+
diff --git a/tests/run-tests.sh b/tests/run-tests.sh
index 747f042..68d77a4 100755
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -5,8 +5,6 @@
 # features enabled, against that instance of rethinkdb, and also run without
 # rethinkdb features enabled. With python 2.7 and 3.4.
 #
-# tests/conftest.py - command line options for warcprox tests
-#
 # Copyright (C) 2015-2017 Internet Archive
 #
 # This program is free software; you can redistribute it and/or
@@ -44,7 +42,7 @@ do
                 && py.test -v tests \
                 && py.test -v --rethinkdb-dedup-url=rethinkdb://localhost/test1/dedup tests \
                 && py.test -v --rethinkdb-big-table-url=rethinkdb://localhost/test2/captures tests \
-                && py.test -v --rethinkdb-trough-db-url=rethinkdb://localhost/test3 tests \
+                && py.test -v --rethinkdb-trough-db-url=rethinkdb://localhost/trough_configuration tests \
                 "
 done
