From bd21fec6d420a1d91d81df79780f687c774ab00d Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 3 Apr 2014 08:56:18 -0700 Subject: [PATCH] update run-uwsgi.sh and add run-gunicorn.sh update README and INSTALL, fix typo only list wb handlers on home page by default pep8 fixes --- INSTALL.rst | 17 ++++++++------- README.rst | 4 ++-- pywb/cdx/cdxobject.py | 2 +- pywb/core/handlers.py | 12 ++++------- pywb/core/views.py | 10 +++++++++ pywb/ui/index.html | 4 +++- pywb/warc/archiveindexer.py | 2 ++ run-gunicorn.sh | 10 +++++++++ run-uwsgi.sh | 41 ++++++++++--------------------------- uwsgi.ini | 13 ++++++++++++ 10 files changed, 64 insertions(+), 51 deletions(-) create mode 100755 run-gunicorn.sh create mode 100644 uwsgi.ini diff --git a/INSTALL.rst b/INSTALL.rst index db36704b..0cc1430d 100644 --- a/INSTALL.rst +++ b/INSTALL.rst @@ -55,7 +55,7 @@ To start a pywb with sample data: 3. Run ``wayback`` (shorthand for ``python -m pywb.apps.wayback``) to start the pywb wayback server with reference WSGI implementation. -OR run ``run-uwsgi.sh`` to start with uWSGI (see below for more info). +OR run ``run-uwsgi.sh`` or ``run-gunicorn.sh`` to start with uWSGI or gunicorn (see below for more info). 4. Test pywb in your browser! (pywb is set to run on port 8080 by default). @@ -71,18 +71,17 @@ If everything worked, the following pages should be loading (served from | ``http://iana.org`` | http://localhost:8080/pywb/iana.org | http://localhost:8080/pywb/\*/iana.org | +------------------------+----------------------------------------+--------------------------------------------+ -uWSGI startup script -^^^^^^^^^^^^^^^^^^^^ +uWSGI and gunicorn startup scripts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -A sample uWSGI start up script, ``run-uwsgi.sh`` which assumes a default -uWSGI installation is provided as well. - -Currently, uWSGI is not installed automatically with this distribution, -but it is recommended for production environments. 
+pywb includes sample uWSGI and gunicorn scripts ``run-uwsgi.sh`` and +``run-gunicorn.sh`` which pip install uwsgi and gunicorn and attempt to launch +the wsgi app with those containers. Please see `uWSGI Installation `_ -for more details on installing uWSGI. +and `Gunicorn QuickStart `_ +for more details on installing these containers. Vagrant ~~~~~~~ diff --git a/README.rst b/README.rst index 03309398..38838898 100644 --- a/README.rst +++ b/README.rst @@ -50,7 +50,7 @@ Given an archive of warcs at ``myarchive/warcs`` 2. Run ``cdx-indexer --sort myarchive/cdx myarchive/warcs`` to generate .cdx files for each warc/arc file in ``myarchive/warcs`` -3. Edit ```` to contain the following. You may replace ``pywb`` with +3. Edit **config.yaml** to contain the following. You may replace ``pywb`` with a name of your choice -- it will be the path to your collection. (Multiple collections can be added for different sets of .cdx files as well) @@ -67,7 +67,7 @@ Given an archive of warcs at ``myarchive/warcs`` If your archives contain ``http://my-archive-page.example.com``, all captures should be accessible by browsing to http://localhost:8080/pywb/\*/my-archived-page.example.com - (You can also ./run-uwsgi.sh for running with those WSGI containers) + (You can also use ``run-uwsgi.sh`` or ``run-gunicorn.sh`` to launch using those WSGI containers) See `INSTALL.rst `_ for additional installation info. 
diff --git a/pywb/cdx/cdxobject.py b/pywb/cdx/cdxobject.py index e90d6567..b64b0861 100644 --- a/pywb/cdx/cdxobject.py +++ b/pywb/cdx/cdxobject.py @@ -1,4 +1,4 @@ -try: # pragma: no cover +try: # pragma: no cover from collections import OrderedDict except ImportError: # pragma: no cover from ordereddict import OrderedDict diff --git a/pywb/core/handlers.py b/pywb/core/handlers.py index 5e85b1dc..d124ac67 100644 --- a/pywb/core/handlers.py +++ b/pywb/core/handlers.py @@ -47,28 +47,24 @@ class WBHandler(WbUrlHandler): return WbResponse.text_response('No Lookup Url Specified') def __str__(self): - return 'WBHandler: ' + str(self.index_reader) + ', ' + str(self.replay) + return 'Web Archive Replay Handler' #================================================================= # Static Content Handler #================================================================= class StaticHandler(BaseHandler): - def __init__(self, static_path, pkg='pywb'): + def __init__(self, static_path): mimetypes.init() self.static_path = static_path - self.pkg = pkg + self.block_loader = BlockLoader() def __call__(self, wbrequest): full_path = self.static_path + wbrequest.wb_url_str try: - #if full_path.startswith('.') or full_path.startswith('file://'): - # data = open(full_path, 'rb') - #else: - # data = pkgutil.get_data(self.pkg, full_path) - data = BlockLoader().load(full_path) + data = self.block_loader.load(full_path) if 'wsgi.file_wrapper' in wbrequest.env: reader = wbrequest.env['wsgi.file_wrapper'](data) diff --git a/pywb/core/views.py b/pywb/core/views.py index 26634e90..8ada84af 100644 --- a/pywb/core/views.py +++ b/pywb/core/views.py @@ -2,6 +2,8 @@ from pywb.utils.timeutils import timestamp_to_datetime from pywb.framework.wbrequestresponse import WbResponse from pywb.framework.memento import make_timemap, LINK_FORMAT +from pywb.core.handlers import WBHandler + import urlparse import logging @@ -54,6 +56,14 @@ def request_hostname(env): return env.get('HTTP_HOST', 'localhost') 
+@template_filter() +def is_wb_handler(obj): + if not hasattr(obj, 'handler'): + return False + + return isinstance(obj.handler, WBHandler) + + #================================================================= class J2TemplateView: def __init__(self, filename): diff --git a/pywb/ui/index.html b/pywb/ui/index.html index 22fd5637..3bbabbe2 100644 --- a/pywb/ui/index.html +++ b/pywb/ui/index.html @@ -3,7 +3,9 @@ The following archive collections are available:
    -{% for route in routes %} +{% for route in routes %} + {% if route | is_wb_handler %}
  • {{ '/' + route.path }}: {{ route | string }}
  • + {% endif %} {% endfor %}
diff --git a/pywb/warc/archiveindexer.py b/pywb/warc/archiveindexer.py index e6e14c1f..6ee3a10c 100644 --- a/pywb/warc/archiveindexer.py +++ b/pywb/warc/archiveindexer.py @@ -344,9 +344,11 @@ def remove_ext(filename): return filename + def cdx_filename(filename): return remove_ext(filename) + '.cdx' + def index_to_dir(inputs, output, sort): for fullpath, filename in iter_file_or_dir(inputs): diff --git a/run-gunicorn.sh b/run-gunicorn.sh new file mode 100755 index 00000000..1ab57bf3 --- /dev/null +++ b/run-gunicorn.sh @@ -0,0 +1,10 @@ +#!/bin/sh +pip install gunicorn + +if [ $? -ne 0 ]; then + echo "gunicorn install failed" >&2 + exit 1 +fi + +export PYWB_CONFIG_FILE=config.yaml +gunicorn -w 4 pywb.apps.wayback -b 0.0.0.0:8080 diff --git a/run-uwsgi.sh b/run-uwsgi.sh index d2dd926f..79da11df 100755 --- a/run-uwsgi.sh +++ b/run-uwsgi.sh @@ -1,39 +1,20 @@ #!/bin/sh +# requires uwsgi +pip install uwsgi + +if [ $? -ne 0 ]; then + echo "uwsgi install failed" >&2 + exit 1 +fi + + mypath=$(cd `dirname $0` && pwd) -# Set a different config file -#export 'PYWB_CONFIG_FILE=myconfig.yaml' +params="$mypath/uwsgi.ini" -app="pywb.apps.wayback" - -params="--http-socket :8080 -b 65536" -#params="--static-map /static=$mypath/static --http-socket :8080 -b 65536" - -if [ -z "$1" ]; then - # Standard root config - params="$params --wsgi $app" -else - # run with --mount to specify a non-root context - # requires a file not a package, so creating a mount_run.py to load the package - echo "#!/bin/python\n" > $mypath/mount_run.py - echo "import $app\napplication = $app.application" >> $mypath/mount_run.py - params="$params --mount $1=mount_run.py --no-default-app --manage-script-name" -fi - -# Support for virtualenv if [ -n "$VIRTUAL_ENV" ] ; then params="$params -H $VIRTUAL_ENV" fi -# Support for default, non-virtualenv path on OS X -osx_uwsgi_path="/System/Library/Frameworks/Python.framework/Versions/2.7/bin/uwsgi" - -if [ -e "$osx_uwsgi_path" ]; then - uwsgi=$osx_uwsgi_path -else - uwsgi="uwsgi" -fi - 
-$uwsgi $params - +uwsgi $params diff --git a/uwsgi.ini b/uwsgi.ini new file mode 100644 index 00000000..38f79b5f --- /dev/null +++ b/uwsgi.ini @@ -0,0 +1,13 @@ +[uwsgi] +if-not-env = PORT +http-socket = :8080 +endif = + +master = true +processes = 10 +buffer-size = 65536 +die-on-term = true + +# specify config file here +env = PYWB_CONFIG_FILE=config.yaml +wsgi = pywb.apps.wayback