Merge pull request #28 from ikreymer/pkg-reorg

pywb pkg refactoring: create pywb.framework, pywb.core and pywb.apps
2025-03-15 00:03:28 +01:00 · 2014-03-03 12:04:12 -08:00 · 2014-03-03 12:04:12 -08:00 · 5a28bc6992
commit 5a28bc6992
parent 06a22c845b 2d4ae62fbe
45 changed files with 759 additions and 660 deletions
--- a/pywb/apps/init.py
+++ b/pywb/apps/init.py
--- a/pywb/apps/cdx_server.py
+++ b/pywb/apps/cdx_server.py
@ -0,0 +1,17 @@
+from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
+
+from pywb.core.cdx_handler import create_cdx_server_app
+
+#=================================================================
+# init cdx server app
+#
+# Standalone entry point for the cdx server: builds the module-level
+# `application` object by passing create_cdx_server_app to init_app
+#=================================================================
+
+# cdx-server only config: yaml file path handed to init_app below
+DEFAULT_CONFIG = 'pywb/cdx/config.yaml'
+
+# module-level app object
+# (presumably the name a wsgi container imports -- TODO confirm)
+application = init_app(create_cdx_server_app,
+                       load_yaml=True,
+                       config_file=DEFAULT_CONFIG)
+
+# when executed directly, hand the app to start_wsgi_server
+if __name__ == "__main__":
+    start_wsgi_server(application)
--- a/pywb/apps/wayback.py
+++ b/pywb/apps/wayback.py
@ -0,0 +1,10 @@
+from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
+from pywb.core.pywb_init import create_wb_router
+
+#=================================================================
+# init pywb app
+#
+# Builds the module-level `application` object by passing
+# create_wb_router to init_app; no config_file is specified here
+#=================================================================
+application = init_app(create_wb_router, load_yaml=True)
+
+# when executed directly, hand the app to start_wsgi_server
+if __name__ == "__main__":
+    start_wsgi_server(application)
--- a/pywb/cdx/cdxdomainspecific.py
+++ b/pywb/cdx/cdxdomainspecific.py
@ -9,6 +9,7 @@ from pywb.utils.canonicalize import unsurt, UrlCanonicalizer

 from query import CDXQuery

+
 #=================================================================
 def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
    """
--- a/pywb/cdx/cdxobject.py
+++ b/pywb/cdx/cdxobject.py
@ -4,9 +4,11 @@ import itertools
 from urllib import urlencode
 from urlparse import parse_qs

+from pywb.utils.wbexception import WbException
+

 #=================================================================
-class CDXException(Exception):
+class CDXException(WbException):
    def status(self):
        return '400 Bad Request'

@ -61,7 +63,7 @@ class CDXObject(OrderedDict):
                cdxformat = i

        if not cdxformat:
-            raise Exception('unknown {0}-field cdx format'.format(len(fields)))
+            raise CDXException('unknown {0}-field cdx format'.format(len(fields)))

        for header, field in itertools.izip(cdxformat, fields):
            self[header] = field
@ -85,8 +87,15 @@ class CDXObject(OrderedDict):
        """
        if fields is None:
            return str(self) + '\n'
-        else:
-            return ' '.join(self[x] for x in fields) + '\n'
+
+        try:
+            result = ' '.join(self[x] for x in fields) + '\n'
+        except KeyError as ke:
+            msg = 'Invalid field "{0}" found in fields= argument'
+            msg = msg.format(ke.message)
+            raise CDXException(msg)
+
+        return result

    def __str__(self):
        if self.cdxline:
@ -109,7 +118,7 @@ class IDXObject(OrderedDict):

        if len(fields) < self.NUM_REQ_FIELDS:
            msg = 'invalid idx format: {0} fields found, {1} required'
-            raise Exception(msg.format(len(fields), self.NUM_REQ_FIELDS))
+            raise CDXException(msg.format(len(fields), self.NUM_REQ_FIELDS))

        for header, field in itertools.izip(self.FORMAT, fields):
            self[header] = field
--- a/pywb/cdx/cdxops.py
+++ b/pywb/cdx/cdxops.py
@ -31,8 +31,18 @@ def cdx_load(sources, query, perms_checker=None, process=True):
    if perms_checker:
        cdx_iter = restrict_cdx(cdx_iter, query, perms_checker)

+    if query.output == 'text':
+        cdx_iter = cdx_to_text(cdx_iter, query.fields)
+
    return cdx_iter

+
+#=================================================================
+def cdx_to_text(cdx_iter, fields):
+    """
+    Lazily convert each cdx object from cdx_iter to its text form,
+    passing the requested fields through to its to_text() method
+    """
+    for cdx_obj in cdx_iter:
+        text_line = cdx_obj.to_text(fields)
+        yield text_line
+
+
 #=================================================================
 def restrict_cdx(cdx_iter, query, perms_checker):
    """
@ -56,6 +66,7 @@ def restrict_cdx(cdx_iter, query, perms_checker):

        yield cdx

+
 #=================================================================
 def process_cdx(cdx_iter, query):
    if query.resolve_revisits:
@ -255,7 +266,6 @@ def cdx_resolve_revisits(cdx_iter):
    originals = {}

    for cdx in cdx_iter:
-        
        is_revisit = cdx.is_revisit()

        digest = cdx['digest']
--- a/pywb/cdx/cdxserver.py
+++ b/pywb/cdx/cdxserver.py
@ -126,14 +126,19 @@ class CDXServer(BaseCDXServer):
            logging.warn('No CDX Sources configured from paths=%s', paths)

    def _add_cdx_source(self, source):
-        if source is None: return
+        if source is None:
+            return
+
        logging.debug('Adding CDX Source: %s', source)
        self.sources.append(source)

    def add_cdx_source(self, source, config):
-        if source is None: return
+        if source is None:
+            return
+
        if isinstance(source, CDXSource):
            self._add_cdx_source(source)
+
        elif isinstance(source, str):
            if os.path.isdir(source):
                for fn in os.listdir(source):
@ -213,5 +218,3 @@ def create_cdx_server(config, ds_rules_file=None):
                      surt_ordered=surt_ordered,
                      ds_rules_file=ds_rules_file,
                      perms_checker=perms_checker)
-
-
--- a/pywb/cdx/cdxsource.py
+++ b/pywb/cdx/cdxsource.py
@ -8,6 +8,7 @@ import urllib
 import urllib2
 import itertools

+
 #=================================================================
 class CDXSource(object):
    """
@ -92,7 +93,6 @@ class RedisCDXSource(CDXSource):
        if config:
            self.key_prefix = config.get('redis_key_prefix', self.key_prefix)

-
    def load_cdx(self, query):
        """
        Load cdx from redis cache, from an ordered list
--- a/pywb/cdx/query.py
+++ b/pywb/cdx/query.py
@ -1,5 +1,6 @@
 from urllib import urlencode
 from urlparse import parse_qs
+from cdxobject import CDXException


 #=================================================================
@ -62,6 +63,9 @@ class CDXQuery(object):
    @property
    def fields(self):
        v = self.params.get('fields')
+        # check old param name
+        if not v:
+            v = self.params.get('fl')
        return v.split(',') if v else None

    @property
@ -105,9 +109,6 @@ class CDXQuery(object):
        """
        params = parse_qs(env['QUERY_STRING'])

-        if not 'output' in params:
-            params['output'] = 'text'
-
        # parse_qs produces arrays for single values
        # cdx processing expects singleton params for all params,
        # except filters, so convert here
@ -116,4 +117,8 @@ class CDXQuery(object):
            if name != 'filter':
                params[name] = val[0]

+        if not 'output' in params:
+            params['output'] = 'text'
+
+
        return params
--- a/pywb/cdx/test/cdxserver_test.py
+++ b/pywb/cdx/test/cdxserver_test.py
@ -187,6 +187,7 @@ import pytest

 def cdx_ops_test(url, sources = [test_cdx_dir + 'iana.cdx'], **kwparams):
    kwparams['url'] = url
+    kwparams['output'] = 'cdxobject'
    fields = kwparams.get('fields')
    if fields:
        fields = fields.split(',')
--- a/pywb/cdx/test/wsgi_cdxserver_test.py
+++ b/pywb/cdx/test/wsgi_cdxserver_test.py
@ -1,15 +0,0 @@
-import webtest
-from pywb.cdx.wsgi_cdxserver import create_app
-from pywb import get_test_dir
-
-class TestCdx:
-    def setup(self):
-        self.app = create_app(get_test_dir() + 'cdx/')
-        self.testapp = webtest.TestApp(self.app)
-
-    def test_cdx(self):
-        resp = self.testapp.get('/cdx?url=http://www.iana.org/_css/2013.1/screen.css')
-        assert resp.content_type == 'text/plain'
-        assert resp.content_length > 0
-
-
--- a/pywb/cdx/wsgi_cdxserver.py
+++ b/pywb/cdx/wsgi_cdxserver.py
@ -1,103 +0,0 @@
-from werkzeug.wrappers import BaseResponse
-from cdxserver import create_cdx_server
-from pywb import get_test_dir
-from query import CDXQuery
-
-import logging
-import os
-import yaml
-import pkg_resources
-
-#=================================================================
-CONFIG_FILE = 'config.yaml'
-
-RULES_FILE = 'rules.yaml'
-
-DEFAULT_PORT = 8080
-
-#=================================================================
-
-class CDXQueryRequest(object):
-    def __init__(self, environ):
-        self.query = CDXQuery.from_wsgi_env(environ)
-
-
-class WSGICDXServer(object):
-    def __init__(self, config, rules_file):
-        self.cdxserver = create_cdx_server(config, rules_file)
-
-    def __call__(self, environ, start_response):
-        request = CDXQueryRequest(environ)
-        try:
-            logging.debug('request.args=%s', request.query)
-            result = self.cdxserver.load_cdx_query(request.query)
-
-            # TODO: select response type by "output" parameter
-            response = PlainTextResponse(result, request.query.fields)
-            return response(environ, start_response)
-        except Exception as exc:
-            logging.error('load_cdx failed', exc_info=1)
-            # TODO: error response should be different for each response
-            # type
-            start_response('400 Error', [('Content-Type', 'text/plain')])
-            return [str(exc)]
-
-def cdx_text_out(cdx, fields):
-    if not fields:
-        return str(cdx) + '\n'
-    else:
-        logging.info('cdx fields=%s', cdx.keys)
-        # TODO: this will results in an exception if fields contain
-        # non-existent field name.
-        return ' '.join(cdx[x] for x in fields) + '\n'
-
-class PlainTextResponse(BaseResponse):
-    def __init__(self, cdxitr, fields, status=200, content_type='text/plain'):
-        super(PlainTextResponse, self).__init__(
-            response=(
-                cdx.to_text(fields) for cdx in cdxitr
-                ),
-            status=status, content_type=content_type)
-
-# class JsonResponse(Response):
-#     pass
-# class MementoResponse(Response):
-#     pass
-
-def create_app(config=None):
-    logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
-                        level=logging.DEBUG)
-
-    if not config:
-        index_paths = get_test_dir() + 'cdx/'
-        config = dict(index_paths=index_paths)
-
-    return WSGICDXServer(config, RULES_FILE)
-
-if __name__ == "__main__":
-    from optparse import OptionParser
-    from werkzeug.serving import run_simple
-
-    opt = OptionParser('%prog [OPTIONS]')
-    opt.add_option('-p', '--port', type='int', default=None)
-
-    options, args = opt.parse_args()
-
-    configdata = pkg_resources.resource_string(__name__, CONFIG_FILE)
-    config = yaml.load(configdata)
-
-    port = options.port
-    if port is None:
-        port = (config and config.get('port')) or DEFAULT_PORT
-
-    app = create_app(config)
-
-    logging.debug('Starting CDX Server on port %s', port)
-    try:
-        run_simple('0.0.0.0', port, app, use_reloader=True, use_debugger=True)
-    except KeyboardInterrupt as ex:
-        pass
-    logging.debug('Stopping CDX Server')
-else:
-    # XXX pass production config
-    application = create_app()
--- a/pywb/config_utils.py
+++ b/pywb/config_utils.py
@ -1,56 +0,0 @@
-import views
-import handlers
-import replay_views
-import logging
-
-from pywb.warc.recordloader import ArcWarcRecordLoader
-from pywb.warc.resolvingloader import ResolvingLoader
-from pywb.rewrite.rewrite_content import RewriteContent
-
-#=================================================================
-# Config Loading
-#=================================================================
-def load_template_file(file, desc = None, view_class = views.J2TemplateView):
-    if file:
-        logging.debug('Adding {0}: {1}'.format(desc if desc else name, file))
-        file = view_class(file)
-
-    return file
-
-#=================================================================
-def create_wb_handler(cdx_server, config, ds_rules_file=None):
-
-    record_loader = ArcWarcRecordLoader(cookie_maker = config.get('cookie_maker'))
-    paths = config.get('archive_paths')
-
-    resolving_loader = ResolvingLoader(paths=paths,
-                                       cdx_server=cdx_server,
-                                       record_loader=record_loader)
-
-    replayer = replay_views.ReplayView(
-        content_loader = resolving_loader,
-
-        content_rewriter = RewriteContent(ds_rules_file=ds_rules_file),
-
-        head_insert_view = load_template_file(config.get('head_insert_html'), 'Head Insert'),
-
-        buffer_response = config.get('buffer_response', True),
-
-        redir_to_exact = config.get('redir_to_exact', True),
-
-        reporter = config.get('reporter')
-    )
-
-
-    wb_handler = handlers.WBHandler(
-        cdx_server,
-
-        replayer,
-
-        html_view = load_template_file(config.get('query_html'), 'Captures Page', views.J2HtmlCapturesView),
-
-        search_view = load_template_file(config.get('search_html'), 'Search Page'),
-    )
-
-    return wb_handler
-
--- a/pywb/core/init.py
+++ b/pywb/core/init.py
--- a/pywb/core/cdx_handler.py
+++ b/pywb/core/cdx_handler.py
@ -0,0 +1,43 @@
+from pywb.cdx.query import CDXQuery
+from pywb.cdx.cdxserver import create_cdx_server
+
+from pywb.framework.archivalrouter import ArchivalRouter, Route
+from pywb.framework.basehandlers import BaseHandler
+
+from views import TextCapturesView
+
+
+#=================================================================
+class CDXHandler(BaseHandler):
+    """
+    Handler for cdx queries: extracts the query params from the wsgi
+    environ, loads the matching cdx lines from the index reader and
+    renders them through the view (a TextCapturesView unless another
+    view is supplied)
+    """
+    def __init__(self, index_reader, view=None):
+        self.index_reader = index_reader
+
+        # fall back to the plain-text captures view
+        if not view:
+            view = TextCapturesView()
+
+        self.view = view
+
+    def __call__(self, wbrequest):
+        query_params = CDXQuery.extract_params_from_wsgi_env(wbrequest.env)
+        results = self.index_reader.load_cdx(**query_params)
+        return self.view.render_response(wbrequest, results)
+
+    def __str__(self):
+        reader_desc = str(self.index_reader)
+        return 'CDX Handler: ' + reader_desc
+
+
+#=================================================================
+DEFAULT_RULES = 'pywb/rules.yaml'
+
+#=================================================================
+def create_cdx_server_app(config):
+    """
+    Build the router for a standalone cdx server, to be wrapped
+    in a wsgi app.
+    A single route, 'cdx', is registered, backed by a CDXHandler
+    TODO: more complex example with multiple collections?
+    """
+    server = create_cdx_server(config, DEFAULT_RULES)
+    listen_port = config.get('port')
+
+    cdx_route = Route('cdx', CDXHandler(server))
+
+    return ArchivalRouter([cdx_route], port=listen_port)
--- a/pywb/core/handlers.py
+++ b/pywb/core/handlers.py
@ -1,30 +1,13 @@
-import urlparse
 import pkgutil
 import mimetypes
 import time

-from pywb.rewrite.wburl import WbUrl
-from pywb.cdx.query import CDXQuery
-from wbrequestresponse import WbResponse
-from wbexceptions import WbException, NotFoundException
+from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
+from pywb.framework.wbrequestresponse import WbResponse
+from pywb.framework.wbexceptions import WbException, NotFoundException
 from views import TextCapturesView


-#=================================================================
-class BaseHandler(object):
-    def __call__(self, wbrequest):
-        return wbrequest
-
-    def get_wburl_type(self):
-        return None
-
-
-#=================================================================
-class WbUrlHandler(BaseHandler):
-    def get_wburl_type(self):
-        return WbUrl
-
-
 #=================================================================
 # Standard WB Handler
 #=================================================================
@ -33,11 +16,15 @@ class WBHandler(WbUrlHandler):
                 html_view=None, search_view=None):

        self.index_reader = index_reader
+
        self.replay = replay

-        self.text_view = TextCapturesView()
+        self.text_query_view = TextCapturesView()
+
+        self.query_view = html_view
+        if not self.query_view:
+            self.query_view = text_query_view

-        self.html_view = html_view
        self.search_view = search_view

    def __call__(self, wbrequest):
@ -49,11 +36,10 @@ class WBHandler(WbUrlHandler):

        # new special modifier to always show cdx index
        if wbrequest.wb_url.mod == 'cdx_':
-            return self.text_view.render_response(wbrequest, cdx_lines)
+            return self.text_query_view.render_response(wbrequest, cdx_lines)

        if (wbrequest.wb_url.type == wbrequest.wb_url.QUERY) or (wbrequest.wb_url.type == wbrequest.wb_url.URL_QUERY):
-            query_view = self.html_view if self.html_view else self.text_view
-            return query_view.render_response(wbrequest, cdx_lines)
+            return self.query_view.render_response(wbrequest, cdx_lines)

        with PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
            return self.replay(wbrequest, cdx_lines)
@ -70,29 +56,11 @@ class WBHandler(WbUrlHandler):
        return 'WBHandler: ' + str(self.index_reader) + ', ' + str(self.replay)


-#=================================================================
-# CDX-Server Handler -- pass all params to cdx server
-#=================================================================
-class CDXHandler(BaseHandler):
-    def __init__(self, index_reader, view = None):
-        self.index_reader = index_reader
-        self.view = view if view else TextCapturesView()
-
-    def __call__(self, wbrequest):
-        params = CDXQuery.extract_params_from_wsgi_env(wbrequest.env)
-        cdx_lines = self.index_reader.load_cdx(**params)
-
-        return self.view.render_response(wbrequest, cdx_lines)
-
-    def __str__(self):
-        return 'Index Reader: ' + str(self.index_reader)
-
-
 #=================================================================
 # Static Content Handler
 #=================================================================
 class StaticHandler(BaseHandler):
-    def __init__(self, static_path, pkg = __package__):
+    def __init__(self, static_path, pkg = 'pywb'):
        mimetypes.init()

        self.static_path = static_path
--- a/pywb/core/indexreader.py
+++ b/pywb/core/indexreader.py
@ -29,6 +29,7 @@ class IndexReader(object):
            params.update(wbrequest.custom_params)

        params['allowFuzzy'] = True
+        params['output'] = 'cdxobject'

        cdxlines = self.load_cdx(url=wburl.url, **params)

--- a/pywb/core/pywb_init.py
+++ b/pywb/core/pywb_init.py
@ -0,0 +1,181 @@
+from pywb.framework.archivalrouter import ArchivalRouter, Route
+from pywb.framework.proxy import ProxyArchivalRouter
+
+from pywb.warc.recordloader import ArcWarcRecordLoader
+from pywb.warc.resolvingloader import ResolvingLoader
+
+from pywb.rewrite.rewrite_content import RewriteContent
+
+from indexreader import IndexReader
+from views import J2TemplateView, J2HtmlCapturesView
+from replay_views import ReplayView
+
+from handlers import WBHandler
+from handlers import StaticHandler
+from cdx_handler import CDXHandler
+from handlers import DebugEchoHandler, DebugEchoEnvHandler
+
+
+import os
+import yaml
+import logging
+
+
+#=================================================================
+# Built-in fallback settings: create_wb_router() consults these
+# (through DictChain) for any key missing from the passed-in config
+DEFAULTS = {
+    'hostpaths':  ['http://localhost:8080'],
+    # collection name -> cdx index path (a plain string value is
+    # expanded to {'index_paths': value} in create_wb_router)
+    'collections': {'pywb': './sample_archive/cdx/'},
+    'archive_paths': './sample_archive/warcs/',
+
+    # template paths, each loaded via load_template_file()
+    'head_insert_html': 'ui/head_insert.html',
+    'query_html': 'ui/query.html',
+    'search_html': 'ui/search.html',
+    'home_html': 'ui/index.html',
+    'error_html': 'ui/error.html',
+
+    # route name -> static path, each served by a StaticHandler
+    'static_routes': {'static/default': 'static/'},
+
+    # rules file passed as ds_rules_file to the index reader and handler
+    'domain_specific_rules': 'pywb/rules.yaml',
+}
+
+#=================================================================
+class DictChain(object):
+    """
+    Lookup-only chain over several dict-like objects.
+
+    The objects are consulted in the order given, so earlier ones
+    take precedence over later ones.
+    """
+    def __init__(self, *dicts):
+        self.dicts = dicts
+
+    def get(self, key, default_val=None):
+        """
+        Return the first value for key that is not None, searching
+        the chained dicts in order, or default_val if there is none.
+
+        A key explicitly set to None is treated as missing, so lookup
+        falls through to the next dict; other falsy values (0, '',
+        False) are returned as-is.
+        """
+        for d in self.dicts:
+            val = d.get(key)
+            if val is not None:
+                return val
+        return default_val
+
+
+#=================================================================
+def load_template_file(file, desc=None, view_class=J2TemplateView):
+    """
+    Wrap a template path in a view object.
+
+    :param file: template path; a falsy value is returned unchanged
+    :param desc: optional label used in the debug log message
+    :param view_class: callable invoked with the path to build the view
+    :return: view_class(file), or the falsy input if no path was given
+    """
+    if not file:
+        return file
+
+    # desc is optional: fall back to a generic label for the log message
+    label = desc if desc else 'Template'
+    logging.debug('Adding {0}: {1}'.format(label, file))
+
+    return view_class(file)
+
+
+#=================================================================
+def create_wb_handler(cdx_server, config, ds_rules_file=None):
+    """
+    Assemble a WBHandler from config: a ReplayView wrapping
+    a ResolvingLoader and RewriteContent, plus the optional captures
+    and search page templates named in config
+    """
+    loader = ArcWarcRecordLoader(cookie_maker=config.get('cookie_maker'))
+
+    content_loader = ResolvingLoader(paths=config.get('archive_paths'),
+                                     cdx_server=cdx_server,
+                                     record_loader=loader)
+
+    head_insert = load_template_file(config.get('head_insert_html'),
+                                     'Head Insert')
+
+    rewriter = RewriteContent(ds_rules_file=ds_rules_file)
+
+    replay_view = ReplayView(
+        content_loader=content_loader,
+        content_rewriter=rewriter,
+        head_insert_view=head_insert,
+        buffer_response=config.get('buffer_response', True),
+        redir_to_exact=config.get('redir_to_exact', True),
+        reporter=config.get('reporter')
+    )
+
+    captures_view = load_template_file(config.get('query_html'),
+                                       'Captures Page',
+                                       J2HtmlCapturesView)
+
+    search_page = load_template_file(config.get('search_html'),
+                                     'Search Page')
+
+    return WBHandler(cdx_server,
+                     replay_view,
+                     html_view=captures_view,
+                     search_view=search_page)
+
+
+#=================================================================
+def create_wb_router(passed_config=None):
+    """
+    Build the top-level router from a config mapping.
+
+    Settings missing from passed_config fall back to DEFAULTS. One route
+    is created per entry in 'collections' (plus a '<name>-cdx' route when
+    'enable_cdx_api' is set), followed by the optional debug echo routes
+    and the static routes.
+
+    :param passed_config: dict-like config (must support get());
+                          None is treated as an empty config
+    :return: a ProxyArchivalRouter if 'enable_http_proxy' is set,
+             otherwise an ArchivalRouter
+    """
+    # None default instead of a shared mutable {} default argument
+    if passed_config is None:
+        passed_config = {}
+
+    config = DictChain(passed_config, DEFAULTS)
+
+    routes = []
+
+    hostpaths = config.get('hostpaths')
+
+    port = config.get('port')
+
+    # collections based on cdx source
+    collections = config.get('collections')
+
+    for name, value in collections.iteritems():
+        # shorthand: a plain string is the collection's index path
+        if isinstance(value, str):
+            value = {'index_paths': value}
+
+        # per-collection settings take precedence over global config
+        route_config = DictChain(value, config)
+
+        ds_rules_file = route_config.get('domain_specific_rules', None)
+        cdx_server = IndexReader(route_config, ds_rules_file)
+
+        wb_handler = create_wb_handler(
+            cdx_server=cdx_server,
+            config=route_config,
+            ds_rules_file=ds_rules_file,
+        )
+
+        logging.debug('Adding Collection: ' + name)
+
+        route_class = route_config.get('route_class', Route)
+
+        routes.append(route_class(name, wb_handler, config=route_config))
+
+        # cdx query handler
+        if route_config.get('enable_cdx_api', False):
+            routes.append(Route(name + '-cdx', CDXHandler(cdx_server)))
+
+    if config.get('debug_echo_env', False):
+        routes.append(Route('echo_env', DebugEchoEnvHandler()))
+
+    if config.get('debug_echo_req', False):
+        routes.append(Route('echo_req', DebugEchoHandler()))
+
+    static_routes = config.get('static_routes')
+
+    for static_name, static_path in static_routes.iteritems():
+        routes.append(Route(static_name, StaticHandler(static_path)))
+
+    # Check for new proxy mode!
+    if config.get('enable_http_proxy', False):
+        router = ProxyArchivalRouter
+    else:
+        router = ArchivalRouter
+
+    # Finally, create wb router
+    return router(
+        routes,
+        # Specify hostnames that pywb will be running on
+        # This will help catch occasionally missed rewrites that
+        # fall-through to the host
+        # (See archivalrouter.ReferRedirect)
+        hostpaths=hostpaths,
+        port=port,
+
+        abs_path=config.get('absolute_paths', True),
+
+        home_view=load_template_file(config.get('home_html'), 'Home Page'),
+        error_view=load_template_file(config.get('error_html'), 'Error Page')
+    )
--- a/pywb/core/replay_views.py
+++ b/pywb/core/replay_views.py
@ -2,9 +2,9 @@ import StringIO

 from pywb.rewrite.url_rewriter import UrlRewriter
 from pywb.utils.bufferedreaders import ChunkedDataReader
-from wbrequestresponse import WbResponse
+from pywb.framework.wbrequestresponse import WbResponse

-from wbexceptions import CaptureException, InternalRedirect
+from pywb.framework.wbexceptions import CaptureException, InternalRedirect
 from pywb.warc.recordloader import ArchiveLoadFailed

 from pywb.utils.loaders import LimitReader
@ -51,7 +51,7 @@ class ReplayView:
                self._redirect_if_needed(wbrequest, cdx)

                # one more check for referrer-based self-redirect
-                self._reject_referrer_self_redirect(wbrequest, status_headers)
+                self._reject_referrer_self_redirect(wbrequest)

                response = None

@ -177,25 +177,30 @@ class ReplayView:


    def _reject_self_redirect(self, wbrequest, cdx, status_headers):
-        # self-redirect via location
+        """
+        Check if response is a 3xx redirect to the same url
+        If so, reject this capture to avoid causing redirect loop
+        """
        if status_headers.statusline.startswith('3'):
            request_url = wbrequest.wb_url.url.lower()
            location_url = status_headers.get_header('Location').lower()

-            #TODO: canonicalize before testing?
            if (UrlRewriter.strip_protocol(request_url) == UrlRewriter.strip_protocol(location_url)):
                raise CaptureException('Self Redirect: ' + str(cdx))

-    def _reject_referrer_self_redirect(self, wbrequest, status_headers):
-        # at correct timestamp now, but must check for referrer redirect
-        # indirect self-redirect, via meta-refresh, if referrer is same as current url
-        if status_headers.statusline.startswith('2'):
-            # build full url even if using relative-rewriting
-            request_url = wbrequest.host_prefix + wbrequest.rel_prefix + str(wbrequest.wb_url)
-            referrer_url = wbrequest.referrer
-            if (referrer_url and UrlRewriter.strip_protocol(request_url) == UrlRewriter.strip_protocol(referrer_url)):
-                raise CaptureException('Self Redirect via Referrer: ' + str(wbrequest.wb_url))
-
-
+    def _reject_referrer_self_redirect(self, wbrequest):
+        """
+        Perform final check for referrer based self-redirect.
+        This method should be called after verifying request timestamp matches capture.
+        if referrer is same as current url, reject this response and try another capture
+        """
+        if not wbrequest.referrer:
+            return

+        # build full url even if using relative-rewriting
+        request_url = (wbrequest.host_prefix +
+                       wbrequest.rel_prefix + str(wbrequest.wb_url))

+        if (UrlRewriter.strip_protocol(request_url) ==
+            UrlRewriter.strip_protocol(wbrequest.referrer)):
+            raise CaptureException('Self Redirect via Referrer: ' + str(wbrequest.wb_url))
--- a/pywb/core/views.py
+++ b/pywb/core/views.py
@ -1,6 +1,6 @@
-import pywb.utils.timeutils as timeutils
+from pywb.utils.timeutils import timestamp_to_datetime
+from pywb.framework.wbrequestresponse import WbResponse

-import wbrequestresponse
 import urlparse
 import time

@ -18,7 +18,7 @@ class StaticTextView:
        return self.text

    def render_response(self, **kwargs):
-        return wbrequestresponse.WbResponse.text_stream(self.text)
+        return WbResponse.text_stream(self.text)

 #=================================================================
 class J2TemplateView:
@ -34,7 +34,7 @@ class J2TemplateView:
        if template_dir.startswith('.') or template_dir.startswith('file://'):
            loader = FileSystemLoader(template_dir)
        else:
-            loader = PackageLoader(__package__, template_dir)
+            loader = PackageLoader('pywb', template_dir)

        jinja_env = Environment(loader = loader, trim_blocks = True)
        jinja_env.filters['format_ts'] = J2TemplateView.format_ts
@ -51,13 +51,13 @@ class J2TemplateView:
    def render_response(self, **kwargs):
        template_result = self.render_to_string(**kwargs)
        status = kwargs.get('status', '200 OK')
-        return wbrequestresponse.WbResponse.text_response(str(template_result), status = status, content_type = 'text/html; charset=utf-8')
+        return WbResponse.text_response(str(template_result), status = status, content_type = 'text/html; charset=utf-8')


    # Filters
    @staticmethod
    def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'):
-        value = timeutils.timestamp_to_datetime(value)
+        value = timestamp_to_datetime(value)
        return value.strftime(format_)

    @staticmethod
@ -90,7 +90,7 @@ class TextCapturesView:
                cdx += '\n'
            return cdx
        cdx_lines = imap(to_str, cdx_lines)
-        return wbrequestresponse.WbResponse.text_stream(cdx_lines)
+        return WbResponse.text_stream(cdx_lines)



--- a/pywb/framework/init.py
+++ b/pywb/framework/init.py
--- a/pywb/framework/archivalrouter.py
+++ b/pywb/framework/archivalrouter.py
@ -1,17 +1,31 @@
 import urlparse
 import re

-from wbrequestresponse import WbRequest, WbResponse
 from pywb.rewrite.url_rewriter import UrlRewriter
+from wbrequestresponse import WbRequest, WbResponse


 #=================================================================
 # ArchivalRouter -- route WB requests in archival mode
 #=================================================================
-class ArchivalRouter:
-    def __init__(self, routes, hostpaths=None, abs_path=True, home_view=None, error_view=None):
+class ArchivalRouter(object):
+    def __init__(self, routes,
+                 hostpaths=None,
+                 port=None,
+                 abs_path=True,
+                 home_view=None,
+                 error_view=None):
+
        self.routes = routes
-        self.fallback = ReferRedirect(hostpaths)
+
+        # optional port setting may be ignored by wsgi container
+        self.port = port
+
+        if hostpaths:
+            self.fallback = ReferRedirect(hostpaths)
+        else:
+            self.fallback = None
+
        self.abs_path = abs_path

        self.home_view = home_view
@ -29,26 +43,27 @@ class ArchivalRouter:

        return self.fallback(env, self.routes) if self.fallback else None

-
    def render_home_page(self):
        # render the homepage!
        if self.home_view:
-            return self.home_view.render_response(routes = self.routes)
+            return self.home_view.render_response(routes=self.routes)
        else:
            # default home page template
            text = '\n'.join(map(str, self.routes))
            return WbResponse.text_response(text)

+
 #=================================================================
 # Route by matching regex (or fixed prefix)
 # of request uri (excluding first '/')
 #=================================================================
-class Route:
+class Route(object):
    # match upto next / or ? or end
-    SLASH_QUERY_LOOKAHEAD ='(?=/|$|\?)'
+    SLASH_QUERY_LOOKAHEAD = '(?=/|$|\?)'

+    def __init__(self, regex, handler, coll_group=0, config={},
+                 lookahead=SLASH_QUERY_LOOKAHEAD):

-    def __init__(self, regex, handler, coll_group = 0, config = {}, lookahead = SLASH_QUERY_LOOKAHEAD):
        self.path = regex
        if regex:
            self.regex = re.compile(regex + lookahead)
@ -59,12 +74,11 @@ class Route:
        self.coll_group = coll_group
        self._custom_init(config)

-
    def __call__(self, env, use_abs_prefix):
        wbrequest = self.parse_request(env, use_abs_prefix)
        return self.handler(wbrequest) if wbrequest else None

-    def parse_request(self, env, use_abs_prefix, request_uri = None):
+    def parse_request(self, env, use_abs_prefix, request_uri=None):
        if not request_uri:
            request_uri = env['REL_REQUEST_URI']

@ -75,10 +89,12 @@ class Route:
        matched_str = matcher.group(0)
        if matched_str:
            rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/'
-            wb_url_str = request_uri[len(matched_str) + 2:] # remove the '/' + rel_prefix part of uri
+            # remove the '/' + rel_prefix part of uri
+            wb_url_str = request_uri[len(matched_str) + 2:]
        else:
            rel_prefix = env['SCRIPT_NAME'] + '/'
-            wb_url_str = request_uri[1:] # the request_uri is the wb_url, since no coll
+            # the request_uri is the wb_url, since no coll
+            wb_url_str = request_uri[1:]

        coll = matcher.group(self.coll_group)

@ -88,20 +104,19 @@ class Route:
                              rel_prefix=rel_prefix,
                              coll=coll,
                              use_abs_prefix=use_abs_prefix,
-                              wburl_class = self.handler.get_wburl_type(),
+                              wburl_class=self.handler.get_wburl_type(),
                              urlrewriter_class=UrlRewriter)

-
        # Allow for applying of additional filters
        self._apply_filters(wbrequest, matcher)

        return wbrequest

-
    def _apply_filters(self, wbrequest, matcher):
        for filter in self.filters:
            last_grp = len(matcher.groups())
-            wbrequest.query_filter.append(filter.format(matcher.group(last_grp)))
+            filter_str = filter.format(matcher.group(last_grp))
+            wbrequest.query_filter.append(filter_str)

    def _custom_init(self, config):
        self.filters = config.get('filters', [])
@ -112,7 +127,8 @@ class Route:


 #=================================================================
-# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings
+# ReferRedirect -- redirect urls that have 'fallen through'
+# based on the referrer settings
 #=================================================================
 class ReferRedirect:
    def __init__(self, match_prefixs):
@ -121,7 +137,6 @@ class ReferRedirect:
        else:
            self.match_prefixs = [match_prefixs]

-
    def __call__(self, env, routes):
        referrer = env.get('HTTP_REFERER')

@ -133,7 +148,7 @@ class ReferRedirect:
        ref_split = urlparse.urlsplit(referrer)

        # ensure referrer starts with one of allowed hosts
-        if not any (referrer.startswith(i) for i in self.match_prefixs):
+        if not any(referrer.startswith(i) for i in self.match_prefixs):
            if ref_split.netloc != env.get('HTTP_HOST'):
                return None

@ -144,13 +159,12 @@ class ReferRedirect:
        if app_path:
            # must start with current app name, if not root
            if not path.startswith(app_path):
-                 return None
+                return None

            path = path[len(app_path):]

-
        for route in routes:
-            ref_request = route.parse_request(env, False, request_uri = path)
+            ref_request = route.parse_request(env, False, request_uri=path)
            if ref_request:
                break

@ -174,6 +188,10 @@ class ReferRedirect:
            # 2013/path.html -> /path.html
            rel_request_uri = rel_request_uri[len(timestamp_path) - 1:]

-        final_url = urlparse.urlunsplit((ref_split.scheme, ref_split.netloc, rewriter.rewrite(rel_request_uri), '', ''))
+        final_url = urlparse.urlunsplit((ref_split.scheme,
+                                         ref_split.netloc,
+                                         rewriter.rewrite(rel_request_uri),
+                                         '',
+                                         ''))

        return WbResponse.redir_response(final_url)
--- a/pywb/framework/basehandlers.py
+++ b/pywb/framework/basehandlers.py
@ -0,0 +1,23 @@
+from pywb.rewrite.wburl import WbUrl
+
+
+#=================================================================
+class BaseHandler(object):
+    """
+    Represents a base handler class that handles any request
+    """
+    def __call__(self, wbrequest):
+        # default behavior: hand the request back unchanged
+        return wbrequest
+
+    def get_wburl_type(self):
+        # no wburl class for the base handler; Route.parse_request
+        # passes this value along as wburl_class when building a request
+        return None
+
+
+#=================================================================
+class WbUrlHandler(BaseHandler):
+    """
+    Represents a handler which assumes the request contains a WbUrl
+    Ensure that the WbUrl is parsed in the request
+    """
+    def get_wburl_type(self):
+        # returns the WbUrl class itself (not an instance)
+        return WbUrl
--- a/pywb/framework/proxy.py
+++ b/pywb/framework/proxy.py
@ -2,23 +2,37 @@ from wbrequestresponse import WbResponse, WbRequest
 from archivalrouter import ArchivalRouter
 import urlparse

+
 #=================================================================
 # An experimental router which combines both archival and proxy modes
-# http proxy mode support is very simple: only latest capture is available currently
+# http proxy mode support is very simple so far:
+# only latest capture is available currently
 #=================================================================
+class ProxyArchivalRouter(ArchivalRouter):
+    def __init__(self, routes,
+                 hostpaths=None,
+                 port=None,
+                 abs_path=True,
+                 home_view=None,
+                 error_view=None):
+
+        (super(ProxyArchivalRouter, self).
+                              __init__(routes,
+                                       hostpaths=hostpaths,
+                                       port=port,
+                                       abs_path=abs_path,
+                                       home_view=home_view,
+                                       error_view=error_view))

-class ProxyArchivalRouter:
-    def __init__(self, routes, hostpaths = None, abs_path = True, home_view = None, error_view = None):
-        self.archival = ArchivalRouter(routes, hostpaths, abs_path, home_view, error_view)
        self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view)
-        self.error_view = error_view
+        #self.error_view = error_view

    def __call__(self, env):
-        response = self.archival(env)
+        response = self.proxy(env)
        if response:
            return response

-        response = self.proxy(env)
+        response = super(ProxyArchivalRouter, self).__call__(env)
        if response:
            return response

@ -29,7 +43,7 @@ class ProxyArchivalRouter:
 # Only supports latest capture replay at the moment
 #=================================================================
 class ProxyRouter:
-    def __init__(self, handler, hostpaths = None, error_view = None):
+    def __init__(self, handler, hostpaths=None, error_view=None):
        self.handler = handler
        self.hostpaths = hostpaths

@ -56,27 +70,26 @@ class ProxyRouter:

        return self.handler(wbrequest)

-
    # Proxy Auto-Config (PAC) script for the proxy
    def make_pac_response(self, env):
        server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT']

        buff = 'function FindProxyForURL (url, host) {\n'

-        direct_cond ='    if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n'
+        direct = '    if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n'

        for hostpath in self.hostpaths:
            parts = urlparse.urlsplit(hostpath).netloc.split(':')
-            buff += direct_cond.format(parts[0])
+            buff += direct.format(parts[0])

-        buff += direct_cond.format(env['SERVER_NAME'])
+        buff += direct.format(env['SERVER_NAME'])

        #buff += '\n    return "PROXY {0}";\n}}\n'.format(self.hostpaths[0])
        buff += '\n    return "PROXY {0}";\n}}\n'.format(server_hostport)

-        return WbResponse.text_response(buff, content_type = 'application/x-ns-proxy-autoconfig')
-
+        content_type = 'application/x-ns-proxy-autoconfig'

+        return WbResponse.text_response(buff, content_type=content_type)


 #=================================================================
@ -85,10 +98,11 @@ class ProxyRouter:
 class ProxyHttpsUrlRewriter:
    HTTP = 'http://'
    HTTPS = 'https://'
+
    def __init__(self, wbrequest, prefix):
        pass

-    def rewrite(self, url, mod = None):
+    def rewrite(self, url, mod=None):
        if url.startswith(self.HTTPS):
            return self.HTTP + url[len(self.HTTPS):]
        else:
@ -97,6 +111,5 @@ class ProxyHttpsUrlRewriter:
    def get_timestamp_url(self, timestamp, url):
        return url

-    def get_abs_url(self, url = ''):
+    def get_abs_url(self, url=''):
        return url
-
--- a/pywb/framework/test/test_archivalrouter.py
+++ b/pywb/framework/test/test_archivalrouter.py
@ -84,8 +84,8 @@ False

 """

-from pywb.archivalrouter import Route, ReferRedirect
-from pywb.handlers import BaseHandler, WbUrlHandler
+from pywb.framework.archivalrouter import Route, ReferRedirect
+from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
 import pprint

 def print_req(req):
--- a/pywb/framework/test/test_wbrequestresponse.py
+++ b/pywb/framework/test/test_wbrequestresponse.py
@ -41,7 +41,7 @@ from pywb.rewrite.wburl import WbUrl
 from pywb.rewrite.url_rewriter import UrlRewriter
 from pywb.utils.statusandheaders import StatusAndHeaders

-from pywb.wbrequestresponse import WbRequest, WbResponse
+from pywb.framework.wbrequestresponse import WbRequest, WbResponse


 def print_req_from_uri(request_uri, env={}, use_abs_prefix=False):
--- a/pywb/framework/wbexceptions.py
+++ b/pywb/framework/wbexceptions.py
@ -1,23 +1,22 @@
+from pywb.utils.wbexception import WbException


-class WbException(Exception):
-    pass
-
 class NotFoundException(WbException):
    def status(self):
        return '404 Not Found'

+
 # Exceptions that effect a specific capture and result in a retry
 class CaptureException(WbException):
    def status(self):
        return '500 Internal Server Error'

+
 class InternalRedirect(WbException):
-    def __init__(self, location, status = '302 Internal Redirect'):
+    def __init__(self, location, status='302 Internal Redirect'):
        WbException.__init__(self, 'Redirecting -> ' + location)
        self.status = status
        self.httpHeaders = [('Location', location)]

    def status(self):
        return self.status
-
--- a/pywb/framework/wbrequestresponse.py
+++ b/pywb/framework/wbrequestresponse.py
@ -26,7 +26,6 @@ class WbRequest:
        except KeyError:
            return ''

-
    def __init__(self, env,
                 request_uri=None,
                 rel_prefix='',
@ -40,7 +39,10 @@ class WbRequest:

        self.env = env

-        self.request_uri = request_uri if request_uri else env.get('REL_REQUEST_URI')
+        if request_uri:
+            self.request_uri = request_uri
+        else:
+            self.request_uri = env.get('REL_REQUEST_URI')

        self.coll = coll

@ -55,7 +57,6 @@ class WbRequest:
        else:
            self.wb_prefix = rel_prefix

-
        if not wb_url_str:
            wb_url_str = '/'

@ -83,7 +84,6 @@ class WbRequest:
        # PERF
        env['X_PERF'] = {}

-
    def _is_ajax(self):
        value = self.env.get('HTTP_X_REQUESTED_WITH')
        if not value:
@ -96,7 +96,6 @@ class WbRequest:
            return True
        return False

-
    def __repr__(self):
        varlist = vars(self)
        varstr = pprint.pformat(varlist)
@ -111,32 +110,39 @@ class WbResponse:
    Holds a status_headers object and a response iter, to be
    returned to wsgi container.
    """
-    def __init__(self, status_headers, value = []):
+    def __init__(self, status_headers, value=[]):
        self.status_headers = status_headers
        self.body = value

    @staticmethod
-    def text_stream(text, status = '200 OK', content_type = 'text/plain'):
-        return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = text)
+    def text_stream(stream, status='200 OK', content_type='text/plain'):
+        status_headers = StatusAndHeaders(status,
+                                          [('Content-Type', content_type)])
+
+        return WbResponse(status_headers, value=stream)

    @staticmethod
-    def text_response(text, status = '200 OK', content_type = 'text/plain'):
-        return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = [text])
+    def text_response(text, status='200 OK', content_type='text/plain'):
+        status_headers = StatusAndHeaders(status,
+                                          [('Content-Type', content_type)])
+
+        return WbResponse(status_headers, value=[text])

    @staticmethod
-    def redir_response(location, status = '302 Redirect'):
-        return WbResponse(StatusAndHeaders(status, [('Location', location)]))
-
+    def redir_response(location, status='302 Redirect'):
+        return WbResponse(StatusAndHeaders(status,
+                                           [('Location', location)]))

    def __call__(self, env, start_response):

        # PERF
        perfstats = env.get('X_PERF')
        if perfstats:
-            self.status_headers.headers.append(('X-Archive-Perf-Stats', str(perfstats)))
+            self.status_headers.headers.append(('X-Archive-Perf-Stats',
+                                                str(perfstats)))

-
-        start_response(self.status_headers.statusline, self.status_headers.headers)
+        start_response(self.status_headers.statusline,
+                       self.status_headers.headers)

        if env['REQUEST_METHOD'] == 'HEAD':
            if hasattr(self.body, 'close'):
@ -148,6 +154,5 @@ class WbResponse:
        else:
            return [str(self.body)]

-
    def __repr__(self):
        return str(vars(self))
--- a/pywb/framework/wsgi_wrappers.py
+++ b/pywb/framework/wsgi_wrappers.py
@ -0,0 +1,165 @@
+from pywb.utils.wbexception import WbException
+from pywb.utils.loaders import load_yaml_config
+
+from wbexceptions import NotFoundException, InternalRedirect
+from wbrequestresponse import WbResponse, StatusAndHeaders
+
+
+import os
+import importlib
+import logging
+
+
+DEFAULT_PORT = 8080
+
+#=================================================================
+# adapted from wsgiref.request_uri, but doesn't include domain name
+# and allows all characters which are allowed in the path segment
+# according to: http://tools.ietf.org/html/rfc3986#section-3.3
+# explained here:
+# http://stackoverflow.com/questions/4669692/
+#   valid-characters-for-directory-part-of-a-url-for-short-links
+
+
+def rel_request_uri(environ, include_query=1):
+    """
+    Build the request uri relative to the app from a wsgi environ:
+    the percent-quoted PATH_INFO, followed by '?' and the QUERY_STRING
+    when include_query is set and the query string is non-empty
+
+    # Simple test:
+    >>> rel_request_uri({'PATH_INFO': '/web/example.com'})
+    '/web/example.com'
+
+    # Test all unencoded special chars and double-quote
+    # (double-quote must be encoded but not single quote)
+    >>> rel_request_uri({'PATH_INFO': "/web/example.com/0~!+$&'()*+,;=:\\\""})
+    "/web/example.com/0~!+$&'()*+,;=:%22"
+    """
+    from urllib import quote
+
+    # chars passed to quote() as safe, i.e. left unquoted in the path
+    path_safe = '/~!$&\'()*+,;=:@'
+
+    path = quote(environ.get('PATH_INFO', ''), safe=path_safe)
+    query = environ.get('QUERY_STRING')
+
+    if include_query and query:
+        return path + '?' + query
+
+    return path
+
+
+#=================================================================
+class WSGIApp(object):
+    """
+    Top-level wsgi application wrapping a pywb router.
+
+    The router is called with the wsgi environ and should return a
+    response callable, or a false value if nothing handled the request
+    (which is turned into a NotFoundException). Exceptions raised while
+    routing are converted into error responses.
+    """
+    def __init__(self, wb_router):
+        self.wb_router = wb_router
+        # use the router's port setting if it has one at all,
+        # DEFAULT_PORT otherwise
+        self.port = getattr(wb_router, 'port', DEFAULT_PORT)
+
+    # Top-level wsgi application
+    def __call__(self, env, start_response):
+        # if mounted under a SCRIPT_NAME, or if the container provides
+        # no REQUEST_URI, rebuild the uri from PATH_INFO / QUERY_STRING,
+        # otherwise use REQUEST_URI as is
+        if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
+            env['REL_REQUEST_URI'] = rel_request_uri(env)
+        else:
+            env['REL_REQUEST_URI'] = env['REQUEST_URI']
+
+        wb_router = self.wb_router
+
+        # not every router necessarily defines an error_view; looking it
+        # up inside the except handlers could raise AttributeError there
+        # and mask the original error
+        error_view = getattr(wb_router, 'error_view', None)
+
+        response = None
+
+        try:
+            response = wb_router(env)
+
+            if not response:
+                msg = 'No handler for "{0}"'.format(env['REL_REQUEST_URI'])
+                raise NotFoundException(msg)
+
+        except InternalRedirect as ir:
+            response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
+
+        except WbException as e:
+            # expected application error: no traceback needed
+            response = handle_exception(env, error_view, e, False)
+
+        except Exception as e:
+            # unexpected error: include the traceback
+            response = handle_exception(env, error_view, e, True)
+
+        return response(env, start_response)
+
+
+#=================================================================
+def handle_exception(env, error_view, exc, print_trace):
+    """
+    Build an error response for an exception raised while handling
+    a request.
+
+    env: the wsgi environ (not used here)
+    error_view: optional view with a render_response() method; if not
+                set, a plain text response is returned instead
+    exc: the exception being handled; its status (method or plain
+         attribute) is used as the http status if present,
+         '400 Bad Request' otherwise
+    print_trace: if true, print the traceback of the exception and pass
+                 it to the error view as err_details; otherwise only
+                 log the exception message
+    """
+    # status is normally a method, but may also be set as a plain
+    # attribute on the exception (as InternalRedirect does)
+    status = getattr(exc, 'status', None)
+    if callable(status):
+        status = status()
+    elif not status:
+        status = '400 Bad Request'
+
+    if print_trace:
+        import traceback
+        # format_exc() formats the exception currently being handled;
+        # its first parameter is a frame limit, not an exception
+        err_details = traceback.format_exc()
+        print err_details
+    else:
+        logging.info(str(exc))
+        err_details = None
+
+    if error_view:
+        return error_view.render_response(err_msg=str(exc),
+                                          err_details=err_details,
+                                          status=status)
+    else:
+        return WbResponse.text_response(status + ' Error: ' + str(exc),
+                                        status=status)
+
+#=================================================================
+DEFAULT_CONFIG_FILE = 'config.yaml'
+
+
+#=================================================================
+def init_app(init_func, load_yaml=True, config_file=None):
+    """
+    Set up logging, create the router via init_func and wrap it
+    in a WSGIApp.
+
+    If load_yaml is set, a yaml config is loaded and passed to init_func
+    as its only argument; otherwise init_func is called with no args.
+    The config file is, in order: the config_file argument, the
+    PYWB_CONFIG_FILE environment variable, DEFAULT_CONFIG_FILE.
+
+    Any failure in init_func is logged with its traceback and re-raised.
+    """
+    logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
+                        level=logging.DEBUG)
+    logging.info('')
+
+    # positional args for init_func: just the parsed config, if any
+    init_args = []
+
+    if load_yaml:
+        config_path = (config_file or
+                       os.environ.get('PYWB_CONFIG_FILE') or
+                       DEFAULT_CONFIG_FILE)
+
+        init_args.append(load_yaml_config(config_path))
+
+    try:
+        wb_router = init_func(*init_args)
+    except:
+        # log whatever went wrong, then let it propagate unchanged
+        logging.exception('*** pywb app init FAILED config from "%s"!\n',
+                          init_func.__name__)
+        raise
+
+    logging.info('*** pywb app inited with config from "%s"!\n',
+                 init_func.__name__)
+
+    return WSGIApp(wb_router)
+
+
+#=================================================================
+def start_wsgi_server(the_app):
+    """
+    Serve the_app with the wsgiref reference server until interrupted.
+
+    The port is picked in order of precedence from: the -p/--port
+    command line option, the port attribute of the_app (if any),
+    DEFAULT_PORT.
+    """
+    from wsgiref.simple_server import make_server
+    from optparse import OptionParser
+
+    opt = OptionParser('%prog [OPTIONS]')
+    opt.add_option('-p', '--port', type='int', default=None)
+
+    options, _ = opt.parse_args()
+
+    # an explicit command line port takes precedence over the app setting
+    # (it was previously always overwritten by the_app.port)
+    port = options.port
+
+    if not port:
+        port = getattr(the_app, 'port', None)
+
+    if not port:
+        port = DEFAULT_PORT
+
+    logging.debug('Starting CDX Server on port %s', port)
+
+    try:
+        httpd = make_server('', port, the_app)
+        httpd.serve_forever()
+    except KeyboardInterrupt:
+        # Ctrl-C is the expected way to stop the server
+        pass
+
+    logging.debug('Stopping CDX Server')
--- a/pywb/pywb_init.py
+++ b/pywb/pywb_init.py
@ -1,128 +0,0 @@
-import handlers
-import archivalrouter
-import config_utils
-import proxy
-from indexreader import IndexReader
-
-import os
-import yaml
-import logging
-
-#=================================================================
-DEFAULTS = {
-    'hostpaths':  ['http://localhost:8080'],
-    'collections': {'pywb': './sample_archive/cdx/'},
-    'archive_paths': './sample_archive/warcs/',
-
-    'head_insert_html': 'ui/head_insert.html',
-    'query_html': 'ui/query.html',
-    'search_html': 'ui/search.html',
-    'home_html': 'ui/index.html',
-    'error_html': 'ui/error.html',
-
-    'static_routes': {'static/default': 'static/'},
-
-    'domain_specific_rules': 'rules.yaml',
-}
-
-class DictChain:
-    def __init__(self, *dicts):
-        self.dicts = dicts
-
-    def get(self, key, default_val=None):
-        for d in self.dicts:
-            val = d.get(key)
-            if val is not None:
-                return val
-        return default_val
-
-
-#=================================================================
-## Reference non-YAML config
-#=================================================================
-def pywb_config_manual(passed_config = {}):
-
-    config = DictChain(passed_config, DEFAULTS)
-
-    routes = []
-
-    hostpaths = config.get('hostpaths')
-
-    # collections based on cdx source
-    collections = config.get('collections')
-
-    for name, value in collections.iteritems():
-        if isinstance(value, str):
-            value = {'index_paths': value}
-
-        route_config = DictChain(value, config)
-
-        ds_rules_file = route_config.get('domain_specific_rules', None)
-        cdx_server = IndexReader(route_config, ds_rules_file)
-
-        wb_handler = config_utils.create_wb_handler(
-            cdx_server=cdx_server,
-            config=route_config,
-            ds_rules_file=ds_rules_file,
-        )
-
-        logging.debug('Adding Collection: ' + name)
-
-        route_class = route_config.get('route_class', archivalrouter.Route)
-
-        routes.append(route_class(name, wb_handler, config = route_config))
-
-        # cdx query handler
-        if route_config.get('enable_cdx_api', False):
-            routes.append(archivalrouter.Route(name + '-cdx', handlers.CDXHandler(cdx_server)))
-
-
-    if config.get('debug_echo_env', False):
-        routes.append(archivalrouter.Route('echo_env', handlers.DebugEchoEnvHandler()))
-
-    if config.get('debug_echo_req', False):
-        routes.append(archivalrouter.Route('echo_req', handlers.DebugEchoHandler()))
-
-
-    static_routes = config.get('static_routes')
-
-    for static_name, static_path in static_routes.iteritems():
-        routes.append(archivalrouter.Route(static_name, handlers.StaticHandler(static_path)))
-
-    # Check for new proxy mode!
-    if config.get('enable_http_proxy', False):
-        router = proxy.ProxyArchivalRouter
-    else:
-        router = archivalrouter.ArchivalRouter
-
-    # Finally, create wb router
-    return router(
-        routes,
-        # Specify hostnames that pywb will be running on
-        # This will help catch occasionally missed rewrites that fall-through to the host
-        # (See archivalrouter.ReferRedirect)
-        hostpaths = hostpaths,
-
-        abs_path = config.get('absolute_paths', True),
-
-        home_view = config_utils.load_template_file(config.get('home_html'), 'Home Page'),
-        error_view = config_utils.load_template_file(config.get('error_html'), 'Error Page')
-    )
-
-
-
-#=================================================================
-# YAML config loader
-#=================================================================
-DEFAULT_CONFIG_FILE = 'config.yaml'
-
-
-def pywb_config(config_file = None):
-    if not config_file:
-        config_file = os.environ.get('PYWB_CONFIG', DEFAULT_CONFIG_FILE)
-
-    with open(config_file) as fh:
-        config = yaml.load(fh)
-
-    return pywb_config_manual(config)
-
--- a/pywb/utils/canonicalize.py
+++ b/pywb/utils/canonicalize.py
@ -4,6 +4,9 @@
 import surt
 import urlparse

+from wbexception import WbException
+
+
 #=================================================================
 class UrlCanonicalizer(object):
    def __init__(self, surt_ordered=True):
@ -14,7 +17,7 @@ class UrlCanonicalizer(object):


 #=================================================================
-class UrlCanonicalizeException(Exception):
+class UrlCanonicalizeException(WbException):
    def status(self):
        return '400 Bad Request'

@ -164,7 +167,8 @@ def calc_search_range(url, match_type, surt_ordered=True, url_canon=None):

    elif match_type == 'domain':
        if not surt_ordered:
-            raise UrlCanonicalizeException('matchType=domain unsupported for non-surt')
+            msg = 'matchType=domain unsupported for non-surt'
+            raise UrlCanonicalizeException(msg)

        host = start_key.split(')/')[0]

--- a/pywb/utils/dsrules.py
+++ b/pywb/utils/dsrules.py
@ -1,10 +1,9 @@
-import yaml
 import pkgutil
+from loaders import load_yaml_config
+

 #=================================================================
-
-DEFAULT_RULES_FILE = 'rules.yaml'
-DEFAULT_RULES_PKG = 'pywb'
+DEFAULT_RULES_FILE = 'pywb/rules.yaml'


 #=================================================================
@ -23,10 +22,14 @@ class RuleSet(object):

        self.rules = []

-        ds_rules_file = kwargs.get('ds_rules_file')
        default_rule_config = kwargs.get('default_rule_config')

-        config = self.load_default_rules(ds_rules_file)
+        ds_rules_file = kwargs.get('ds_rules_file')
+
+        if not ds_rules_file:
+            ds_rules_file = DEFAULT_RULES_FILE
+
+        config = load_yaml_config(ds_rules_file)

        rulesmap = config.get('rules') if config else None

@ -53,22 +56,6 @@ class RuleSet(object):
        if not def_key_found and default_rule_config is not None:
            self.rules.append(rule_cls(self.DEFAULT_KEY, default_rule_config))

-    @staticmethod
-    def load_default_rules(filename=None, pkg=None):
-        config = None
-
-        if not filename:
-            filename = DEFAULT_RULES_FILE
-
-        if not pkg:
-            pkg = DEFAULT_RULES_PKG
-
-        if filename:
-            yaml_str = pkgutil.get_data(pkg, filename)
-            config = yaml.load(yaml_str)
-
-        return config
-
    def iter_matching(self, urlkey):
        """
        Iterate over all matching rules for given urlkey
--- a/pywb/utils/loaders.py
+++ b/pywb/utils/loaders.py
@ -7,11 +7,20 @@ import os
 import hmac
 import urllib2
 import time
+import pkg_resources


 #=================================================================
 def is_http(filename):
-    return any(filename.startswith(x) for x in ['http://', 'https://'])
+    return filename.startswith(('http://', 'https://'))
+
+
+#=================================================================
+def load_yaml_config(config_file):
+    """
+    Load config_file through BlockLoader and return the parsed yaml.
+
+    config_file may be anything BlockLoader.load() accepts: an http(s)
+    url, a local file path, or a package/resource path.
+    """
+    import yaml
+    configdata = BlockLoader().load(config_file)
+    try:
+        # NOTE(review): yaml.load() can construct arbitrary python
+        # objects; consider yaml.safe_load() if a config could ever
+        # come from an untrusted source
+        config = yaml.load(configdata)
+    finally:
+        # don't leak the underlying file / resource handle
+        if hasattr(configdata, 'close'):
+            configdata.close()
+    return config


 #=================================================================
@ -24,27 +33,46 @@ class BlockLoader(object):
    def __init__(self, cookie_maker=None):
        self.cookie_maker = cookie_maker

-    def load(self, url, offset, length):
+    def load(self, url, offset=0, length=-1):
        """
        Determine loading method based on uri
        """
        if is_http(url):
            return self.load_http(url, offset, length)
        else:
-            return self.load_file(url, offset, length)
+            return self.load_file_or_resource(url, offset, length)

-    def load_file(self, url, offset, length):
+    def load_file_or_resource(self, url, offset, length):
        """
        Load a file-like reader from the local file system
        """

+        file_only = False
+
        if url.startswith('file://'):
            url = url[len('file://'):]
+            file_only = True

-        afile = open(url, 'rb')
-        afile.seek(offset)
+        try:
+            # first, try as file
+            afile = open(url, 'rb')

-        if length > 0:
+        except IOError:
+            if file_only:
+                raise
+
+            # then, try as package.path/file
+            pkg_split = url.split('/', 1)
+            if len(pkg_split) == 1:
+                raise
+
+            afile = pkg_resources.resource_stream(pkg_split[0],
+                                                  pkg_split[1])
+
+        if offset > 0:
+            afile.seek(offset)
+
+        if length >= 0:
            return LimitReader(afile, length)
        else:
            return afile
--- a/pywb/utils/test/loaders_test.py
+++ b/pywb/utils/test/loaders_test.py
@ -30,9 +30,9 @@
 >>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
 ' CDX N b a m s k r M S V g\\n'

-#DecompressingBufferedReader readline() with decompression
->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx.gz', 'rb'), decomp_type = 'gzip').readline()
-' CDX N b a m s k r M S V g\\n'
+#DecompressingBufferedReader readline() with decompression (zipnum file, no header)
+>>> DecompressingBufferedReader(open(test_zip_dir + 'zipnum-sample.cdx.gz', 'rb'), decomp_type = 'gzip').readline()
+'com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz\\n'

 >>> BlockLoader(HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read()
 'Example Domain'
@ -60,7 +60,7 @@ from pywb.utils.bufferedreaders import DecompressingBufferedReader
 from pywb import get_test_dir
 #test_cdx_dir = os.path.dirname(os.path.realpath(__file__)) + '/../sample-data/'
 test_cdx_dir = get_test_dir() + 'cdx/'
-
+test_zip_dir = get_test_dir() + 'zipcdx/'

 def read_multiple(reader, inc_reads):
    result = None
--- a/pywb/utils/timeutils.py
+++ b/pywb/utils/timeutils.py
@ -171,7 +171,6 @@ def timestamp_to_datetime(string):
    # pad to 6 digits
    string = _pad_timestamp(string, PAD_6)

-
    def clamp(val, min_, max_):
        try:
            val = int(val)
--- a/pywb/utils/wbexception.py
+++ b/pywb/utils/wbexception.py
@ -0,0 +1,3 @@
+class WbException(Exception):
+    """
+    Common base class for pywb exceptions.
+    """
+    def status(self):
+        # http status line for this error; this base implementation
+        # reports a generic server error
+        return '500 Internal Server Error'
--- a/pywb/warc/recordloader.py
+++ b/pywb/warc/recordloader.py
@ -9,6 +9,9 @@ from pywb.utils.statusandheaders import StatusAndHeadersParserException
 from pywb.utils.loaders import BlockLoader
 from pywb.utils.bufferedreaders import DecompressingBufferedReader

+from pywb.utils.wbexception import WbException
+
+
 #=================================================================
 ArcWarcRecord = collections.namedtuple('ArchiveRecord',
                                       'type, rec_headers, ' +
@ -16,7 +19,7 @@ ArcWarcRecord = collections.namedtuple('ArchiveRecord',


 #=================================================================
-class ArchiveLoadFailed(Exception):
+class ArchiveLoadFailed(WbException):
    def __init__(self, reason, filename=''):
        super(ArchiveLoadFailed, self).__init__(filename + ':' + str(reason))
        #self.filename = filename
@ -62,9 +65,9 @@ class ArcWarcRecordLoader:
        decomp_type = 'gzip'

        # Create decompressing stream
-        stream = DecompressingBufferedReader(stream = raw,
-                                             decomp_type = decomp_type,
-                                             block_size = self.block_size)
+        stream = DecompressingBufferedReader(stream=raw,
+                                             decomp_type=decomp_type,
+                                             block_size=self.block_size)

        (the_format, rec_headers) = self._detect_type_load_headers(stream)

--- a/pywb/warc/resolvingloader.py
+++ b/pywb/warc/resolvingloader.py
@ -176,6 +176,6 @@ class ResolvingLoader:
        params = {'url': url,
                  'closest': timestamp,
                  'filter': 'digest:' + digest,
-                  'output': 'raw'}
+                  'output': 'cdxobject'}

        return self.cdx_server.load_cdx(**params)
--- a/pywb/wbapp.py
+++ b/pywb/wbapp.py
@ -1,124 +0,0 @@
-from wbexceptions import WbException, NotFoundException, InternalRedirect
-from wbrequestresponse import WbResponse, StatusAndHeaders
-
-from pywb.cdx.cdxserver import CDXException
-from pywb.utils.canonicalize import UrlCanonicalizeException
-from pywb.warc.recordloader import ArchiveLoadFailed
-
-import os
-import importlib
-import logging
-
-
-
-#=================================================================
-# adapted -from wsgiref.request_uri, but doesn't include domain name and allows all characters
-# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3
-# explained here: http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links
-def rel_request_uri(environ, include_query=1):
-    """
-    Return the requested path, optionally including the query string
-
-    # Simple test:
-    >>> rel_request_uri({'PATH_INFO': '/web/example.com'})
-    '/web/example.com'
-
-    # Test all unecoded special chars and double-quote
-    # (double-quote must be encoded but not single quote)
-    >>> rel_request_uri({'PATH_INFO': "/web/example.com/0~!+$&'()*+,;=:\\\""})
-    "/web/example.com/0~!+$&'()*+,;=:%22"
-    """
-    from urllib import quote
-    url = quote(environ.get('PATH_INFO',''), safe='/~!$&\'()*+,;=:@')
-    if include_query and environ.get('QUERY_STRING'):
-        url += '?' + environ['QUERY_STRING']
-
-    return url
-
-#=================================================================
-def create_wb_app(wb_router):
-
-    # Top-level wsgi application
-    def application(env, start_response):
-        if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
-            env['REL_REQUEST_URI'] = rel_request_uri(env)
-        else:
-            env['REL_REQUEST_URI'] = env['REQUEST_URI']
-
-        response = None
-
-        try:
-            response = wb_router(env)
-
-            if not response:
-                raise NotFoundException('No handler for "{0}"'.format(env['REL_REQUEST_URI']))
-
-        except InternalRedirect as ir:
-            response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
-
-        except (WbException, CDXException,
-                UrlCanonicalizeException, ArchiveLoadFailed) as e:
-            response = handle_exception(env, wb_router.error_view, e, False)
-
-        except Exception as e:
-            response = handle_exception(env, wb_router.error_view, e, True)
-
-        return response(env, start_response)
-
-
-    return application
-
-
-def handle_exception(env, error_view, exc, print_trace):
-    if hasattr(exc, 'status'):
-        status = exc.status()
-    else:
-        status = '400 Bad Request'
-
-    if print_trace:
-        import traceback
-        err_details = traceback.format_exc(exc)
-        print err_details
-    else:
-        logging.info(str(exc))
-        err_details = None
-
-    if error_view:
-        import traceback
-        return error_view.render_response(err_msg = str(exc), err_details = err_details, status = status)
-    else:
-        return WbResponse.text_response(status + ' Error: ' + str(exc), status = status)
-
-
-#=================================================================
-DEFAULT_CONFIG_FILE = 'config.yaml'
-
-def main():
-    try:
-        logging.basicConfig(format = '%(asctime)s: [%(levelname)s]: %(message)s', level = logging.DEBUG)
-
-        # see if there's a custom init module
-        config_name = os.environ.get('PYWB_CONFIG_MODULE')
-
-        if not config_name:
-            # use default module
-            config_name = 'pywb.pywb_init'
-            logging.info('Loading from default config module "{0}"'.format(config_name))
-            logging.info('')
-
-        module = importlib.import_module(config_name)
-
-        app = create_wb_app(module.pywb_config())
-        logging.info('')
-        logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name))
-        return app
-
-    except Exception:
-        logging.exception('*** pywb could not init with settings from {0}.pywb_config()!\n'.format(config_name))
-        raise
-
-#=================================================================
-if __name__ == "__main__":
-    pass
-else:
-    application = main()
--- a/run.sh
+++ b/run.sh
@ -10,14 +10,14 @@ mypath=$(cd `dirname $0` && pwd)
 # ex: my_pywb.pywb_config()
 #export 'PYWB_CONFIG=my_pywb'

-app="pywb.wbapp"
+app="pywb.apps.wayback"

 params="--http-socket :8080 -b 65536"
 #params="--static-map /static=$mypath/static --http-socket :8080 -b 65536"

 if [ -z "$1" ]; then
    # Standard root config
-    params="$params --wsgi pywb.wbapp"
+    params="$params --wsgi $app"
 else
    # run with --mount 
    # requires a file not a package, so creating a mount_run.py to load the package
--- a/sample_archive/cdx/iana.cdx.gz
+++ b/sample_archive/cdx/iana.cdx.gz
--- a/setup.py
+++ b/setup.py
@ -14,7 +14,14 @@ setup(
    license='GPL',
    packages=find_packages(),
    provides=[
-        'pywb','pywb.utils','pywb.cdx','pywb.warc','pywb.rewrite'
+        'pywb',
+        'pywb.utils',
+        'pywb.cdx',
+        'pywb.warc',
+        'pywb.rewrite',
+        'pywb.framework',
+        'pywb.core',
+        'pywb.apps',
        ],
    package_data={
        'pywb': ['ui/*', 'static/*', '*.yaml'],
@ -34,7 +41,6 @@ setup(
        'pyyaml',
        'WebTest',
        'pytest',
-        'werkzeug>=0.9.4',
        ],
    # tests_require=['WebTest', 'pytest'],
    zip_safe=False
--- a/test_config.yaml
+++ b/test_config.yaml
@ -90,6 +90,9 @@ enable_http_proxy: true
 # enable cdx server api for querying cdx directly (experimental)
 enable_cdx_api: true

+# test different port
+port: 9000
+
 # optional reporter callback func
 # if set, called with request and cdx object
 reporter: !!python/object/new:tests.fixture.PrintReporter []
--- a/tests/test_cdx_server_app.py
+++ b/tests/test_cdx_server_app.py
@ -1,32 +1,26 @@
-import os
 import re
+import webtest

-import pytest
 from urllib import urlencode

-from werkzeug.test import Client
-from werkzeug.wrappers import BaseResponse, Response
-
-import yaml
-
 from pywb.cdx.cdxobject import CDXObject
-from pywb.cdx.wsgi_cdxserver import create_app
+from pywb.apps.cdx_server import application

-from tests.fixture import testconfig
+import pytest

+#================================================================
@pytest.fixture
-def client(testconfig):
-    app = create_app(testconfig)
-    return Client(app, Response)
+def client():
+    return webtest.TestApp(application)

-# ================================================================

-def query(client, url, **params):
+#================================================================
+def query(client, url, is_error=False, **params):
    params['url'] = url
-    return client.get('/cdx?' + urlencode(params, doseq=1))
+    return client.get('/cdx?' + urlencode(params, doseq=1), expect_errors=is_error)

-# ================================================================

+#================================================================
 def test_exact_url(client):
    """
    basic exact match, no filters, etc.
@ -34,48 +28,54 @@ def test_exact_url(client):
    resp = query(client, 'http://www.iana.org/')

    assert resp.status_code == 200
-    print resp.data
+    print resp.body

+
+#================================================================
 def test_prefix_match(client):
    """
    prefix match test
    """
    resp = query(client, 'http://www.iana.org/', matchType='prefix')

-    print resp.data.splitlines()
+    print resp.body.splitlines()
    assert resp.status_code == 200

    suburls = 0
-    for l in resp.data.splitlines():
+    for l in resp.body.splitlines():
        fields = l.split(' ')
        if len(fields[0]) > len('org,iana)/'):
            suburls += 1
    assert suburls > 0
-               
+
+
+#================================================================
 def test_filters(client):
    """
    filter cdxes by mimetype and filename field, exact match.
    """
    resp = query(client, 'http://www.iana.org/_css/2013.1/screen.css',
                 filter=('mimetype:warc/revisit', 'filename:dupes.warc.gz'))
-    
-    assert resp.status_code == 200
-    assert resp.mimetype == 'text/plain'

-    for l in resp.data.splitlines():
+    assert resp.status_code == 200
+    assert resp.content_type == 'text/plain'
+
+    for l in resp.body.splitlines():
        fields = l.split(' ')
        assert fields[0] == 'org,iana)/_css/2013.1/screen.css'
        assert fields[3] == 'warc/revisit'
        assert fields[10] == 'dupes.warc.gz'

+
+#================================================================
 def test_limit(client):
    resp = query(client, 'http://www.iana.org/_css/2013.1/screen.css',
                 limit='1')

    assert resp.status_code == 200
-    assert resp.mimetype == 'text/plain'
+    assert resp.content_type == 'text/plain'

-    cdxes = resp.data.splitlines()
+    cdxes = resp.body.splitlines()
    assert len(cdxes) == 1
    fields = cdxes[0].split(' ')
    assert fields[0] == 'org,iana)/_css/2013.1/screen.css'
@ -86,15 +86,17 @@ def test_limit(client):
                 limit='1', reverse='1')

    assert resp.status_code == 200
-    assert resp.mimetype == 'text/plain'
+    assert resp.content_type == 'text/plain'

-    cdxes = resp.data.splitlines()
+    cdxes = resp.body.splitlines()
    assert len(cdxes) == 1
    fields = cdxes[0].split(' ')
    assert fields[0] == 'org,iana)/_css/2013.1/screen.css'
    assert fields[1] == '20140127171239'
    assert fields[3] == 'warc/revisit'

+
+#================================================================
 def test_fields(client):
    """
    retrieve subset of fields with ``fields`` parameter.
@ -104,7 +106,7 @@ def test_fields(client):

    assert resp.status_code == 200

-    cdxes = resp.data.splitlines()
+    cdxes = resp.body.splitlines()

    for cdx in cdxes:
        fields = cdx.split(' ')
@ -113,16 +115,21 @@ def test_fields(client):
        assert re.match(r'\d{14}$', fields[1])
        assert re.match(r'\d{3}|-', fields[2])

+
+#================================================================
 def test_fields_undefined(client):
    """
-    server shall respond with Bad Request (TODO: with proper explanation),
+    server shall respond with Bad Request and name of undefined
    when ``fields`` parameter contains undefined name(s).
    """
    resp = query(client, 'http://www.iana.org/_css/2013.1/print.css',
+                 is_error=True,
                 fields='urlkey,nosuchfield')

-    resp.status_code == 400
+    assert resp.status_code == 400
-    
+
+
+#================================================================
 def test_resolveRevisits(client):
    """
    with ``resolveRevisits=true``, server adds three fields pointing to
@ -132,9 +139,9 @@ def test_resolveRevisits(client):
                 resolveRevisits='true'
                 )
    assert resp.status_code == 200
-    assert resp.mimetype == 'text/plain'
+    assert resp.content_type == 'text/plain'

-    cdxes = resp.data.splitlines()
+    cdxes = resp.body.splitlines()
    originals = {}
    for cdx in cdxes:
        fields = cdx.split(' ')
@ -151,6 +158,8 @@ def test_resolveRevisits(client):
            orig = originals.get(sha)
            assert orig == (int(orig_size), int(orig_offset), orig_fn)

+
+#================================================================
 def test_resolveRevisits_orig_fields(client):
    """
    when resolveRevisits=true, extra three fields are named
@ -162,9 +171,9 @@ def test_resolveRevisits_orig_fields(client):
                 fields='urlkey,orig.length,orig.offset,orig.filename'
                 )
    assert resp.status_code == 200
-    assert resp.mimetype == 'text/plain'
+    assert resp.content_type == 'text/plain'

-    cdxes = resp.data.splitlines()
+    cdxes = resp.body.splitlines()
    for cdx in cdxes:
        fields = cdx.split(' ')
        assert len(fields) == 4
@ -172,6 +181,8 @@ def test_resolveRevisits_orig_fields(client):
        assert (orig_len == '-' and orig_offset == '-' and orig_fn == '-' or
                (int(orig_len), int(orig_offset), orig_fn))

+
+#================================================================
 def test_collapseTime_resolveRevisits_reverse(client):
    resp = query(client, 'http://www.iana.org/_css/2013.1/print.css',
                 collapseTime='11',
@ -179,11 +190,10 @@ def test_collapseTime_resolveRevisits_reverse(client):
                 reverse='true'
                 )

-    cdxes = [CDXObject(l) for l in resp.data.splitlines()]
-    
+    cdxes = [CDXObject(l) for l in resp.body.splitlines()]
+
    assert len(cdxes) == 3

    # timestamp is in descending order
    for i in range(len(cdxes) - 1):
        assert cdxes[i]['timestamp'] >= cdxes[i + 1]['timestamp']
-
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@ -1,6 +1,6 @@
 import webtest
-from pywb.pywb_init import pywb_config
-from pywb.wbapp import create_wb_app
+from pywb.core.pywb_init import create_wb_router
+from pywb.framework.wsgi_wrappers import init_app
 from pywb.cdx.cdxobject import CDXObject

 from fixture import TestExclusionPerms
@ -11,8 +11,13 @@ class TestWb:
    def setup(self):
        #self.app = pywb.wbapp.create_wb_app(pywb.pywb_init.pywb_config())
        # save it in self - useful for debugging
-        self.router = pywb_config(self.TEST_CONFIG)
-        self.app = create_wb_app(self.router)
+        self.app = init_app(create_wb_router,
+                            load_yaml=True,
+                            config_file=self.TEST_CONFIG)
+
+        #self.router = pywb_config(self.TEST_CONFIG)
+        #self.app = create_wb_app(self.router)
+
        self.testapp = webtest.TestApp(self.app)

    def _assert_basic_html(self, resp):