remove obsolete code and tests!

disable test_auto_colls for now until fully supported in new system
2025-03-15 00:03:28 +01:00 · 2017-04-24 18:52:15 -07:00 · 2017-04-24 18:52:15 -07:00 · 52dc46fe6a
commit 52dc46fe6a
parent 24c968640d
37 changed files with 2 additions and 5864 deletions
--- a/pywb/cdx/README.md
+++ b/pywb/cdx/README.md
@@ -1,28 +0,0 @@
-### pywb.cdx package
-
-This package contains the CDX processing suite of the pywb wayback tool suite.
-
-The CDX Server loads, filters and transforms cdx from multiple sources in response
-to a given query.
-
-#### Sample App
-
-A very simple reference WSGI app is included.
-
-Run: `python -m pywb.cdx.wsgi_cdxserver` to start the app, keyboard interrupt to stop.
-
-The default [config.yaml](config.yaml) points to the sample data directory
-and uses port 8080.
-
-The domain specific [rules.yaml](rules.yaml) are also loaded.
-
-#### CDX Server API Reference
-
-Goal is to provide compatiblity with this feature set and more:
-https://github.com/internetarchive/wayback/tree/master/wayback-cdx-server
-
-TODO
-
-
-
-
--- a/pywb/cdx/cdxdomainspecific.py
+++ b/pywb/cdx/cdxdomainspecific.py
@@ -1,185 +0,0 @@
-import yaml
-import re
-import logging
-import pkg_resources
-
-from six.moves.urllib.parse import urlsplit
-
-from pywb.utils.dsrules import BaseRule, RuleSet
-
-from pywb.utils.canonicalize import unsurt, UrlCanonicalizer
-from pywb.utils.loaders import to_native_str
-
-
-#=================================================================
-def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
-    canon = None
-    fuzzy = None
-
-    # Load Canonicalizer Rules
-    rules = RuleSet(CDXDomainSpecificRule, 'canonicalize',
-                    ds_rules_file=ds_rules_file)
-
-    if not surt_ordered:
-        for rule in rules.rules:
-            rule.unsurt()
-
-    if rules:
-        canon = CustomUrlCanonicalizer(rules, surt_ordered)
-
-    # Load Fuzzy Lookup Rules
-    rules = RuleSet(CDXDomainSpecificRule, 'fuzzy_lookup',
-                    ds_rules_file=ds_rules_file)
-
-    if not surt_ordered:
-        for rule in rules.rules:
-            rule.unsurt()
-
-    if rules:
-        fuzzy = FuzzyQuery(rules)
-
-    logging.debug('CustomCanonilizer? ' + str(bool(canon)))
-    logging.debug('FuzzyMatcher? ' + str(bool(canon)))
-    return (canon, fuzzy)
-
-
-#=================================================================
-class CustomUrlCanonicalizer(UrlCanonicalizer):
-    def __init__(self, rules, surt_ordered=True):
-        super(CustomUrlCanonicalizer, self).__init__(surt_ordered)
-        self.rules = rules
-
-    def __call__(self, url):
-        urlkey = super(CustomUrlCanonicalizer, self).__call__(url)
-
-        for rule in self.rules.iter_matching(urlkey):
-            m = rule.regex.match(urlkey)
-            if not m:
-                continue
-
-            if rule.replace:
-                return m.expand(rule.replace)
-
-        return urlkey
-
-
-#=================================================================
-class FuzzyQuery(object):
-    def __init__(self, rules):
-        self.rules = rules
-
-    def __call__(self, query):
-        matched_rule = None
-
-        urlkey = to_native_str(query.key, 'utf-8')
-        url = query.url
-        filter_ = query.filters
-        output = query.output
-
-        for rule in self.rules.iter_matching(urlkey):
-            m = rule.regex.search(urlkey)
-            if not m:
-                continue
-
-            matched_rule = rule
-
-            groups = m.groups()
-            for g in groups:
-                for f in matched_rule.filter:
-                    filter_.append(f.format(g))
-
-            break
-
-        if not matched_rule:
-            return None
-
-        repl = '?'
-        if matched_rule.replace:
-            repl = matched_rule.replace
-
-        inx = url.find(repl)
-        if inx > 0:
-            url = url[:inx + len(repl)]
-
-        if matched_rule.match_type == 'domain':
-            host = urlsplit(url).netloc
-            # remove the subdomain
-            url = host.split('.', 1)[1]
-
-        params = query.params
-        params.update({'url': url,
-                       'matchType': matched_rule.match_type,
-                       'filter': filter_})
-
-        if 'reverse' in params:
-            del params['reverse']
-
-        if 'closest' in params:
-            del params['closest']
-
-        if 'end_key' in params:
-            del params['end_key']
-
-        return params
-
-
-#=================================================================
-class CDXDomainSpecificRule(BaseRule):
-    DEFAULT_FILTER = ['~urlkey:{0}']
-    DEFAULT_MATCH_TYPE = 'prefix'
-
-    def __init__(self, name, config):
-        super(CDXDomainSpecificRule, self).__init__(name, config)
-
-        if not isinstance(config, dict):
-            self.regex = self.make_regex(config)
-            self.replace = None
-            self.filter = self.DEFAULT_FILTER
-            self.match_type = self.DEFAULT_MATCH_TYPE
-        else:
-            self.regex = self.make_regex(config.get('match'))
-            self.replace = config.get('replace')
-            self.filter = config.get('filter', self.DEFAULT_FILTER)
-            self.match_type = config.get('type', self.DEFAULT_MATCH_TYPE)
-
-    def unsurt(self):
-        """
-        urlkey is assumed to be in surt format by default
-        In the case of non-surt format, this method is called
-        to desurt any urls
-        """
-        self.url_prefix = list(map(unsurt, self.url_prefix))
-        if self.regex:
-            self.regex = re.compile(unsurt(self.regex.pattern))
-
-        if self.replace:
-            self.replace = unsurt(self.replace)
-
-    @staticmethod
-    def make_regex(config):
-        # just query args
-        if isinstance(config, list):
-            string = CDXDomainSpecificRule.make_query_match_regex(config)
-
-        # split out base and args
-        elif isinstance(config, dict):
-            string = config.get('regex', '')
-            string += CDXDomainSpecificRule.make_query_match_regex(
-                      config.get('args', []))
-
-        # else assume string
-        else:
-            string = str(config)
-
-        return re.compile(string)
-
-    @staticmethod
-    def make_query_match_regex(params_list):
-        params_list.sort()
-
-        def conv(value):
-            return '[?&]({0}=[^&]+)'.format(re.escape(value))
-
-        params_list = list(map(conv, params_list))
-        final_str = '.*'.join(params_list)
-        return final_str
--- a/pywb/cdx/cdxserver.py
+++ b/pywb/cdx/cdxserver.py
@@ -1,230 +0,0 @@
-from pywb.utils.canonicalize import UrlCanonicalizer
-from pywb.utils.wbexception import NotFoundException
-
-from pywb.cdx.cdxops import cdx_load
-from pywb.cdx.cdxsource import CDXSource, CDXFile, RemoteCDXSource, RedisCDXSource
-from pywb.cdx.zipnum import ZipNumCluster
-from pywb.cdx.cdxobject import CDXObject, CDXException
-from pywb.cdx.query import CDXQuery
-from pywb.cdx.cdxdomainspecific import load_domain_specific_cdx_rules
-
-from pywb.utils.loaders import is_http
-
-from itertools import chain
-import logging
-import os
-
-
-#=================================================================
-class BaseCDXServer(object):
-    def __init__(self, **kwargs):
-        ds_rules_file = kwargs.get('ds_rules_file')
-        surt_ordered = kwargs.get('surt_ordered', True)
-
-        # load from domain-specific rules
-        if ds_rules_file:
-            self.url_canon, self.fuzzy_query = (
-                load_domain_specific_cdx_rules(ds_rules_file, surt_ordered))
-        # or custom passed in canonicalizer
-        else:
-            self.url_canon = kwargs.get('url_canon')
-            self.fuzzy_query = kwargs.get('fuzzy_query')
-
-        # set default canonicalizer if none set thus far
-        if not self.url_canon:
-            self.url_canon = UrlCanonicalizer(surt_ordered)
-
-    def _check_cdx_iter(self, cdx_iter, query):
-        """ Check cdx iter semantics
-        If `cdx_iter` is empty (no matches), check if fuzzy matching
-        is allowed, and try it -- otherwise,
-        throw :exc:`~pywb.utils.wbexception.NotFoundException`
-        """
-
-        cdx_iter = self.peek_iter(cdx_iter)
-
-        if cdx_iter:
-            return cdx_iter
-
-        # check if fuzzy is allowed and ensure that its an
-        # exact match
-        if (self.fuzzy_query and
-            query.allow_fuzzy and
-            query.is_exact):
-
-            fuzzy_query_params = self.fuzzy_query(query)
-            if fuzzy_query_params:
-                return self.load_cdx(**fuzzy_query_params)
-
-        msg = 'No Captures found for: ' + query.url
-        if not query.is_exact:
-            msg += ' (' + query.match_type + ' query)'
-
-        raise NotFoundException(msg, url=query.url)
-
-    #def _calc_search_keys(self, query):
-    #    return calc_search_range(url=query.url,
-    #                             match_type=query.match_type,
-    #                             url_canon=self.url_canon)
-
-    def load_cdx(self, **params):
-        params['_url_canon'] = self.url_canon
-        query = CDXQuery(params)
-
-        #key, end_key = self._calc_search_keys(query)
-        #query.set_key(key, end_key)
-
-        cdx_iter = self._load_cdx_query(query)
-
-        return self._check_cdx_iter(cdx_iter, query)
-
-    def _load_cdx_query(self, query):  # pragma: no cover
-        raise NotImplementedError('Implement in subclass')
-
-    @staticmethod
-    def peek_iter(iterable):
-        try:
-            first = next(iterable)
-        except StopIteration:
-            return None
-
-        return chain([first], iterable)
-
-
-#=================================================================
-class CDXServer(BaseCDXServer):
-    """
-    Top-level cdx server object which maintains a list of cdx sources,
-    responds to queries and dispatches to the cdx ops for processing
-    """
-
-    def __init__(self, paths, **kwargs):
-        super(CDXServer, self).__init__(**kwargs)
-        # TODO: we could save config in member, so that other
-        # methods can use it. it's bad for add_cdx_source to take
-        # config argument.
-        self._create_cdx_sources(paths, kwargs.get('config'))
-
-    def _load_cdx_query(self, query):
-        """
-        load CDX for query parameters ``params``.
-        ``key`` (or ``url``) parameter specifies URL to query,
-        ``matchType`` parameter specifies matching method for ``key``
-        (default ``exact``).
-        other parameters are passed down to :func:`cdx_load`.
-        raises :exc:`~pywb.utils.wbexception.NotFoundException`
-        if no captures are found.
-
-        :param query: query parameters
-        :type query: :class:`~pywb.cdx.query.CDXQuery`
-        :rtype: iterator on :class:`~pywb.cdx.cdxobject.CDXObject`
-        """
-        return cdx_load(self.sources, query)
-
-    def _create_cdx_sources(self, paths, config):
-        """
-        build CDXSource instances for each of path in ``paths``.
-
-        :param paths: list of sources or single source.
-        each source may be either string or CDXSource instance. value
-        of any other types will be silently ignored.
-        :param config: config object passed to :method:`add_cdx_source`.
-        """
-        self.sources = []
-
-        if paths is not None:
-            if not isinstance(paths, (list, tuple)):
-                paths = [paths]
-
-            for path in paths:
-                self.add_cdx_source(path, config)
-
-        if len(self.sources) == 0:
-            logging.warn('No CDX Sources configured from paths=%s', paths)
-
-    def _add_cdx_source(self, source):
-        if source is None:
-            return
-
-        logging.debug('Adding CDX Source: %s', source)
-        self.sources.append(source)
-
-    def add_cdx_source(self, source, config):
-        if isinstance(source, CDXSource):
-            self._add_cdx_source(source)
-
-        elif isinstance(source, str):
-            if os.path.isdir(source):
-                for fn in os.listdir(source):
-                    self._add_cdx_source(self._create_cdx_source(
-                        os.path.join(source, fn), config))
-            else:
-                self._add_cdx_source(self._create_cdx_source(
-                    source, config))
-
-    def _create_cdx_source(self, filename, config):
-        if is_http(filename):
-            return RemoteCDXSource(filename)
-
-        if filename.startswith('redis://'):
-            return RedisCDXSource(filename, config)
-
-        if filename.endswith(('.cdx', '.cdxj')):
-            return CDXFile(filename)
-
-        if filename.endswith(('.summary', '.idx')):
-            return ZipNumCluster(filename, config)
-
-        # no warning for .loc or .gz (zipnum)
-        if not filename.endswith(('.loc', '.gz')):
-            logging.warn('skipping unrecognized URI: %s', filename)
-
-        return None
-
-
-#=================================================================
-class RemoteCDXServer(BaseCDXServer):
-    """
-    A special cdx server that uses a single
-    :class:`~pywb.cdx.cdxsource.RemoteCDXSource`.
-    It simply proxies the query params to the remote source
-    and performs no local processing/filtering
-    """
-    def __init__(self, source, **kwargs):
-        super(RemoteCDXServer, self).__init__(**kwargs)
-
-        if isinstance(source, RemoteCDXSource):
-            self.source = source
-        elif (isinstance(source, str) and is_http(source)):
-            self.source = RemoteCDXSource(source, remote_processing=True)
-        else:
-            raise Exception('Invalid remote cdx source: ' + str(source))
-
-    def _load_cdx_query(self, query):
-        return cdx_load([self.source], query, process=False)
-
-
-#=================================================================
-def create_cdx_server(config, ds_rules_file=None, server_cls=None):
-    if hasattr(config, 'get'):
-        paths = config.get('index_paths')
-        surt_ordered = config.get('surt_ordered', True)
-        pass_config = config
-    else:
-        paths = config
-        surt_ordered = True
-        pass_config = None
-
-    logging.debug('CDX Surt-Ordered? ' + str(surt_ordered))
-
-    if not server_cls:
-        if ((isinstance(paths, str) and is_http(paths)) or
-            isinstance(paths, RemoteCDXSource)):
-            server_cls = RemoteCDXServer
-        else:
-            server_cls = CDXServer
-
-    return server_cls(paths,
-                      config=pass_config,
-                      surt_ordered=surt_ordered,
-                      ds_rules_file=ds_rules_file)
--- a/pywb/cdx/cdxsource.py
+++ b/pywb/cdx/cdxsource.py
@@ -1,150 +0,0 @@
-from pywb.utils.binsearch import iter_range
-
-from pywb.utils.wbexception import AccessException, NotFoundException
-from pywb.utils.wbexception import BadRequestException, WbException
-
-from pywb.cdx.query import CDXQuery
-
-from six.moves.urllib.request import urlopen, Request
-from six.moves.urllib.error import HTTPError
-from six.moves import map
-
-
-#=================================================================
-class CDXSource(object):
-    """
-    Represents any cdx index source
-    """
-    def load_cdx(self, query):  # pragma: no cover
-        raise NotImplementedError('Implement in subclass')
-
-
-#=================================================================
-class CDXFile(CDXSource):
-    """
-    Represents a local plain-text .cdx file
-    """
-    def __init__(self, filename):
-        self.filename = filename
-
-    def load_cdx(self, query):
-        return self._do_load_file(self.filename, query)
-
-    @staticmethod
-    def _do_load_file(filename, query):
-        with open(filename, 'rb') as source:
-            gen = iter_range(source, query.key,
-                                     query.end_key)
-            for line in gen:
-                yield line
-
-    def __str__(self):
-        return 'CDX File - ' + self.filename
-
-
-#=================================================================
-class RemoteCDXSource(CDXSource):
-    """
-    Represents a remote cdx server, to which requests will be proxied.
-
-    Only ``url`` and ``match_type`` params are proxied at this time,
-    the stream is passed through all other filters locally.
-    """
-    def __init__(self, filename, cookie=None, remote_processing=False):
-        self.remote_url = filename
-        self.cookie = cookie
-        self.remote_processing = remote_processing
-
-    def load_cdx(self, query):
-        if self.remote_processing:
-            remote_query = query
-        else:
-            # Only send url and matchType to remote
-            remote_query = CDXQuery(dict(url=query.url,
-                                         matchType=query.match_type))
-
-        urlparams = remote_query.urlencode()
-
-        try:
-            request = Request(self.remote_url + '?' + urlparams)
-
-            if self.cookie:
-                request.add_header('Cookie', self.cookie)
-
-            response = urlopen(request)
-
-        except HTTPError as e:
-            if e.code == 403:
-                raise AccessException('Access Denied')
-            elif e.code == 404:
-                # return empty list for consistency with other cdx sources
-                # will be converted to 404 if no other retry
-                return []
-            elif e.code == 400:
-                raise BadRequestException()
-            else:
-                raise WbException('Invalid response from remote cdx server')
-
-        return iter(response)
-
-    def __str__(self):
-        if self.remote_processing:
-            return 'Remote CDX Server: ' + self.remote_url
-        else:
-            return 'Remote CDX Source: ' + self.remote_url
-
-
-#=================================================================
-class RedisCDXSource(CDXSource):
-    DEFAULT_KEY_PREFIX = b'c:'
-
-    def __init__(self, redis_url, config=None):
-        import redis
-
-        parts = redis_url.split('/')
-        if len(parts) > 4:
-            self.cdx_key = parts[4].encode('utf-8')
-            redis_url = 'redis://' + parts[2] + '/' + parts[3]
-        else:
-            self.cdx_key = None
-
-        self.redis_url = redis_url
-        self.redis = redis.StrictRedis.from_url(redis_url)
-
-        self.key_prefix = self.DEFAULT_KEY_PREFIX
-
-    def load_cdx(self, query):
-        """
-        Load cdx from redis cache, from an ordered list
-
-        If cdx_key is set, treat it as cdx file and load use
-        zrangebylex! (Supports all match types!)
-
-        Otherwise, assume a key per-url and load all entries for that key.
-        (Only exact match supported)
-        """
-
-        if self.cdx_key:
-            return self.load_sorted_range(query, self.cdx_key)
-        else:
-            return self.load_single_key(query.key)
-
-    def load_sorted_range(self, query, cdx_key):
-        cdx_list = self.redis.zrangebylex(cdx_key,
-                                          b'[' + query.key,
-                                          b'(' + query.end_key)
-
-        return iter(cdx_list)
-
-    def load_single_key(self, key):
-        # ensure only url/surt is part of key
-        key = key.split(b' ')[0]
-        cdx_list = self.redis.zrange(self.key_prefix + key, 0, -1)
-
-        # key is not part of list, so prepend to each line
-        key += b' '
-        cdx_list = list(map(lambda x: key + x, cdx_list))
-        return cdx_list
-
-    def __str__(self):
-        return 'Redis - ' + self.redis_url
--- a/pywb/cdx/test/test_cdxdomainspecific.py
+++ b/pywb/cdx/test/test_cdxdomainspecific.py
@@ -1,40 +0,0 @@
-r"""
-Load Rules
-
->>> (canon, fuzzy) = load_domain_specific_cdx_rules(None, True)
->>> canon('http://test.example.example/path/index.html?a=b&id=value&c=d')
-'example,example,test)/path/index.html?id=value'
-
-
-# Fuzzy Query Args Builder
->>> CDXDomainSpecificRule.make_query_match_regex(['para', 'id', 'abc'])
-'[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)'
-
->>> CDXDomainSpecificRule.make_query_match_regex(['id[0]', 'abc()'])
-'[?&](abc\\(\\)=[^&]+).*[?&](id\\[0\\]=[^&]+)'
-
-
-# Fuzzy Match Query + Args
-
-# list
->>> CDXDomainSpecificRule.make_regex(['para', 'id', 'abc']).pattern
-'[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)'
-
-# dict
->>> CDXDomainSpecificRule.make_regex(dict(regex='com,test,.*\)/', args=['para', 'id', 'abc'])).pattern
-'com,test,.*\\)/[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)'
-
-# string
->>> CDXDomainSpecificRule.make_regex('com,test,.*\)/[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)').pattern
-'com,test,.*\\)/[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)'
-
-"""
-
-
-from pywb.cdx.cdxdomainspecific import CDXDomainSpecificRule
-from pywb.cdx.cdxdomainspecific import load_domain_specific_cdx_rules
-
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
--- a/pywb/cdx/test/test_cdxops.py
+++ b/pywb/cdx/test/test_cdxops.py
@@ -1,228 +0,0 @@
-#=================================================================
-"""
-# Merge Sort Multipe CDX Sources
->>> cdx_ops_test(url = 'http://iana.org/', sources = [test_cdx_dir + 'dupes.cdx', test_cdx_dir + 'iana.cdx'])
-org,iana)/ 20140126200624 http://www.iana.org/ text/html 200 OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB - - 2258 334 iana.warc.gz
-org,iana)/ 20140127171238 http://iana.org unk 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 343 1858 dupes.warc.gz
-org,iana)/ 20140127171238 http://www.iana.org/ warc/revisit - OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB - - 536 2678 dupes.warc.gz
-
-
-# Limit CDX Stream
->>> cdx_ops_test('http://iana.org/_css/2013.1/fonts/opensans-bold.ttf', limit = 3)
-org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200625 http://www.iana.org/_css/2013.1/fonts/OpenSans-Bold.ttf application/octet-stream 200 YFUR5ALIWJMWV6FAAFRLVRQNXZQF5HRW - - 117166 198285 iana.warc.gz
-org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200654 http://www.iana.org/_css/2013.1/fonts/OpenSans-Bold.ttf warc/revisit - YFUR5ALIWJMWV6FAAFRLVRQNXZQF5HRW - - 548 482544 iana.warc.gz
-org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200706 http://www.iana.org/_css/2013.1/fonts/OpenSans-Bold.ttf warc/revisit - YFUR5ALIWJMWV6FAAFRLVRQNXZQF5HRW - - 552 495230 iana.warc.gz
-
-
-# Reverse CDX Stream
->>> cdx_ops_test('http://iana.org/_css/2013.1/fonts/opensans-bold.ttf', reverse = True, resolveRevisits = True, limit = 3)
-org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126201308 https://www.iana.org/_css/2013.1/fonts/OpenSans-Bold.ttf application/octet-stream 200 YFUR5ALIWJMWV6FAAFRLVRQNXZQF5HRW - - 551 783712 iana.warc.gz 117166 198285 iana.warc.gz
-org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126201249 http://www.iana.org/_css/2013.1/fonts/OpenSans-Bold.ttf application/octet-stream 200 YFUR5ALIWJMWV6FAAFRLVRQNXZQF5HRW - - 552 771773 iana.warc.gz 117166 198285 iana.warc.gz
-org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126201240 http://www.iana.org/_css/2013.1/fonts/OpenSans-Bold.ttf application/octet-stream 200 YFUR5ALIWJMWV6FAAFRLVRQNXZQF5HRW - - 551 757988 iana.warc.gz 117166 198285 iana.warc.gz
-
->>> cdx_ops_test('http://iana.org/_js/2013.1/jquery.js', reverse = True, resolveRevisits = True, limit = 1)
-org,iana)/_js/2013.1/jquery.js 20140126201307 https://www.iana.org/_js/2013.1/jquery.js application/x-javascript 200 AAW2RS7JB7HTF666XNZDQYJFA6PDQBPO - - 543 778507 iana.warc.gz 33449 7311 iana.warc.gz
-
-# From & To
->>> cdx_ops_test('http://example.com/', sources = [test_cdx_dir], from_ts='2013', to='2013')
-com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
-
->>> cdx_ops_test('http://example.com/', sources = [test_cdx_dir], from_ts='2014')
-com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz
-com,example)/ 20140127171251 http://example.com warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 553 11875 dupes.warc.gz
-
->>> cdx_ops_test('http://example.com/', sources = [test_cdx_dir], to='2012')  # doctest: +IGNORE_EXCEPTION_DETAIL
-Traceback (most recent call last):
-NotFoundException: No Captures found for: http://example.com/
-
-# No matching results
->>> cdx_ops_test('http://iana.org/dont_have_this', reverse = True, resolveRevisits = True, limit = 2)  # doctest: +IGNORE_EXCEPTION_DETAIL
-Traceback (most recent call last):
-NotFoundException: No Captures found for: http://iana.org/dont_have_this
-
-# No matching -- limit=1
->>> cdx_ops_test('http://iana.org/dont_have_this', reverse = True, resolveRevisits = True, limit = 1)  # doctest: +IGNORE_EXCEPTION_DETAIL
-Traceback (most recent call last):
-NotFoundException: No Captures found for: http://iana.org/dont_have_this
-
-# Filter cdx (default: regex)
->>> cdx_ops_test(url = 'http://iana.org/domains', matchType = 'prefix', filter = ['mimetype:text/html'])
-org,iana)/domains 20140126200825 http://www.iana.org/domains text/html 200 7UPSCLNWNZP33LGW6OJGSF2Y4CDG4ES7 - - 2912 610534 iana.warc.gz
-org,iana)/domains/arpa 20140126201248 http://www.iana.org/domains/arpa text/html 200 QOFZZRN6JIKAL2JRL6ZC2VVG42SPKGHT - - 2939 759039 iana.warc.gz
-org,iana)/domains/idn-tables 20140126201127 http://www.iana.org/domains/idn-tables text/html 200 HNCUFTJMOQOGAEY6T56KVC3T7TVLKGEW - - 8118 715878 iana.warc.gz
-org,iana)/domains/int 20140126201239 http://www.iana.org/domains/int text/html 200 X32BBNNORV4SPEHTQF5KI5NFHSKTZK6Q - - 2482 746788 iana.warc.gz
-org,iana)/domains/reserved 20140126201054 http://www.iana.org/domains/reserved text/html 200 R5AAEQX5XY5X5DG66B23ODN5DUBWRA27 - - 3573 701457 iana.warc.gz
-org,iana)/domains/root 20140126200912 http://www.iana.org/domains/root text/html 200 YWA2R6UVWCYNHBZJKBTPYPZ5CJWKGGUX - - 2691 657746 iana.warc.gz
-org,iana)/domains/root/db 20140126200927 http://www.iana.org/domains/root/db/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 446 671278 iana.warc.gz
-org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db text/html 200 DHXA725IW5VJJFRTWBQT6BEZKRE7H57S - - 18365 672225 iana.warc.gz
-org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/servers text/html 200 AFW34N3S4NK2RJ6QWMVPB5E2AIUETAHU - - 3137 733840 iana.warc.gz
-
->>> cdx_ops_test(url = 'http://iana.org/_css/2013.1/screen.css', filter = 'statuscode:200')
-org,iana)/_css/2013.1/screen.css 20140126200625 http://www.iana.org/_css/2013.1/screen.css text/css 200 BUAEPXZNN44AIX3NLXON4QDV6OY2H5QD - - 8754 41238 iana.warc.gz
-
-# Filter Alt field name
->>> cdx_ops_test(url = 'http://iana.org/_css/2013.1/screen.css', filter = 'status:200')
-org,iana)/_css/2013.1/screen.css 20140126200625 http://www.iana.org/_css/2013.1/screen.css text/css 200 BUAEPXZNN44AIX3NLXON4QDV6OY2H5QD - - 8754 41238 iana.warc.gz
-
-# Filter -- no field specified, match regex on entire line
->>> cdx_ops_test(url = 'http://iana.org/_css/2013.1/screen.css', filter = '~screen.css 20140126200625')
-org,iana)/_css/2013.1/screen.css 20140126200625 http://www.iana.org/_css/2013.1/screen.css text/css 200 BUAEPXZNN44AIX3NLXON4QDV6OY2H5QD - - 8754 41238 iana.warc.gz
-
-# Filter -- no such field, no matches
->>> cdx_ops_test(url = 'http://iana.org/_css/2013.1/screen.css', filter = 'blah:200')  # doctest: +IGNORE_EXCEPTION_DETAIL
-Traceback (most recent call last):
-NotFoundException: No Captures found for: http://iana.org/_css/2013.1/screen.css
-
-# Filter exact -- (* prefix)
->>> cdx_ops_test(url = 'http://example.com*', sources = [test_cdx_dir], filter = '=urlkey:com,example)/?example=1')
-com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
-com,example)/?example=1 20140103030341 http://example.com?example=1 warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 553 1864 example.warc.gz
-
-# Filter exact invert
->>> cdx_ops_test(url = 'http://example.com', sources = [test_cdx_dir], matchType = 'prefix', filter = ['!=urlkey:com,example)/?example=1', '!=urlkey:com,example)/?example=2', '!=urlkey:com,example)/?example=3'])
-com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
-com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz
-com,example)/ 20140127171251 http://example.com warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 553 11875 dupes.warc.gz
-
-# Filter contains
->>> cdx_ops_test(url = 'http://example.com', sources = [test_cdx_dir], matchType = 'prefix', filter = '~urlkey:example=1')
-com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
-com,example)/?example=1 20140103030341 http://example.com?example=1 warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 553 1864 example.warc.gz
-
-# Filter contains invert
->>> cdx_ops_test(url = 'http://example.com', sources = [test_cdx_dir], matchType = 'prefix', filter = '!~urlkey:example=')
-com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
-com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz
-com,example)/ 20140127171251 http://example.com warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 553 11875 dupes.warc.gz
-
-# Collapse by timestamp
-# unresolved revisits, different statuscode results in an extra repeat
->>> cdx_ops_test(url = 'http://iana.org/_css/2013.1/screen.css', collapseTime = 11)
-org,iana)/_css/2013.1/screen.css 20140126200625 http://www.iana.org/_css/2013.1/screen.css text/css 200 BUAEPXZNN44AIX3NLXON4QDV6OY2H5QD - - 8754 41238 iana.warc.gz
-org,iana)/_css/2013.1/screen.css 20140126200653 http://www.iana.org/_css/2013.1/screen.css warc/revisit - BUAEPXZNN44AIX3NLXON4QDV6OY2H5QD - - 533 328367 iana.warc.gz
-org,iana)/_css/2013.1/screen.css 20140126201054 http://www.iana.org/_css/2013.1/screen.css warc/revisit - BUAEPXZNN44AIX3NLXON4QDV6OY2H5QD - - 543 706476 iana.warc.gz
-
-# resolved revisits
->>> cdx_ops_test(url = 'http://iana.org/_css/2013.1/screen.css', collapseTime = '11', resolveRevisits = True)
-org,iana)/_css/2013.1/screen.css 20140126200625 http://www.iana.org/_css/2013.1/screen.css text/css 200 BUAEPXZNN44AIX3NLXON4QDV6OY2H5QD - - 8754 41238 iana.warc.gz - - -
-org,iana)/_css/2013.1/screen.css 20140126201054 http://www.iana.org/_css/2013.1/screen.css text/css 200 BUAEPXZNN44AIX3NLXON4QDV6OY2H5QD - - 543 706476 iana.warc.gz 8754 41238 iana.warc.gz
-
-# Sort by closest timestamp + field select output
->>> cdx_ops_test(closest = '20140126200826', url = 'http://iana.org/_css/2013.1/fonts/opensans-bold.ttf', fields = 'timestamp', limit = 10)
-20140126200826
-20140126200816
-20140126200805
-20140126200912
-20140126200738
-20140126200930
-20140126200718
-20140126200706
-20140126200654
-20140126200625
-
-# In case of both reverse and closest, closest takes precedence
-# 'reverse closest' not supported at this time
-# if it is, this test will reflect the change
->>> cdx_ops_test(closest = '20140126200826', url = 'http://iana.org/_css/2013.1/fonts/opensans-bold.ttf', fields = 'timestamp', limit = 3, reverse = True)
-20140126200826
-20140126200816
-20140126200805
-
->>> cdx_ops_test(closest = '20140126201306', url = 'http://iana.org/dnssec', resolveRevisits = True, sources = [test_cdx_dir + 'dupes.cdx', test_cdx_dir + 'iana.cdx'])
-org,iana)/dnssec 20140126201306 http://www.iana.org/dnssec text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 442 772827 iana.warc.gz - - -
-org,iana)/dnssec 20140126201307 https://www.iana.org/dnssec text/html 200 PHLRSX73EV3WSZRFXMWDO6BRKTVUSASI - - 2278 773766 iana.warc.gz - - -
-
-
->>> cdx_ops_test(closest = '20140126201307', url = 'http://iana.org/dnssec', resolveRevisits = True)
-org,iana)/dnssec 20140126201307 https://www.iana.org/dnssec text/html 200 PHLRSX73EV3WSZRFXMWDO6BRKTVUSASI - - 2278 773766 iana.warc.gz - - -
-org,iana)/dnssec 20140126201306 http://www.iana.org/dnssec text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 442 772827 iana.warc.gz - - -
-
-# equal dist prefer earlier
->>> cdx_ops_test(closest = '20140126200700', url = 'http://iana.org/_css/2013.1/fonts/opensans-bold.ttf', resolveRevisits = True, limit = 2)
-org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200654 http://www.iana.org/_css/2013.1/fonts/OpenSans-Bold.ttf application/octet-stream 200 YFUR5ALIWJMWV6FAAFRLVRQNXZQF5HRW - - 548 482544 iana.warc.gz 117166 198285 iana.warc.gz
-org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200706 http://www.iana.org/_css/2013.1/fonts/OpenSans-Bold.ttf application/octet-stream 200 YFUR5ALIWJMWV6FAAFRLVRQNXZQF5HRW - - 552 495230 iana.warc.gz 117166 198285 iana.warc.gz
-
->>> cdx_ops_test(closest = '20140126200659', url = 'http://iana.org/_css/2013.1/fonts/opensans-bold.ttf', resolveRevisits = True, limit = 2, fields = 'timestamp')
-20140126200654
-20140126200706
-
->>> cdx_ops_test(closest = '20140126200701', url = 'http://iana.org/_css/2013.1/fonts/opensans-bold.ttf', resolveRevisits = True, limit = 2, fields = 'timestamp')
-20140126200706
-20140126200654
-
-
-# Resolve Revisits
->>> cdx_ops_test('http://iana.org/_css/2013.1/fonts/inconsolata.otf', resolveRevisits = True)
-org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200826 http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf application/octet-stream 200 LNMEDYOENSOEI5VPADCKL3CB6N3GWXPR - - 34054 620049 iana.warc.gz - - -
-org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200912 http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf application/octet-stream 200 LNMEDYOENSOEI5VPADCKL3CB6N3GWXPR - - 546 667073 iana.warc.gz 34054 620049 iana.warc.gz
-org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200930 http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf application/octet-stream 200 LNMEDYOENSOEI5VPADCKL3CB6N3GWXPR - - 534 697255 iana.warc.gz 34054 620049 iana.warc.gz
-org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126201055 http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf application/octet-stream 200 LNMEDYOENSOEI5VPADCKL3CB6N3GWXPR - - 547 714833 iana.warc.gz 34054 620049 iana.warc.gz
-org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126201249 http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf application/octet-stream 200 LNMEDYOENSOEI5VPADCKL3CB6N3GWXPR - - 551 768625 iana.warc.gz 34054 620049 iana.warc.gz
-
->>> cdx_ops_test('http://iana.org/domains/root/db', resolveRevisits = True)
-org,iana)/domains/root/db 20140126200927 http://www.iana.org/domains/root/db/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 446 671278 iana.warc.gz - - -
-org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db text/html 200 DHXA725IW5VJJFRTWBQT6BEZKRE7H57S - - 18365 672225 iana.warc.gz - - -
-
-"""
-
-#=================================================================
-from pywb.cdx.cdxserver import CDXServer
-import os
-import sys
-import six
-
-from pywb import get_test_dir
-
-test_cdx_dir = get_test_dir() + 'cdx/'
-
-
-def cdx_ops_test_data(url, sources = [test_cdx_dir + 'iana.cdx'], **kwparams):
-    kwparams['url'] = url
-    if not 'output' in kwparams:
-        kwparams['output'] = 'cdxobject'
-
-    server = CDXServer(sources)
-    results = server.load_cdx(**kwparams)
-    return list(results)
-
-
-def cdx_ops_test(*args, **kwargs):
-    results = cdx_ops_test_data(*args, **kwargs)
-
-    fields = kwargs.get('fields')
-    if fields:
-        fields = fields.split(',')
-
-    for x in results:
-        if not isinstance(x, str):
-            l = x.to_text(fields).replace('\t', '    ')
-        else:
-            l = x
-
-        sys.stdout.write(l)
-
-
-
-def test_cdxj_resolve_revisit():
-    # Resolve Revisit -- cdxj minimal -- output also json
-    results = cdx_ops_test_data(url = 'http://example.com/?example=1', sources=[get_test_dir() + 'cdxj/example.cdxj'], resolveRevisits=True)
-    assert(len(results) == 2)
-    assert(dict(results[0]) == {"urlkey": "com,example)/?example=1", "timestamp": "20140103030321", "url": "http://example.com?example=1", "length": "1043", "filename": "example.warc.gz", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "offset": "333", "orig.length": "-", "orig.offset": "-", "orig.filename": "-"})
-
-    assert(dict(results[1]) == {"urlkey": "com,example)/?example=1", "timestamp": "20140103030341", "url": "http://example.com?example=1", "filename": "example.warc.gz", "length": "553", "mime": "", "offset": "1864", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "orig.length": "1043", "orig.offset": "333", "orig.filename": "example.warc.gz"})
-
-
-
-def test_cdxj_resolve_revisit_2():
-    # Resolve Revisit -- cdxj minimal -- output also json
-    results = cdx_ops_test_data(url = 'http://example.com/?example=1', sources=[get_test_dir() + 'cdxj/example-no-digest.cdxj'], resolveRevisits=True)
-    assert(len(results) == 2)
-    assert(dict(results[0]) == {"urlkey": "com,example)/?example=1", "timestamp": "20140103030321", "url": "http://example.com?example=1", "length": "1043", "filename": "example.warc.gz", "offset": "333", "orig.length": "-", "orig.offset": "-", "orig.filename": "-"})
-
-    assert(dict(results[1]) == {"urlkey": "com,example)/?example=1", "timestamp": "20140103030341", "url": "http://example.com?example=1", "length": "553", "filename": "example.warc.gz", "mime": "warc/revisit", "offset": "1864", "orig.length": "-", "orig.offset": "-", "orig.filename": "-"})
-
-
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
--- a/pywb/cdx/test/test_cdxserver_config.py
+++ b/pywb/cdx/test/test_cdxserver_config.py
@ -1,117 +0,0 @@
-import yaml
-from pywb.cdx.cdxserver import create_cdx_server, CDXServer, RemoteCDXServer
-from pywb.cdx.cdxsource import CDXFile, RemoteCDXSource, RedisCDXSource
-from pywb.cdx.zipnum import ZipNumCluster
-
-from pywb import get_test_dir
-
-yaml_config = r"""
-test_1:
-    index_paths:
-        # local cdx paths
-        - {0}cdx/example.cdx
-
-        # simple remote cdx source, assumes no filtering
-        - http://cdxserver.example.com/cdx
-
-        # customized remote cdx server
-        - !!python/object:pywb.cdx.cdxsource.RemoteCDXSource {{
-            remote_url: 'http://cdxserver.example.com/cdx',
-            cookie: custom_token=value,
-            remote_processing: true,
-        }}
-
-        # example redis cdx source
-        - redis://redis.example.com:6379/0
-
-        - {0}zipcdx/zipnum-sample.idx
-
-test_2:
-    index_paths: http://cdxserver.example.com/cdx
-
-test_3: http://cdxserver.example.com/cdx
-
-test_4: !!python/object:pywb.cdx.cdxsource.RemoteCDXSource {{
-            remote_url: 'http://cdxserver.example.com/cdx',
-            cookie: custom_token=value,
-            remote_processing: true,
-        }}
-
-test_5: {0}cdx/example.cdx
-
-test_6:
-    index_paths: invalid://abc
-
-
-""".format(get_test_dir())
-
-def test_cdxserver_config():
-    config = yaml.load(yaml_config)
-    cdxserver = create_cdx_server(config.get('test_1'))
-    assert(isinstance(cdxserver, CDXServer))
-    sources = cdxserver.sources
-    assert len(sources) == 5
-
-    assert type(sources[0]) == CDXFile
-    assert sources[0].filename.endswith('example.cdx')
-
-    # remote source with no remote processing
-    assert type(sources[1]) == RemoteCDXSource
-    assert sources[1].remote_url == 'http://cdxserver.example.com/cdx'
-    assert sources[1].remote_processing == False
-
-    # remote cdx server with processing
-    assert type(sources[2]) == RemoteCDXSource
-    assert sources[2].remote_url == 'http://cdxserver.example.com/cdx'
-    assert sources[2].remote_processing == True
-
-    # redis source
-    assert type(sources[3]) == RedisCDXSource
-    assert sources[3].redis_url == 'redis://redis.example.com:6379/0'
-
-    assert type(sources[4]) == ZipNumCluster
-    assert sources[4].summary.endswith('zipnum-sample.idx')
-    assert sources[4].loc_resolver.loc_filename.endswith('zipnum-sample.loc')
-
-
-def assert_remote_cdxserver(config_name):
-    config = yaml.load(yaml_config)
-    cdxserver = create_cdx_server(config.get(config_name))
-    assert(isinstance(cdxserver, RemoteCDXServer))
-
-    source = cdxserver.source
-
-    # remote cdx server with remote processing
-    assert type(source) == RemoteCDXSource
-    assert source.remote_url == 'http://cdxserver.example.com/cdx'
-    assert source.remote_processing == True
-
-
-def test_remote_index_path():
-    assert_remote_cdxserver('test_2')
-
-def test_no_index_path_remote():
-    assert_remote_cdxserver('test_3')
-
-def test_explicit_remote_source():
-    assert_remote_cdxserver('test_4')
-
-
-def test_single_cdx():
-    config = yaml.load(yaml_config)
-    cdxserver = create_cdx_server(config.get('test_5'))
-    assert(isinstance(cdxserver, CDXServer))
-    sources = cdxserver.sources
-    assert len(sources) == 1
-
-    assert type(sources[0]) == CDXFile
-    assert sources[0].filename.endswith('example.cdx')
-
-def test_invalid_config():
-    config = yaml.load(yaml_config)
-    cdxserver = create_cdx_server(config.get('test_6'))
-    assert(isinstance(cdxserver, CDXServer))
-    sources = cdxserver.sources
-    assert len(sources) == 0
-
-
--- a/pywb/cdx/test/test_redis_source.py
+++ b/pywb/cdx/test/test_redis_source.py
@ -1,78 +0,0 @@
-"""
->>> redis_cdx(redis_cdx_server, 'http://example.com')
-com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
-com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz
-com,example)/ 20140127171251 http://example.com warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 553 11875 dupes.warc.gz
-
->>> redis_cdx(redis_cdx_server_key, 'http://example.com')
-com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
-com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz
-com,example)/ 20140127171251 http://example.com warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 553 11875 dupes.warc.gz
-
-"""
-
-from fakeredis import FakeStrictRedis
-from mock import patch
-
-from warcio.timeutils import timestamp_to_sec
-from pywb.cdx.cdxsource import RedisCDXSource
-from pywb.cdx.cdxserver import CDXServer
-
-from pywb import get_test_dir
-
-import sys
-import os
-
-test_cdx_dir = os.path.join(get_test_dir(), 'cdx/')
-
-def load_cdx_into_redis(source, filename, key=None):
-    # load a cdx into mock redis
-    with open(test_cdx_dir + filename, 'rb') as fh:
-        for line in fh:
-            zadd_cdx(source, line, key)
-
-def zadd_cdx(source, cdx, key):
-    if key:
-        source.redis.zadd(key, 0, cdx)
-        return
-
-    parts = cdx.split(b' ', 2)
-
-    key = parts[0]
-    timestamp = parts[1]
-    rest = timestamp + b' ' + parts[2]
-
-    score = timestamp_to_sec(timestamp.decode('utf-8'))
-    source.redis.zadd(source.key_prefix + key, score, rest)
-
-
-
-@patch('redis.StrictRedis', FakeStrictRedis)
-def init_redis_server():
-    source = RedisCDXSource('redis://127.0.0.1:6379/0')
-
-    for f in os.listdir(test_cdx_dir):
-        if f.endswith('.cdx'):
-            load_cdx_into_redis(source, f)
-
-    return CDXServer([source])
-
-@patch('redis.StrictRedis', FakeStrictRedis)
-def init_redis_server_key_file():
-    source = RedisCDXSource('redis://127.0.0.1:6379/0/key')
-
-    for f in os.listdir(test_cdx_dir):
-        if f.endswith('.cdx'):
-            load_cdx_into_redis(source, f, source.cdx_key)
-
-    return CDXServer([source])
-
-
-def redis_cdx(cdx_server, url, **params):
-    cdx_iter = cdx_server.load_cdx(url=url, **params)
-    for cdx in cdx_iter:
-        sys.stdout.write(cdx)
-
-redis_cdx_server = init_redis_server()
-redis_cdx_server_key = init_redis_server_key_file()
-
--- a/pywb/cdx/test/test_zipnum.py
+++ b/pywb/cdx/test/test_zipnum.py
@ -1,243 +0,0 @@
-"""
->>> zip_ops_test(url='http://iana.org')
-org,iana)/ 20140126200624 http://www.iana.org/ text/html 200 OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB - - 2258 334 iana.warc.gz
-org,iana)/ 20140127171238 http://iana.org unk 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 343 1858 dupes.warc.gz
-org,iana)/ 20140127171238 http://www.iana.org/ warc/revisit - OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB - - 536 2678 dupes.warc.gz
-
-# test idx index (tabs replacad with 4 spaces)
->>> zip_ops_test(url='http://iana.org/domains/', matchType='prefix', showPagedIndex=True)
-org,iana)/dnssec 20140126201307    zipnum    8517    373    35
-org,iana)/domains/int 20140126201239    zipnum    8890    355    36
-org,iana)/domains/root/servers 20140126201227    zipnum    9245    386    37
-
-
->>> zip_ops_test(url='http://iana.org/domains/*')
-org,iana)/domains/arpa 20140126201248 http://www.iana.org/domains/arpa text/html 200 QOFZZRN6JIKAL2JRL6ZC2VVG42SPKGHT - - 2939 759039 iana.warc.gz
-org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
-org,iana)/domains/idn-tables 20140126201127 http://www.iana.org/domains/idn-tables text/html 200 HNCUFTJMOQOGAEY6T56KVC3T7TVLKGEW - - 8118 715878 iana.warc.gz
-org,iana)/domains/int 20140126201239 http://www.iana.org/domains/int text/html 200 X32BBNNORV4SPEHTQF5KI5NFHSKTZK6Q - - 2482 746788 iana.warc.gz
-org,iana)/domains/reserved 20140126201054 http://www.iana.org/domains/reserved text/html 200 R5AAEQX5XY5X5DG66B23ODN5DUBWRA27 - - 3573 701457 iana.warc.gz
-org,iana)/domains/root 20140126200912 http://www.iana.org/domains/root text/html 200 YWA2R6UVWCYNHBZJKBTPYPZ5CJWKGGUX - - 2691 657746 iana.warc.gz
-org,iana)/domains/root/db 20140126200927 http://www.iana.org/domains/root/db/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 446 671278 iana.warc.gz
-org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db text/html 200 DHXA725IW5VJJFRTWBQT6BEZKRE7H57S - - 18365 672225 iana.warc.gz
-org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/servers text/html 200 AFW34N3S4NK2RJ6QWMVPB5E2AIUETAHU - - 3137 733840 iana.warc.gz
-
-# first page
->>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', showPagedIndex=True, pageSize=4, page=0)
-com,example)/ 20140127171200    zipnum    0    275    1
-org,iana)/ 20140127171238    zipnum    275    328    2
-org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126201055    zipnum    603    312    3
-org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200718    zipnum    915    235    4
-
-
-# first page -- simplified query
->>> zip_ops_test(url='*.iana.org/path_part_ignored/', showPagedIndex=True, pageSize=4)
-com,example)/ 20140127171200    zipnum    0    275    1
-org,iana)/ 20140127171238    zipnum    275    328    2
-org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126201055    zipnum    603    312    3
-org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200718    zipnum    915    235    4
-
-# next page + json
->>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', output='json', showPagedIndex=True, pageSize=4, page=1)
-{"urlkey": "org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200912", "part": "zipnum", "offset": 1150, "length": 235, "lineno": 5}
-{"urlkey": "org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126201240", "part": "zipnum", "offset": 1385, "length": 307, "lineno": 6}
-{"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200654", "part": "zipnum", "offset": 1692, "length": 235, "lineno": 7}
-{"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200816", "part": "zipnum", "offset": 1927, "length": 231, "lineno": 8}
-
-# last page
->>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', showPagedIndex=True, pageSize=4, page=9)
-org,iana)/domains/root/servers 20140126201227    zipnum    9245    386    37
-org,iana)/time-zones 20140126200737    zipnum    9631    166    38
-
-# last page cdx
->>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', pageSize=4, page=9)
-org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/servers text/html 200 AFW34N3S4NK2RJ6QWMVPB5E2AIUETAHU - - 3137 733840 iana.warc.gz
-org,iana)/numbers 20140126200651 http://www.iana.org/numbers text/html 200 HWT5UZKURYLW5QNWVZCWFCANGEMU7XWK - - 3498 321385 iana.warc.gz
-org,iana)/performance/ietf-draft-status 20140126200815 http://www.iana.org/performance/ietf-draft-status text/html 200 T5IQTX6DWV5KABGH454CYEDWKRI5Y23E - - 2940 597667 iana.warc.gz
-org,iana)/performance/ietf-statistics 20140126200804 http://www.iana.org/performance/ietf-statistics text/html 200 XOFML5WNBQMTSULLIIPLSP6U5MX33HN6 - - 3712 582987 iana.warc.gz
-org,iana)/protocols 20140126200715 http://www.iana.org/protocols text/html 200 IRUJZEUAXOUUG224ZMI4VWTUPJX6XJTT - - 63663 496277 iana.warc.gz
-org,iana)/time-zones 20140126200737 http://www.iana.org/time-zones text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-org,iana)/time-zones/x 20140126200737 http://www.iana.org/time-zones/X text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-org,iana)/time-zones/y 20140126200737 http://www.iana.org/time-zones/Y text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-
-# last page reverse -- not yet supported
-#>>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', reverse=True, showPagedIndex=True, pageSize=4, page=9)
-#org,iana)/time-zones 20140126200737    zipnum    9623    145    38
-#org,iana)/domains/root/servers 20140126201227    zipnum    9237    386    37
-
-
-# last page reverse CDX
->>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', reverse=True, pageSize=4, page=9)
-org,iana)/time-zones/y 20140126200737 http://www.iana.org/time-zones/Y text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-org,iana)/time-zones/x 20140126200737 http://www.iana.org/time-zones/X text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-org,iana)/time-zones 20140126200737 http://www.iana.org/time-zones text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-org,iana)/protocols 20140126200715 http://www.iana.org/protocols text/html 200 IRUJZEUAXOUUG224ZMI4VWTUPJX6XJTT - - 63663 496277 iana.warc.gz
-org,iana)/performance/ietf-statistics 20140126200804 http://www.iana.org/performance/ietf-statistics text/html 200 XOFML5WNBQMTSULLIIPLSP6U5MX33HN6 - - 3712 582987 iana.warc.gz
-org,iana)/performance/ietf-draft-status 20140126200815 http://www.iana.org/performance/ietf-draft-status text/html 200 T5IQTX6DWV5KABGH454CYEDWKRI5Y23E - - 2940 597667 iana.warc.gz
-org,iana)/numbers 20140126200651 http://www.iana.org/numbers text/html 200 HWT5UZKURYLW5QNWVZCWFCANGEMU7XWK - - 3498 321385 iana.warc.gz
-org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/servers text/html 200 AFW34N3S4NK2RJ6QWMVPB5E2AIUETAHU - - 3137 733840 iana.warc.gz
-
-# last url prefix
->>> zip_ops_test(url='http://iana.org/time-zones*')
-org,iana)/time-zones 20140126200737 http://www.iana.org/time-zones text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-org,iana)/time-zones/x 20140126200737 http://www.iana.org/time-zones/X text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-org,iana)/time-zones/y 20140126200737 http://www.iana.org/time-zones/Y text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-
-# last url prefix w/ slash
->>> zip_ops_test(url='http://iana.org/time-zones/*')
-org,iana)/time-zones/x 20140126200737 http://www.iana.org/time-zones/X text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-org,iana)/time-zones/y 20140126200737 http://www.iana.org/time-zones/Y text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-
-# last url exact
->>> zip_ops_test(url='http://iana.org/time-zones/Y')
-org,iana)/time-zones/y 20140126200737 http://www.iana.org/time-zones/Y text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz
-
-# invalid page
->>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', showPagedIndex=True, pageSize=4, page=10)   # doctest: +IGNORE_EXCEPTION_DETAIL
-Traceback (most recent call last):
-CDXException: Page 10 invalid: First Page is 0, Last Page is 9
-
-
->>> zip_ops_test(url='http://aaa.aaa/', matchType='exact', showPagedIndex=True)  # doctest: +IGNORE_EXCEPTION_DETAIL
-Traceback (most recent call last):
-NotFoundException: No Captures found for: http://aaa.aaa/
-
->>> zip_ops_test(url='http://aaa.aaa/', matchType='domain', showPagedIndex=True)  # doctest: +IGNORE_EXCEPTION_DETAIL
-Traceback (most recent call last):
-NotFoundException: No Captures found for: http://aaa.aaa/ (domain query)
-
-# list last index line, as we don't know if there are any captures at end
->>> zip_ops_test(url='http://aaa.zz/', matchType='domain', showPagedIndex=True)
-org,iana)/time-zones 20140126200737    zipnum    9631    166    38
-
-# read cdx to find no captures
->>> zip_ops_test(url='http://aaa.zz/', matchType='domain')  # doctest: +IGNORE_EXCEPTION_DETAIL
-Traceback (most recent call last):
-NotFoundException: No Captures found for: http://aaa.zz/ (domain query)
-
-# Invalid .idx filesor or missing loc
-
->>> zip_test_err(url='http://example.com/', matchType='exact')  # doctest: +IGNORE_EXCEPTION_DETAIL
-Traceback (most recent call last):
-Exception: No Locations Found for: foo
-
-
->>> zip_test_err(url='http://example.zz/x', matchType='exact')  # doctest: +IGNORE_EXCEPTION_DETAIL
-Traceback (most recent call last):
-Exception: No Locations Found for: foo2
-
-"""
-
-from test_cdxops import cdx_ops_test, cdx_ops_test_data
-from pywb import get_test_dir
-from pywb.cdx.cdxserver import CDXServer
-
-
-import shutil
-import tempfile
-import os
-import json
-
-import pytest
-
-
-test_zipnum = get_test_dir() + 'zipcdx/zipnum-sample.idx'
-
-def zip_ops_test_data(url, **kwargs):
-    sources = test_zipnum
-    return json.loads(cdx_ops_test_data(url, sources, **kwargs)[0])
-
-def zip_ops_test(url, **kwargs):
-    sources = test_zipnum
-    cdx_ops_test(url, sources, **kwargs)
-
-def zip_test_err(url, **kwargs):
-    sources = get_test_dir() + 'zipcdx/zipnum-bad.idx'
-    cdx_ops_test(url, sources, **kwargs)
-
-
-def test_zip_prefix_load():
-
-    tmpdir = tempfile.mkdtemp()
-    try:
-        shutil.copy(test_zipnum, tmpdir)
-        shutil.copy(get_test_dir() + 'zipcdx/zipnum-sample.cdx.gz',
-                    os.path.join(tmpdir, 'zipnum'))
-
-        config={}
-        config['shard_index_loc'] = dict(match='(.*)',
-                                         replace=r'\1')
-        server = CDXServer(os.path.join(tmpdir, 'zipnum-sample.idx'),
-                           config=config)
-
-
-        # Test Page Count
-        results = server.load_cdx(url='iana.org/',
-                                  matchType='domain',
-                                  showNumPages=True)
-
-        results = list(results)
-        assert len(results) == 1, results
-        assert json.loads(results[0]) == {"blocks": 38, "pages": 4, "pageSize": 10}
-
-
-        # Test simple query
-        results = server.load_cdx(url='iana.org/')
-        results = list(results)
-        assert len(results) ==3, results
-        assert '20140126200624' in results[0]
-        assert '20140127171238' in results[1]
-        assert 'warc/revisit' in results[2]
-
-    finally:
-        shutil.rmtree(tmpdir)
-
-
-
-def test_blocks_def_page_size():
-    # Pages -- default page size
-    res = zip_ops_test_data(url='http://iana.org/domains/example', matchType='exact', showNumPages=True)
-    assert(res == {"blocks": 1, "pages": 1, "pageSize": 10})
-
-def test_blocks_def_size_2():
-    res = zip_ops_test_data(url='http://iana.org/domains/', matchType='domain', showNumPages=True)
-    assert(res == {"blocks": 38, "pages": 4, "pageSize": 10})
-
-def test_blocks_set_page_size():
-    # set page size
-    res = zip_ops_test_data(url='http://iana.org/domains/', matchType='domain', pageSize=4, showNumPages=True)
-    assert(res == {"blocks": 38, "pages": 10, "pageSize": 4})
-
-def test_blocks_alt_q():
-    # set page size -- alt domain query
-    res = zip_ops_test_data(url='*.iana.org', pageSize='4', showNumPages=True)
-    assert(res == {"blocks": 38, "pages": 10, "pageSize": 4})
-
-def test_blocks_secondary_match():
-    # page size for non-existent, but secondary index match
-    res = zip_ops_test_data(url='iana.org/domains/int/blah', pageSize=4, showNumPages=True)
-    assert(res == {"blocks": 0, "pages": 0, "pageSize": 4})
-
-def test_blocks_no_match():
-    # page size for non-existent, no secondary index match
-    res = zip_ops_test_data(url='*.foo.bar', showNumPages=True)
-    assert(res == {"blocks": 0, "pages": 0, "pageSize": 10})
-
-def test_blocks_zero_pages():
-    # read cdx to find 0 pages
-    res = zip_ops_test_data(url='http://aaa.zz/', matchType='domain', showNumPages=True)
-    assert(res == {"blocks": 0, "pages": 0, "pageSize": 10})
-
-
-# Errors
-
-def test_err_file_not_found():
-    with pytest.raises(IOError):
-        zip_test_err(url='http://iana.org/x', matchType='exact')  # doctest: +IGNORE_EXCEPTION_DETAIL
-
-
-
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
--- a/pywb/cdx/zipnum.py
+++ b/pywb/cdx/zipnum.py
@ -1,353 +0,0 @@
-import os
-import collections
-import itertools
-import logging
-from io import BytesIO
-import datetime
-import json
-import six
-
-from six.moves import map
-
-from pywb.cdx.cdxsource import CDXSource
-from pywb.cdx.cdxobject import IDXObject, CDXException
-
-from pywb.utils.loaders import BlockLoader, read_last_line
-from warcio.bufferedreaders import gzip_decompressor
-from pywb.utils.binsearch import iter_range, linearsearch, search
-
-
-#=================================================================
-class ZipBlocks:
-    def __init__(self, part, offset, length, count):
-        self.part = part
-        self.offset = offset
-        self.length = length
-        self.count = count
-
-
-#=================================================================
-#TODO: see if these could be combined with warc path resolvers
-
-class LocMapResolver(object):
-    """ Lookup shards based on a file mapping
-    shard name to one or more paths. The entries are
-    tab delimited.
-    """
-    def __init__(self, loc_summary, loc_filename):
-        # initial loc map
-        self.loc_map = {}
-        self.loc_mtime = 0
-        if not loc_filename:
-            splits = os.path.splitext(loc_summary)
-            loc_filename = splits[0] + '.loc'
-        self.loc_filename = loc_filename
-
-        self.load_loc()
-
-    def load_loc(self):
-        # check modified time of current file before loading
-        new_mtime = os.path.getmtime(self.loc_filename)
-        if (new_mtime == self.loc_mtime):
-            return
-
-        # update loc file mtime
-        self.loc_mtime = new_mtime
-
-        local_dir = os.path.dirname(self.loc_filename)
-
-        def res_path(pathname):
-            if '://' not in pathname:
-                pathname = os.path.join(local_dir, pathname)
-            return pathname
-
-        logging.debug('Loading loc from: ' + self.loc_filename)
-        with open(self.loc_filename, 'r') as fh:
-            for line in fh:
-                parts = line.rstrip().split('\t')
-
-                paths = [res_path(pathname) for pathname in parts[1:]]
-                self.loc_map[parts[0]] = paths
-
-    def __call__(self, part, query):
-        return self.loc_map[part]
-
-
-#=================================================================
-class LocPrefixResolver(object):
-    """ Use a prefix lookup, where the prefix can either be a fixed
-    string or can be a regex replacement of the index summary path
-    """
-    def __init__(self, loc_summary, loc_config):
-        import re
-        loc_match = loc_config.get('match', '().*')
-        loc_replace = loc_config['replace']
-        loc_summary = os.path.dirname(loc_summary) + '/'
-        self.prefix = re.sub(loc_match, loc_replace, loc_summary)
-
-    def load_loc(self):
-        pass
-
-    def __call__(self, part, query):
-        return [self.prefix + part]
-
-
-#=================================================================
-class ZipNumCluster(CDXSource):
-    DEFAULT_RELOAD_INTERVAL = 10  # in minutes
-    DEFAULT_MAX_BLOCKS = 10
-
-    def __init__(self, summary, config=None):
-        self.max_blocks = self.DEFAULT_MAX_BLOCKS
-
-        self.loc_resolver = None
-
-        loc = None
-        cookie_maker = None
-        reload_ival = self.DEFAULT_RELOAD_INTERVAL
-
-        if config:
-            loc = config.get('shard_index_loc')
-            cookie_maker = config.get('cookie_maker')
-
-            self.max_blocks = config.get('max_blocks', self.max_blocks)
-
-            reload_ival = config.get('reload_interval', reload_ival)
-
-
-        if isinstance(loc, dict):
-            self.loc_resolver = LocPrefixResolver(summary, loc)
-        else:
-            self.loc_resolver = LocMapResolver(summary, loc)
-
-        self.summary = summary
-
-        # reload interval
-        self.loc_update_time = datetime.datetime.now()
-        self.reload_interval = datetime.timedelta(minutes=reload_ival)
-
-        self.blk_loader = BlockLoader(cookie_maker=cookie_maker)
-
-#    @staticmethod
-#    def reload_timed(timestamp, val, delta, func):
-#        now = datetime.datetime.now()
-#        if now - timestamp >= delta:
-#            func()
-#            return now
-#        return None
-#
-#    def reload_loc(self):
-#        reload_time = self.reload_timed(self.loc_update_time,
-#                                        self.loc_map,
-#                                        self.reload_interval,
-#                                        self.load_loc)
-#
-#        if reload_time:
-#            self.loc_update_time = reload_time
-
-    def load_cdx(self, query):
-        self.loc_resolver.load_loc()
-        return self._do_load_cdx(self.summary, query)
-
-    def _do_load_cdx(self, filename, query):
-        reader = open(filename, 'rb')
-
-        idx_iter = self.compute_page_range(reader, query)
-
-        if query.secondary_index_only or query.page_count:
-            return idx_iter
-
-        blocks = self.idx_to_cdx(idx_iter, query)
-
-        def gen_cdx():
-            for blk in blocks:
-                for cdx in blk:
-                    yield cdx
-
-        return gen_cdx()
-
-
-    def _page_info(self, pages, pagesize, blocks):
-        info = dict(pages=pages,
-                    pageSize=pagesize,
-                    blocks=blocks)
-        return json.dumps(info) + '\n'
-
-    def compute_page_range(self, reader, query):
-        pagesize = query.page_size
-        if not pagesize:
-            pagesize = self.max_blocks
-        else:
-            pagesize = int(pagesize)
-
-        last_line = None
-
-        # Get End
-        end_iter = search(reader, query.end_key, prev_size=1)
-
-        try:
-            end_line = six.next(end_iter)
-        except StopIteration:
-            last_line = read_last_line(reader)
-            end_line = last_line
-
-        # Get Start
-        first_iter = iter_range(reader,
-                                query.key,
-                                query.end_key,
-                                prev_size=1)
-
-        try:
-            first_line = six.next(first_iter)
-        except StopIteration:
-            if end_line == last_line and query.key >= last_line:
-                first_line = last_line
-            else:
-                reader.close()
-                if query.page_count:
-                    yield self._page_info(0, pagesize, 0)
-                    return
-                else:
-                    raise
-
-        first = IDXObject(first_line)
-
-        end = IDXObject(end_line)
-
-        try:
-            blocks = end['lineno'] - first['lineno']
-            total_pages = int(blocks / pagesize) + 1
-        except:
-            blocks = -1
-            total_pages = 1
-
-        if query.page_count:
-            # same line, so actually need to look at cdx
-            # to determine if it exists
-            if blocks == 0:
-                try:
-                    block_cdx_iter = self.idx_to_cdx([first_line], query)
-                    block = six.next(block_cdx_iter)
-                    cdx = six.next(block)
-                except StopIteration:
-                    total_pages = 0
-                    blocks = -1
-
-            yield self._page_info(total_pages, pagesize, blocks + 1)
-            reader.close()
-            return
-
-        curr_page = query.page
-        if curr_page >= total_pages or curr_page < 0:
-            msg = 'Page {0} invalid: First Page is 0, Last Page is {1}'
-            reader.close()
-            raise CDXException(msg.format(curr_page, total_pages - 1))
-
-        startline = curr_page * pagesize
-        endline = startline + pagesize - 1
-        if blocks >= 0:
-            endline = min(endline, blocks)
-
-        if curr_page == 0:
-            yield first_line
-        else:
-            startline -= 1
-
-        idxiter = itertools.islice(first_iter, startline, endline)
-        for idx in idxiter:
-            yield idx
-
-        reader.close()
-
-
-    def search_by_line_num(self, reader, line):  # pragma: no cover
-        def line_cmp(line1, line2):
-            line1_no = int(line1.rsplit(b'\t', 1)[-1])
-            line2_no = int(line2.rsplit(b'\t', 1)[-1])
-            return cmp(line1_no, line2_no)
-
-        line_iter = search(reader, line, compare_func=line_cmp)
-        yield six.next(line_iter)
-
-    def idx_to_cdx(self, idx_iter, query):
-        blocks = None
-        ranges = []
-
-        for idx in idx_iter:
-            idx = IDXObject(idx)
-
-            if (blocks and blocks.part == idx['part'] and
-                blocks.offset + blocks.length == idx['offset'] and
-                blocks.count < self.max_blocks):
-
-                    blocks.length += idx['length']
-                    blocks.count += 1
-                    ranges.append(idx['length'])
-
-            else:
-                if blocks:
-                    yield self.block_to_cdx_iter(blocks, ranges, query)
-
-                blocks = ZipBlocks(idx['part'],
-                                   idx['offset'],
-                                   idx['length'],
-                                   1)
-
-                ranges = [blocks.length]
-
-        if blocks:
-            yield self.block_to_cdx_iter(blocks, ranges, query)
-
-    def block_to_cdx_iter(self, blocks, ranges, query):
-        last_exc = None
-        last_traceback = None
-
-        try:
-            locations = self.loc_resolver(blocks.part, query)
-        except:
-            raise Exception('No Locations Found for: ' + blocks.part)
-
-        for location in self.loc_resolver(blocks.part, query):
-            try:
-                return self.load_blocks(location, blocks, ranges, query)
-            except Exception as exc:
-                last_exc = exc
-                import sys
-                last_traceback = sys.exc_info()[2]
-
-        if last_exc:
-            six.reraise(Exception, last_exc, last_traceback)
-            #raise last_exc
-        else:
-            raise Exception('No Locations Found for: ' + blocks.part)
-
-    def load_blocks(self, location, blocks, ranges, query):
-        """ Load one or more blocks of compressed cdx lines, return
-        a line iterator which decompresses and returns one line at a time,
-        bounded by query.key and query.end_key
-        """
-
-        if (logging.getLogger().getEffectiveLevel() <= logging.DEBUG):
-            msg = 'Loading {b.count} blocks from {loc}:{b.offset}+{b.length}'
-            logging.debug(msg.format(b=blocks, loc=location))
-
-        reader = self.blk_loader.load(location, blocks.offset, blocks.length)
-
-        def decompress_block(range_):
-            decomp = gzip_decompressor()
-            buff = decomp.decompress(reader.read(range_))
-            for line in BytesIO(buff):
-                yield line
-
-        iter_ = itertools.chain(*map(decompress_block, ranges))
-
-        # start bound
-        iter_ = linearsearch(iter_, query.key)
-
-        # end bound
-        iter_ = itertools.takewhile(lambda line: line < query.end_key, iter_)
-        return iter_
-
-    def __str__(self):
-        return 'ZipNum Cluster: {0}, {1}'.format(self.summary,
-                                                 self.loc_resolver)
--- a/pywb/framework/archivalrouter.py
+++ b/pywb/framework/archivalrouter.py
@ -1,245 +0,0 @@
-from six.moves.urllib.parse import urlsplit, urlunsplit, quote
-
-import re
-
-from pywb.rewrite.url_rewriter import UrlRewriter
-from pywb.rewrite.wburl import WbUrl
-from pywb.framework.wbrequestresponse import WbRequest, WbResponse
-
-
-#=================================================================
-# ArchivalRouter -- route WB requests in archival mode
-#=================================================================
-class ArchivalRouter(object):
-    def __init__(self, routes, **kwargs):
-        self.routes = routes
-
-        # optional port setting may be ignored by wsgi container
-        self.port = kwargs.get('port')
-
-        self.fallback = ReferRedirect()
-
-        self.abs_path = kwargs.get('abs_path')
-
-        self.home_view = kwargs.get('home_view')
-        self.error_view = kwargs.get('error_view')
-        self.info_view = kwargs.get('info_view')
-
-        config = kwargs.get('config', {})
-        self.urlrewriter_class = config.get('urlrewriter_class', UrlRewriter)
-
-        self.enable_coll_info = config.get('enable_coll_info', False)
-
-    def __call__(self, env):
-        request_uri = self.ensure_rel_uri_set(env)
-
-        for route in self.routes:
-            matcher, coll = route.is_handling(request_uri)
-            if matcher:
-                wbrequest = self.parse_request(route, env, matcher,
-                                               coll, request_uri,
-                                               use_abs_prefix=self.abs_path)
-
-                return route.handler(wbrequest)
-
-        # Default Home Page
-        if request_uri in ['/', '/index.html', '/index.htm']:
-            return self.render_home_page(env)
-
-        if self.enable_coll_info and request_uri in ['/collinfo.json']:
-            params = env.get('pywb.template_params', {})
-            host = WbRequest.make_host_prefix(env)
-            return self.info_view.render_response(env=env, host=host, routes=self.routes,
-                                                  content_type='application/json',
-                                                  **params)
-
-        return self.fallback(env, self) if self.fallback else None
-
-    def parse_request(self, route, env, matcher, coll, request_uri,
-                      use_abs_prefix=False):
-        matched_str = matcher.group(0)
-        rel_prefix = env.get('SCRIPT_NAME', '') + '/'
-
-        if matched_str:
-            rel_prefix += matched_str + '/'
-            # remove the '/' + rel_prefix part of uri
-            wb_url_str = request_uri[len(matched_str) + 2:]
-        else:
-            # the request_uri is the wb_url, since no coll
-            wb_url_str = request_uri[1:]
-
-        wbrequest = route.request_class(env,
-                              request_uri=request_uri,
-                              wb_url_str=wb_url_str,
-                              rel_prefix=rel_prefix,
-                              coll=coll,
-                              use_abs_prefix=use_abs_prefix,
-                              wburl_class=route.handler.get_wburl_type(),
-                              urlrewriter_class=self.urlrewriter_class,
-                              cookie_scope=route.cookie_scope,
-                              rewrite_opts=route.rewrite_opts,
-                              user_metadata=route.user_metadata)
-
-        # Allow for applying of additional filters
-        route.apply_filters(wbrequest, matcher)
-
-        return wbrequest
-
-    def render_home_page(self, env):
-        if self.home_view:
-            params = env.get('pywb.template_params', {})
-            return self.home_view.render_response(env=env, routes=self.routes, **params)
-        else:
-            return None
-
-    #=================================================================
-    # adapted from wsgiref.request_uri, but doesn't include domain name
-    # and allows all characters which are allowed in the path segment
-    # according to: http://tools.ietf.org/html/rfc3986#section-3.3
-    # explained here:
-    # http://stackoverflow.com/questions/4669692/
-    #   valid-characters-for-directory-part-of-a-url-for-short-links
-
-    @staticmethod
-    def ensure_rel_uri_set(env):
-        """ Return the full requested path, including the query string
-        """
-        if 'REL_REQUEST_URI' in env:
-            return env['REL_REQUEST_URI']
-
-        if not env.get('SCRIPT_NAME') and env.get('REQUEST_URI'):
-            env['REL_REQUEST_URI'] = env['REQUEST_URI']
-            return env['REL_REQUEST_URI']
-
-        url = quote(env.get('PATH_INFO', ''), safe='/~!$&\'()*+,;=:@')
-        query = env.get('QUERY_STRING')
-        if query:
-            url += '?' + query
-
-        env['REL_REQUEST_URI'] = url
-        return url
-
-
-#=================================================================
-# Route by matching regex (or fixed prefix)
-# of request uri (excluding first '/')
-#=================================================================
-class Route(object):
-    # match upto next / or ? or end
-    SLASH_QUERY_LOOKAHEAD = '(?=/|$|\?)'
-
-    def __init__(self, regex, handler, config=None,
-                 request_class=WbRequest,
-                 lookahead=SLASH_QUERY_LOOKAHEAD):
-
-        config = config or {}
-        self.path = regex
-        if regex:
-            self.regex = re.compile(regex + lookahead)
-        else:
-            self.regex = re.compile('')
-
-        self.handler = handler
-        self.request_class = request_class
-
-        # collection id from regex group (default 0)
-        self.coll_group = int(config.get('coll_group', 0))
-        self.cookie_scope = config.get('cookie_scope')
-        self.rewrite_opts = config.get('rewrite_opts', {})
-        self.user_metadata = config.get('metadata', {})
-        self._custom_init(config)
-
-    def is_handling(self, request_uri):
-        matcher = self.regex.match(request_uri[1:])
-        if not matcher:
-            return None, None
-
-        coll = matcher.group(self.coll_group)
-        return matcher, coll
-
-    def apply_filters(self, wbrequest, matcher):
-        for filter in self.filters:
-            last_grp = len(matcher.groups())
-            filter_str = filter.format(matcher.group(last_grp))
-            wbrequest.query_filter.append(filter_str)
-
-    def _custom_init(self, config):
-        self.filters = config.get('filters', [])
-
-
-#=================================================================
-# ReferRedirect -- redirect urls that have 'fallen through'
-# based on the referrer settings
-#=================================================================
-class ReferRedirect:
-    def __call__(self, env, the_router):
-        referrer = env.get('HTTP_REFERER')
-
-        routes = the_router.routes
-
-        # ensure there is a referrer
-        if referrer is None:
-            return None
-
-        # get referrer path name
-        ref_split = urlsplit(referrer)
-
-        # require that referrer starts with current Host, if any
-        curr_host = env.get('HTTP_HOST')
-        if curr_host and curr_host != ref_split.netloc:
-            return None
-
-        path = ref_split.path
-
-        app_path = env.get('SCRIPT_NAME', '')
-
-        if app_path:
-            # must start with current app name, if not root
-            if not path.startswith(app_path):
-                return None
-
-            path = path[len(app_path):]
-
-        ref_route = None
-        ref_request = None
-
-        for route in routes:
-            matcher, coll = route.is_handling(path)
-            if matcher:
-                ref_request = the_router.parse_request(route, env,
-                                                       matcher, coll, path)
-                ref_route = route
-                break
-
-        # must have matched one of the routes with a urlrewriter
-        if not ref_request or not ref_request.urlrewriter:
-            return None
-
-        rewriter = ref_request.urlrewriter
-
-        rel_request_uri = env['REL_REQUEST_URI']
-
-        timestamp_path = '/' + rewriter.wburl.timestamp + '/'
-
-        # check if timestamp is already part of the path
-        if rel_request_uri.startswith(timestamp_path):
-            # remove timestamp but leave / to make host relative url
-            # 2013/path.html -> /path.html
-            rel_request_uri = rel_request_uri[len(timestamp_path) - 1:]
-
-        rewritten_url = rewriter.rewrite(rel_request_uri)
-
-        # if post, can't redirect as that would lost the post data
-        # (can't use 307 because FF will show confirmation warning)
-        if ref_request.method == 'POST':
-            new_wb_url = WbUrl(rewritten_url[len(rewriter.prefix):])
-            ref_request.wb_url.url = new_wb_url.url
-            return ref_route.handler(ref_request)
-
-        final_url = urlunsplit((ref_split.scheme,
-                                ref_split.netloc,
-                                rewritten_url,
-                                '',
-                                ''))
-
-        return WbResponse.redir_response(final_url, status='302 Temp Redirect')
--- a/pywb/framework/basehandlers.py
+++ b/pywb/framework/basehandlers.py
@ -1,23 +0,0 @@
-from pywb.rewrite.wburl import WbUrl
-
-
-#=================================================================
-class BaseHandler(object):
-    """
-    Represents a base handler class that handles any request
-    """
-    def __call__(self, wbrequest):  # pragma: no cover
-        raise NotImplementedError('Need to implement in derived class')
-
-    def get_wburl_type(self):
-        return None
-
-
-#=================================================================
-class WbUrlHandler(BaseHandler):
-    """
-    Represents a handler which assumes the request contains a WbUrl
-    Ensure that the WbUrl is parsed in the request
-    """
-    def get_wburl_type(self):
-        return WbUrl
--- a/pywb/framework/cache.py
+++ b/pywb/framework/cache.py
@ -1,62 +0,0 @@
-try:  # pragma: no cover
-    import uwsgi
-    uwsgi_cache = True
-except ImportError:
-    uwsgi_cache = False
-
-
-from redis import StrictRedis
-from pywb.utils.loaders import to_native_str
-
-
-#=================================================================
-class UwsgiCache(object):  # pragma: no cover
-    def __setitem__(self, item, value):
-        uwsgi.cache_update(item, value)
-
-    def __getitem__(self, item):
-        return uwsgi.cache_get(item)
-
-    def __contains__(self, item):
-        return uwsgi.cache_exists(item)
-
-    def __delitem__(self, item):
-        uwsgi.cache_del(item)
-
-
-#=================================================================
-class DefaultCache(dict):
-    def __getitem__(self, item):
-        return self.get(item)
-
-
-#=================================================================
-class RedisCache(object):
-    def __init__(self, redis_url):
-        # must be of the form redis://host:port/db/key
-        redis_url, key = redis_url.rsplit('/', 1)
-        self.redis = StrictRedis.from_url(redis_url)
-        self.key = key
-
-    def __setitem__(self, item, value):
-        self.redis.hset(self.key, item, value)
-
-    def __getitem__(self, item):
-        return to_native_str(self.redis.hget(self.key, item), 'utf-8')
-
-    def __contains__(self, item):
-        return self.redis.hexists(self.key, item)
-
-    def __delitem__(self, item):
-        self.redis.hdel(self.key, item)
-
-
-#=================================================================
-def create_cache(redis_url_key=None):
-    if redis_url_key:
-        return RedisCache(redis_url_key)
-
-    if uwsgi_cache:  # pragma: no cover
-        return UwsgiCache()
-    else:
-        return DefaultCache()
--- a/pywb/framework/memento.py
+++ b/pywb/framework/memento.py
@ -1,231 +0,0 @@
-from pywb.utils.wbexception import BadRequestException
-from warcio.timeutils import http_date_to_timestamp
-from warcio.timeutils import timestamp_to_http_date
-
-from pywb.framework.wbrequestresponse import WbRequest, WbResponse
-from pywb.rewrite.wburl import WbUrl
-
-import six
-LINK_FORMAT = 'application/link-format'
-
-
-#=================================================================
-class MementoReqMixin(object):
-    def _parse_extra(self):
-        if not self.wb_url:
-            return
-
-        if self.wb_url.type != self.wb_url.LATEST_REPLAY:
-            return
-
-        self.options['is_timegate'] = True
-
-        accept_datetime = self.env.get('HTTP_ACCEPT_DATETIME')
-        if not accept_datetime:
-            return
-
-        try:
-            timestamp = http_date_to_timestamp(accept_datetime)
-        except Exception:
-            raise BadRequestException('Invalid Accept-Datetime: ' +
-                                      accept_datetime)
-
-        # note: this changes from LATEST_REPLAY -> REPLAY
-        self.wb_url.set_replay_timestamp(timestamp)
-
-
-#=================================================================
-class MementoRequest(MementoReqMixin, WbRequest):
-    pass
-
-
-#=================================================================
-class MementoRespMixin(object):
-    def _init_derived(self, params):
-        wbrequest = params.get('wbrequest')
-        is_redirect = params.get('memento_is_redir', False)
-        cdx = params.get('cdx')
-
-        if not wbrequest or not wbrequest.wb_url:
-            return
-
-        mod = wbrequest.options.get('replay_mod', '')
-
-        #is_top_frame = wbrequest.wb_url.is_top_frame
-        is_top_frame = wbrequest.options.get('is_top_frame', False)
-
-        is_timegate = (wbrequest.options.get('is_timegate', False) and
-                       not is_top_frame)
-
-        if is_timegate:
-            self.status_headers.replace_header('Vary', 'accept-datetime')
-
-        # Determine if memento:
-        is_memento = False
-        is_original = False
-
-        # if no cdx included, not a memento, unless top-frame special
-        if not cdx:
-            # special case: include the headers but except Memento-Datetime
-            # since this is really an intermediate resource
-            if is_top_frame:
-                is_memento = True
-
-        # otherwise, if in proxy mode, then always a memento
-        elif wbrequest.options['is_proxy']:
-            is_memento = True
-            is_original = True
-
-        # otherwise only if timestamp replay (and not a timegate)
-        #elif not is_timegate:
-        #    is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
-        elif not is_redirect:
-            is_memento = (wbrequest.wb_url.is_replay())
-
-        link = []
-        req_url = wbrequest.wb_url.url
-
-        if is_memento or is_timegate:
-            url = req_url
-            if cdx:
-                ts = cdx['timestamp']
-                url = cdx['url']
-            # for top frame
-            elif wbrequest.wb_url.timestamp:
-                ts = wbrequest.wb_url.timestamp
-            else:
-                ts = None
-
-            if ts:
-                http_date = timestamp_to_http_date(ts)
-
-                if is_memento:
-                    self.status_headers.replace_header('Memento-Datetime',
-                                                       http_date)
-
-                canon_link = wbrequest.urlrewriter.get_new_url(mod=mod,
-                                                               timestamp=ts,
-                                                               url=url)
-
-                # set in replay_views -- Must set content location
-                #if is_memento and is_timegate:
-                #    self.status_headers.headers.append(('Content-Location',
-                #                                        canon_link))
-
-                # don't set memento link for very long urls...
-                if len(canon_link) < 512:
-                    link.append(self.make_memento_link(canon_link,
-                                                       'memento',
-                                                       http_date))
-
-        if is_original and is_timegate:
-            link.append(self.make_link(req_url, 'original timegate'))
-        else:
-            link.append(self.make_link(req_url, 'original'))
-
-        # for now, include timemap only in non-proxy mode
-        if not wbrequest.options['is_proxy'] and (is_memento or is_timegate):
-            link.append(self.make_timemap_link(wbrequest))
-
-        if is_memento and not is_timegate:
-            timegate = wbrequest.urlrewriter.get_new_url(mod=mod, timestamp='')
-            link.append(self.make_link(timegate, 'timegate'))
-
-        link = ', '.join(link)
-
-        self.status_headers.replace_header('Link', link)
-
-    def make_link(self, url, type):
-        return '<{0}>; rel="{1}"'.format(url, type)
-
-    def make_memento_link(self, url, type_, dt):
-        return '<{0}>; rel="{1}"; datetime="{2}"'.format(url, type_, dt)
-
-    def make_timemap_link(self, wbrequest):
-        format_ = '<{0}>; rel="timemap"; type="{1}"'
-
-        url = wbrequest.urlrewriter.get_new_url(mod='timemap',
-                                                timestamp='',
-                                                type=wbrequest.wb_url.QUERY)
-
-        return format_.format(url, LINK_FORMAT)
-
-
-#=================================================================
-class MementoResponse(MementoRespMixin, WbResponse):
-    pass
-
-
-#=================================================================
-def make_timemap_memento_link(cdx, prefix, datetime=None,
-                             rel='memento', end=',\n', mod=''):
-
-    memento = '<{0}>; rel="{1}"; datetime="{2}"' + end
-
-    string = WbUrl.to_wburl_str(url=cdx['url'],
-                                mod=mod,
-                                timestamp=cdx['timestamp'],
-                                type=WbUrl.REPLAY)
-
-    url = prefix + string
-
-    if not datetime:
-        datetime = timestamp_to_http_date(cdx['timestamp'])
-
-    return memento.format(url, rel, datetime)
-
-
-#=================================================================
-def make_timemap(wbrequest, cdx_lines):
-    prefix = wbrequest.wb_prefix
-    url = wbrequest.wb_url.url
-    mod = wbrequest.options.get('replay_mod', '')
-
-    # get first memento as it'll be used for 'from' field
-    try:
-        first_cdx = six.next(cdx_lines)
-        from_date = timestamp_to_http_date(first_cdx['timestamp'])
-    except StopIteration:
-        first_cdx = None
-
-
-    if first_cdx:
-        # timemap link
-        timemap = ('<{0}>; rel="self"; ' +
-                   'type="application/link-format"; from="{1}",\n')
-        yield timemap.format(prefix + wbrequest.wb_url.to_str(),
-                             from_date)
-
-    # original link
-    original = '<{0}>; rel="original",\n'
-    yield original.format(url)
-
-    # timegate link
-    timegate = '<{0}>; rel="timegate",\n'
-    timegate_url= WbUrl.to_wburl_str(url=url,
-                                     mod=mod,
-                                     type=WbUrl.LATEST_REPLAY)
-
-    yield timegate.format(prefix + timegate_url)
-
-    if not first_cdx:
-        # terminating timemap link, no from
-        timemap = ('<{0}>; rel="self"; type="application/link-format"')
-        yield timemap.format(prefix + wbrequest.wb_url.to_str())
-        return
-
-    # first memento link
-    yield make_timemap_memento_link(first_cdx, prefix,
-                            datetime=from_date, mod=mod)
-
-    prev_cdx = None
-
-    for cdx in cdx_lines:
-        if prev_cdx:
-            yield make_timemap_memento_link(prev_cdx, prefix, mod=mod)
-
-        prev_cdx = cdx
-
-    # last memento link, if any
-    if prev_cdx:
-        yield make_timemap_memento_link(prev_cdx, prefix, end='', mod=mod)
--- a/pywb/framework/proxy.py
+++ b/pywb/framework/proxy.py
@ -1,463 +0,0 @@
-from __future__ import absolute_import
-
-from pywb.framework.wbrequestresponse import WbResponse, WbRequest
-from pywb.framework.archivalrouter import ArchivalRouter
-
-from six.moves.urllib.parse import urlsplit
-from six import iteritems
-import base64
-
-import socket
-import ssl
-
-from io import BytesIO
-
-from pywb.rewrite.url_rewriter import SchemeOnlyUrlRewriter, UrlRewriter
-from pywb.rewrite.rewrite_content import RewriteContent
-from pywb.utils.wbexception import BadRequestException
-
-from warcio.bufferedreaders import BufferedReader
-from warcio.utils import to_native_str
-
-from pywb.framework.proxy_resolvers import ProxyAuthResolver, CookieResolver, IPCacheResolver
-
-from tempfile import SpooledTemporaryFile
-
-
-#=================================================================
-class ProxyArchivalRouter(ArchivalRouter):
-    """
-    A router which combines both archival and proxy modes support
-    First, request is treated as a proxy request using ProxyRouter
-    Second, if not handled by the router, it is treated as a regular
-    archival mode request.
-    """
-    def __init__(self, routes, **kwargs):
-        super(ProxyArchivalRouter, self).__init__(routes, **kwargs)
-        self.proxy = ProxyRouter(routes, **kwargs)
-
-    def __call__(self, env):
-        response = self.proxy(env)
-        if response:
-            return response
-
-        response = super(ProxyArchivalRouter, self).__call__(env)
-        if response:
-            return response
-
-
-#=================================================================
-class ProxyRouter(object):
-    """
-    A router which supports http proxy mode requests
-    Handles requests of the form: GET http://example.com
-
-    The router returns latest capture by default.
-    However, if Memento protocol support is enabled,
-    the memento Accept-Datetime header can be used
-    to select specific capture.
-    See: http://www.mementoweb.org/guide/rfc/#Pattern1.3
-    for more details.
-    """
-
-    BLOCK_SIZE = 4096
-    DEF_MAGIC_NAME = 'pywb.proxy'
-    BUFF_RESPONSE_MEM_SIZE = 1024*1024
-
-    CERT_DL_PEM = '/pywb-ca.pem'
-    CERT_DL_P12 = '/pywb-ca.p12'
-
-    CA_ROOT_FILE = './ca/pywb-ca.pem'
-    CA_ROOT_NAME = 'pywb https proxy replay CA'
-    CA_CERTS_DIR = './ca/certs/'
-
-    EXTRA_HEADERS = {'cache-control': 'no-cache',
-                     'connection': 'close',
-                     'p3p': 'CP="NOI ADM DEV COM NAV OUR STP"'}
-
-    def __init__(self, routes, **kwargs):
-        self.error_view = kwargs.get('error_view')
-
-        proxy_options = kwargs.get('config', {})
-        if proxy_options:
-            proxy_options = proxy_options.get('proxy_options', {})
-
-        self.magic_name = proxy_options.get('magic_name')
-        if not self.magic_name:
-            self.magic_name = self.DEF_MAGIC_NAME
-            proxy_options['magic_name'] = self.magic_name
-
-        self.extra_headers = proxy_options.get('extra_headers')
-        if not self.extra_headers:
-            self.extra_headers = self.EXTRA_HEADERS
-            proxy_options['extra_headers'] = self.extra_headers
-
-        res_type = proxy_options.get('cookie_resolver', True)
-        if res_type == 'auth' or not res_type:
-            self.resolver = ProxyAuthResolver(routes, proxy_options)
-        elif res_type == 'ip':
-            self.resolver = IPCacheResolver(routes, proxy_options)
-        #elif res_type == True or res_type == 'cookie':
-        #    self.resolver = CookieResolver(routes, proxy_options)
-        else:
-            self.resolver = CookieResolver(routes, proxy_options)
-
-        self.use_banner = proxy_options.get('use_banner', True)
-        self.use_wombat = proxy_options.get('use_client_rewrite', True)
-
-        self.proxy_cert_dl_view = proxy_options.get('proxy_cert_download_view')
-
-        if not proxy_options.get('enable_https_proxy'):
-            self.ca = None
-            return
-
-        try:
-            from certauth.certauth import CertificateAuthority
-        except ImportError:  #pragma: no cover
-            print('HTTPS proxy is not available as the "certauth" module ' +
-                  'is not installed')
-            print('Please install via "pip install certauth" ' +
-                  'to enable HTTPS support')
-            self.ca = None
-            return
-
-        # HTTPS Only Options
-        ca_file = proxy_options.get('root_ca_file', self.CA_ROOT_FILE)
-
-        # attempt to create the root_ca_file if doesn't exist
-        # (generally recommended to create this seperately)
-        ca_name = proxy_options.get('root_ca_name', self.CA_ROOT_NAME)
-
-        certs_dir = proxy_options.get('certs_dir', self.CA_CERTS_DIR)
-        self.ca = CertificateAuthority(ca_file=ca_file,
-                                       certs_dir=certs_dir,
-                                       ca_name=ca_name)
-
-        self.use_wildcard = proxy_options.get('use_wildcard_certs', True)
-
-    def __call__(self, env):
-        is_https = (env['REQUEST_METHOD'] == 'CONNECT')
-        ArchivalRouter.ensure_rel_uri_set(env)
-
-        # for non-https requests, check non-proxy urls
-        if not is_https:
-            url = env['REL_REQUEST_URI']
-
-            if not url.startswith(('http://', 'https://')):
-                return None
-
-            env['pywb.proxy_scheme'] = 'http'
-
-        route = None
-        coll = None
-        matcher = None
-        response = None
-        ts = None
-
-        # check resolver, for pre connect resolve
-        if self.resolver.pre_connect:
-            route, coll, matcher, ts, response = self.resolver.resolve(env)
-            if response:
-                return response
-
-        # do connect, then get updated url
-        if is_https:
-            response = self.handle_connect(env)
-            if response:
-                return response
-
-            url = env['REL_REQUEST_URI']
-        else:
-            parts = urlsplit(env['REL_REQUEST_URI'])
-            hostport = parts.netloc.split(':', 1)
-            env['pywb.proxy_host'] = hostport[0]
-            env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
-            env['pywb.proxy_req_uri'] = parts.path
-            if parts.query:
-                env['pywb.proxy_req_uri'] += '?' + parts.query
-                env['pywb.proxy_query'] = parts.query
-
-        if self.resolver.supports_switching:
-            env['pywb_proxy_magic'] = self.magic_name
-
-        # route (static) and other resources to archival replay
-        if env['pywb.proxy_host'] == self.magic_name:
-            env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']
-
-            # special case for proxy install
-            response = self.handle_cert_install(env)
-            if response:
-                return response
-
-            return None
-
-        # check resolver, post connect
-        if not self.resolver.pre_connect:
-            route, coll, matcher, ts, response = self.resolver.resolve(env)
-            if response:
-                return response
-
-        rel_prefix = ''
-
-        custom_prefix = env.get('HTTP_PYWB_REWRITE_PREFIX', '')
-        if custom_prefix:
-            host_prefix = custom_prefix
-            urlrewriter_class = UrlRewriter
-            abs_prefix = True
-            # always rewrite to absolute here
-            rewrite_opts = dict(no_match_rel=True)
-        else:
-            host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
-            urlrewriter_class = SchemeOnlyUrlRewriter
-            abs_prefix = False
-            rewrite_opts = {}
-
-        # special case for proxy calendar
-        if (env['pywb.proxy_host'] == 'query.' + self.magic_name):
-            url = env['pywb.proxy_req_uri'][1:]
-            rel_prefix = '/'
-
-        if ts is not None:
-            url = ts + '/' + url
-
-        wbrequest = route.request_class(env,
-                              request_uri=url,
-                              wb_url_str=url,
-                              coll=coll,
-                              host_prefix=host_prefix,
-                              rel_prefix=rel_prefix,
-                              wburl_class=route.handler.get_wburl_type(),
-                              urlrewriter_class=urlrewriter_class,
-                              use_abs_prefix=abs_prefix,
-                              rewrite_opts=rewrite_opts,
-                              is_proxy=True)
-
-        if matcher:
-            route.apply_filters(wbrequest, matcher)
-
-        # full rewrite and banner
-        if self.use_wombat and self.use_banner:
-            wbrequest.wb_url.mod = ''
-        elif self.use_banner:
-        # banner only, no rewrite
-            wbrequest.wb_url.mod = 'bn_'
-        else:
-        # unaltered, no rewrite or banner
-            wbrequest.wb_url.mod = 'uo_'
-
-        response = route.handler(wbrequest)
-        if not response:
-            return None
-
-        # add extra headers for replay responses
-        if wbrequest.wb_url and wbrequest.wb_url.is_replay():
-            for name, value in iteritems(self.extra_headers):
-                response.status_headers.replace_header(name, value)
-
-        # check for content-length
-        res = response.status_headers.get_header('content-length')
-        try:
-            if int(res) > 0:
-                return response
-        except:
-            pass
-
-        # need to either chunk or buffer to get content-length
-        if env.get('SERVER_PROTOCOL') == 'HTTP/1.1':
-            response.status_headers.remove_header('content-length')
-            response.status_headers.headers.append(('Transfer-Encoding', 'chunked'))
-            response.body = self._chunk_encode(response.body)
-        else:
-            response.body = self._buffer_response(response.status_headers,
-                                                  response.body)
-
-        return response
-
-    @staticmethod
-    def _chunk_encode(orig_iter):
-        for chunk in orig_iter:
-            if not len(chunk):
-                continue
-            chunk_len = b'%X\r\n' % len(chunk)
-            yield chunk_len
-            yield chunk
-            yield b'\r\n'
-
-        yield b'0\r\n\r\n'
-
-    @staticmethod
-    def _buffer_response(status_headers, iterator):
-        out = SpooledTemporaryFile(ProxyRouter.BUFF_RESPONSE_MEM_SIZE)
-        size = 0
-
-        for buff in iterator:
-            size += len(buff)
-            out.write(buff)
-
-        content_length_str = str(size)
-        # remove existing content length
-        status_headers.replace_header('Content-Length',
-                                      content_length_str)
-
-        out.seek(0)
-        return RewriteContent.stream_to_gen(out)
-
-    def get_request_socket(self, env):
-        if not self.ca:
-            return None
-
-        sock = None
-
-        if env.get('uwsgi.version'):  # pragma: no cover
-            try:
-                import uwsgi
-                fd = uwsgi.connection_fd()
-                conn = socket.fromfd(fd, socket.AF_INET, socket.SOCK_STREAM)
-                try:
-                    sock = socket.socket(_sock=conn)
-                except:
-                    sock = conn
-            except Exception as e:
-                pass
-        elif env.get('gunicorn.socket'):  # pragma: no cover
-            sock = env['gunicorn.socket']
-
-        if not sock:
-            # attempt to find socket from wsgi.input
-            input_ = env.get('wsgi.input')
-            if input_:
-                if hasattr(input_, '_sock'):  # pragma: no cover
-                    raw = input_._sock
-                    sock = socket.socket(_sock=raw)  # pragma: no cover
-                elif hasattr(input_, 'raw'):
-                    sock = input_.raw._sock
-
-        return sock
-
-    def handle_connect(self, env):
-        sock = self.get_request_socket(env)
-        if not sock:
-            return WbResponse.text_response('HTTPS Proxy Not Supported',
-                                            '405 HTTPS Proxy Not Supported')
-
-        sock.send(b'HTTP/1.0 200 Connection Established\r\n')
-        sock.send(b'Proxy-Connection: close\r\n')
-        sock.send(b'Server: pywb proxy\r\n')
-        sock.send(b'\r\n')
-
-        hostname, port = env['REL_REQUEST_URI'].split(':')
-
-        if not self.use_wildcard:
-            certfile = self.ca.cert_for_host(hostname)
-        else:
-            certfile = self.ca.get_wildcard_cert(hostname)
-
-        try:
-            ssl_sock = ssl.wrap_socket(sock,
-                                       server_side=True,
-                                       certfile=certfile,
-                                       #ciphers="ALL",
-                                       suppress_ragged_eofs=False,
-                                       ssl_version=ssl.PROTOCOL_SSLv23
-                                       )
-            env['pywb.proxy_ssl_sock'] = ssl_sock
-
-            buffreader = BufferedReader(ssl_sock, block_size=self.BLOCK_SIZE)
-
-            statusline = to_native_str(buffreader.readline().rstrip())
-
-        except Exception as se:
-            raise BadRequestException(se.message)
-
-        statusparts = statusline.split(' ')
-
-        if len(statusparts) < 3:
-            raise BadRequestException('Invalid Proxy Request: ' + statusline)
-
-        env['REQUEST_METHOD'] = statusparts[0]
-        env['REL_REQUEST_URI'] = ('https://' +
-                                  env['REL_REQUEST_URI'].replace(':443', '') +
-                                  statusparts[1])
-
-        env['SERVER_PROTOCOL'] = statusparts[2].strip()
-
-        env['pywb.proxy_scheme'] = 'https'
-
-        env['pywb.proxy_host'] = hostname
-        env['pywb.proxy_port'] = port
-        env['pywb.proxy_req_uri'] = statusparts[1]
-
-        queryparts = env['REL_REQUEST_URI'].split('?', 1)
-        env['PATH_INFO'] = queryparts[0]
-        env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
-        env['pywb.proxy_query'] = env['QUERY_STRING']
-
-        while True:
-            line = to_native_str(buffreader.readline())
-            if line:
-                line = line.rstrip()
-
-            if not line:
-                break
-
-            parts = line.split(':', 1)
-            if len(parts) < 2:
-                continue
-
-            name = parts[0].strip()
-            value = parts[1].strip()
-
-            name = name.replace('-', '_').upper()
-
-            if name not in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
-                name = 'HTTP_' + name
-
-            env[name] = value
-
-        env['wsgi.input'] = buffreader
-        #remain = buffreader.rem_length()
-        #if remain > 0:
-            #remainder = buffreader.read()
-            #env['wsgi.input'] = BufferedReader(BytesIO(remainder))
-            #remainder = buffreader.read(self.BLOCK_SIZE)
-            #env['wsgi.input'] = BufferedReader(ssl_sock,
-            #                                   block_size=self.BLOCK_SIZE,
-            #                                   starting_data=remainder)
-
-    def handle_cert_install(self, env):
-        if env['pywb.proxy_req_uri'] in ('/', '/index.html', '/index.html'):
-            available = (self.ca is not None)
-
-            if self.proxy_cert_dl_view:
-                return (self.proxy_cert_dl_view.
-                         render_response(available=available,
-                                         pem_path=self.CERT_DL_PEM,
-                                         p12_path=self.CERT_DL_P12))
-
-        elif env['pywb.proxy_req_uri'] == self.CERT_DL_PEM:
-            if not self.ca:
-                return None
-
-            buff = b''
-            with open(self.ca.ca_file, 'rb') as fh:
-                buff = fh.read()
-
-            content_type = 'application/x-x509-ca-cert'
-            headers = [('Content-Length', str(len(buff)))]
-
-            return WbResponse.bin_stream([buff],
-                                         content_type=content_type,
-                                         headers=headers)
-
-        elif env['pywb.proxy_req_uri'] == self.CERT_DL_P12:
-            if not self.ca:
-                return None
-
-            buff = self.ca.get_root_PKCS12()
-
-            content_type = 'application/x-pkcs12'
-            headers = [('Content-Length', str(len(buff)))]
-
-            return WbResponse.bin_stream([buff],
-                                         content_type=content_type,
-                                         headers=headers)
--- a/pywb/framework/proxy_resolvers.py
+++ b/pywb/framework/proxy_resolvers.py
@ -1,374 +0,0 @@
-from pywb.framework.wbrequestresponse import WbResponse
-from pywb.utils.loaders import extract_client_cookie
-from pywb.utils.wbexception import WbException
-from pywb.rewrite.wburl import WbUrl
-
-from pywb.framework.cache import create_cache
-from pywb.framework.basehandlers import WbUrlHandler
-
-from six.moves.urllib.parse import parse_qs, urlsplit
-import six
-
-from warcio.statusandheaders import StatusAndHeaders
-from warcio.utils import to_native_str
-
-import base64
-import os
-import json
-
-
-#=================================================================
-class BaseCollResolver(object):
-    def __init__(self, routes, config):
-        self.routes = routes
-        self.use_default_coll = config.get('use_default_coll')
-
-    @property
-    def pre_connect(self):
-        return False
-
-    def resolve(self, env):
-        route = None
-        coll = None
-        matcher = None
-        ts = None
-
-        proxy_coll, ts = self.get_proxy_coll_ts(env)
-
-        # invalid parsing
-        if proxy_coll == '':
-            return None, None, None, None, self.select_coll_response(env, proxy_coll)
-
-        if proxy_coll is None and isinstance(self.use_default_coll, str):
-            proxy_coll = self.use_default_coll
-
-        if proxy_coll:
-            path = '/' + proxy_coll + '/'
-
-            for r in self.routes:
-                matcher, c = r.is_handling(path)
-                if matcher:
-                    route = r
-                    coll = c
-                    break
-
-            # if no match, return coll selection response
-            if not route:
-                return None, None, None, None, self.select_coll_response(env, proxy_coll)
-
-        # if 'use_default_coll', find first WbUrl-handling collection
-        elif self.use_default_coll:
-            raise Exception('use_default_coll: true no longer supported, please specify collection name')
-            #for route in self.routes:
-            #    if isinstance(route.handler, WbUrlHandler):
-            #        return route, route.path, matcher, ts, None
-
-        # otherwise, return the appropriate coll selection response
-        else:
-            return None, None, None, None, self.select_coll_response(env, proxy_coll)
-
-        return route, coll, matcher, ts, None
-
-
-#=================================================================
-class ProxyAuthResolver(BaseCollResolver):
-    DEFAULT_MSG = 'Please enter name of a collection to use with proxy mode'
-
-    def __init__(self, routes, config):
-        super(ProxyAuthResolver, self).__init__(routes, config)
-        self.auth_msg = config.get('auth_msg', self.DEFAULT_MSG)
-
-    @property
-    def pre_connect(self):
-        return True
-
-    @property
-    def supports_switching(self):
-        return False
-
-    def get_proxy_coll_ts(self, env):
-        proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')
-
-        if not proxy_auth:
-            return None, None
-
-        proxy_coll = self.read_basic_auth_coll(proxy_auth)
-        return proxy_coll, None
-
-    def select_coll_response(self, env, default_coll=None):
-        proxy_msg = 'Basic realm="{0}"'.format(self.auth_msg)
-
-        headers = [('Content-Type', 'text/plain'),
-                   ('Proxy-Authenticate', proxy_msg)]
-
-        status_headers = StatusAndHeaders('407 Proxy Authentication', headers)
-
-        value = self.auth_msg
-
-        return WbResponse(status_headers, value=[value.encode('utf-8')])
-
-    @staticmethod
-    def read_basic_auth_coll(value):
-        parts = value.split(' ')
-        if parts[0].lower() != 'basic':
-            return ''
-
-        if len(parts) != 2:
-            return ''
-
-        user_pass = base64.b64decode(parts[1].encode('utf-8'))
-        return to_native_str(user_pass.split(b':')[0])
-
-
-#=================================================================
-class IPCacheResolver(BaseCollResolver):
-    def __init__(self, routes, config):
-        super(IPCacheResolver, self).__init__(routes, config)
-        self.cache = create_cache(config.get('redis_cache_key'))
-        self.magic_name = config['magic_name']
-
-    @property
-    def supports_switching(self):
-        return False
-
-    def _get_ip(self, env):
-        ip = env['REMOTE_ADDR']
-        qs = env.get('pywb.proxy_query')
-        if qs:
-            res = parse_qs(qs)
-
-            if 'ip' in res:
-                ip = res['ip'][0]
-
-        return ip
-
-    def select_coll_response(self, env, default_coll=None):
-        raise WbException('Invalid Proxy Collection Specified: ' + str(default_coll))
-
-    def get_proxy_coll_ts(self, env):
-        ip = env['REMOTE_ADDR']
-        qs = env.get('pywb.proxy_query')
-
-        if qs:
-            res = parse_qs(qs)
-
-            if 'ip' in res:
-                ip = res['ip'][0]
-
-            if 'delete' in res:
-                del self.cache[ip + ':c']
-                del self.cache[ip + ':t']
-            else:
-                if 'coll' in res:
-                    self.cache[ip + ':c'] = res['coll'][0]
-
-                if 'ts' in res:
-                    self.cache[ip + ':t'] = res['ts'][0]
-
-        coll = self.cache[ip + ':c']
-        ts = self.cache[ip + ':t']
-        return coll, ts
-
-    def resolve(self, env):
-        server_name = env['pywb.proxy_host']
-
-        if self.magic_name in server_name:
-            response = self.handle_magic_page(env)
-            if response:
-                return None, None, None, None, response
-
-        return super(IPCacheResolver, self).resolve(env)
-
-    def handle_magic_page(self, env):
-        coll, ts = self.get_proxy_coll_ts(env)
-        ip = self._get_ip(env)
-        res = json.dumps({'ip': ip, 'coll': coll, 'ts': ts})
-        return WbResponse.text_response(res, content_type='application/json')
-
-
-#=================================================================
-class CookieResolver(BaseCollResolver):
-    SESH_COOKIE_NAME = '__pywb_proxy_sesh'
-
-    def __init__(self, routes, config):
-        super(CookieResolver, self).__init__(routes, config)
-        self.magic_name = config['magic_name']
-        self.sethost_prefix = '-sethost.' + self.magic_name + '.'
-        self.set_prefix = '-set.' + self.magic_name
-
-        self.cookie_name = config.get('cookie_name', self.SESH_COOKIE_NAME)
-        self.proxy_select_view = config.get('proxy_select_view')
-
-        self.extra_headers = config.get('extra_headers')
-
-        self.cache = create_cache()
-
-    @property
-    def supports_switching(self):
-        return True
-
-    def get_proxy_coll_ts(self, env):
-        coll, ts, sesh_id = self.get_coll(env)
-        return coll, ts
-
-    def select_coll_response(self, env, default_coll=None):
-        return self.make_magic_response('auto',
-                                        env['REL_REQUEST_URI'],
-                                        env)
-
-    def resolve(self, env):
-        server_name = env['pywb.proxy_host']
-
-        if ('.' + self.magic_name) in server_name:
-            response = self.handle_magic_page(env)
-            if response:
-                return None, None, None, None, response
-
-        return super(CookieResolver, self).resolve(env)
-
-    def handle_magic_page(self, env):
-        request_url = env['REL_REQUEST_URI']
-        parts = urlsplit(request_url)
-        server_name = env['pywb.proxy_host']
-
-        path_url = parts.path[1:]
-        if parts.query:
-            path_url += '?' + parts.query
-
-        if server_name.startswith('auto'):
-            coll, ts, sesh_id = self.get_coll(env)
-
-            if coll:
-                return self.make_sethost_cookie_response(sesh_id,
-                                                         path_url,
-                                                         env)
-            else:
-                return self.make_magic_response('select', path_url, env)
-
-        elif server_name.startswith('query.'):
-            wb_url = WbUrl(path_url)
-
-            # only dealing with specific timestamp setting
-            if wb_url.is_query():
-                return None
-
-            coll, ts, sesh_id = self.get_coll(env)
-            if not coll:
-                return self.make_magic_response('select', path_url, env)
-
-            self.set_ts(sesh_id, wb_url.timestamp)
-            return self.make_redir_response(wb_url.url)
-
-        elif server_name.endswith(self.set_prefix):
-            old_sesh_id = extract_client_cookie(env, self.cookie_name)
-            sesh_id = self.create_renew_sesh_id(old_sesh_id)
-
-            if sesh_id != old_sesh_id:
-                headers = self.make_cookie_headers(sesh_id, self.magic_name)
-            else:
-                headers = None
-
-            coll = server_name[:-len(self.set_prefix)]
-
-            # set sesh value
-            self.set_coll(sesh_id, coll)
-
-            return self.make_sethost_cookie_response(sesh_id, path_url, env,
-                                                     headers=headers)
-
-        elif self.sethost_prefix in server_name:
-            inx = server_name.find(self.sethost_prefix)
-            sesh_id = server_name[:inx]
-
-            domain = server_name[inx + len(self.sethost_prefix):]
-
-            headers = self.make_cookie_headers(sesh_id, domain)
-
-            full_url = env['pywb.proxy_scheme'] + '://' + domain
-            full_url += '/' + path_url
-            return self.make_redir_response(full_url, headers=headers)
-
-        elif 'select.' in server_name:
-            coll, ts, sesh_id = self.get_coll(env)
-
-            route_temp = '-set.' + self.magic_name + '/' + path_url
-
-            return (self.proxy_select_view.
-                    render_response(routes=self.routes,
-                                    route_temp=route_temp,
-                                    coll=coll,
-                                    url=path_url))
-        #else:
-        #    msg = 'Invalid Magic Path: ' + url
-        #    print msg
-        #    return WbResponse.text_response(msg, status='404 Not Found')
-
-    def make_cookie_headers(self, sesh_id, domain):
-        cookie_val = '{0}={1}; Path=/; Domain=.{2}; HttpOnly'
-        cookie_val = cookie_val.format(self.cookie_name, sesh_id, domain)
-        headers = [('Set-Cookie', cookie_val)]
-        return headers
-
-    def make_sethost_cookie_response(self, sesh_id, path_url,
-                                     env, headers=None):
-        if '://' not in path_url:
-            path_url = 'http://' + path_url
-
-        path_parts = urlsplit(path_url)
-
-        new_url = path_parts.path[1:]
-        if path_parts.query:
-            new_url += '?' + path_parts.query
-
-        return self.make_magic_response(sesh_id + '-sethost', new_url, env,
-                                        suffix=path_parts.netloc,
-                                        headers=headers)
-
-    def make_magic_response(self, prefix, url, env,
-                            suffix=None, headers=None):
-        full_url = env['pywb.proxy_scheme'] + '://' + prefix + '.'
-        full_url += self.magic_name
-        if suffix:
-            full_url += '.' + suffix
-        full_url += '/' + url
-        return self.make_redir_response(full_url, headers=headers)
-
-    def set_coll(self, sesh_id, coll):
-        self.cache[sesh_id + ':c'] = coll
-
-    def set_ts(self, sesh_id, ts):
-        if ts:
-            self.cache[sesh_id + ':t'] = ts
-        # this ensures that omitting timestamp will reset to latest
-        # capture by deleting the cache entry
-        else:
-            del self.cache[sesh_id + ':t']
-
-    def get_coll(self, env):
-        sesh_id = extract_client_cookie(env, self.cookie_name)
-
-        coll = None
-        ts = None
-        if sesh_id:
-            coll = self.cache[sesh_id + ':c']
-            ts = self.cache[sesh_id + ':t']
-
-        return coll, ts, sesh_id
-
-    def create_renew_sesh_id(self, sesh_id, force=False):
-        #if sesh_id in self.cache and not force:
-        if sesh_id and ((sesh_id + ':c') in self.cache) and not force:
-            return sesh_id
-
-        sesh_id = base64.b32encode(os.urandom(5)).lower()
-        return to_native_str(sesh_id)
-
-    def make_redir_response(self, url, headers=None):
-        if not headers:
-            headers = []
-
-        if self.extra_headers:
-            for name, value in six.iteritems(self.extra_headers):
-                headers.append((name, value))
-
-        return WbResponse.redir_response(url, headers=headers)
--- a/pywb/framework/test/test_archivalrouter.py
+++ b/pywb/framework/test/test_archivalrouter.py
@ -1,135 +0,0 @@
-"""
-# Test WbRequest parsed via a Route
-# route with relative path, print resulting wbrequest
->>> _test_route_req(Route('web', WbUrlHandler()), {'REL_REQUEST_URI': '/web/test.example.com', 'SCRIPT_NAME': ''})
-{'coll': 'web',
- 'request_uri': '/web/test.example.com',
- 'wb_prefix': '/web/',
- 'wb_url': ('latest_replay', '', '', 'http://test.example.com', 'http://test.example.com')}
-
-
-# route with absolute path, running at script /my_pywb, print resultingwbrequest
->>> _test_route_req(Route('web', WbUrlHandler()), {'REL_REQUEST_URI': '/web/2013im_/test.example.com', 'SCRIPT_NAME': '/my_pywb', 'HTTP_HOST': 'localhost:8081', 'wsgi.url_scheme': 'https'}, True)
-{'coll': 'web',
- 'request_uri': '/web/2013im_/test.example.com',
- 'wb_prefix': 'https://localhost:8081/my_pywb/web/',
- 'wb_url': ('replay', '2013', 'im_', 'http://test.example.com', '2013im_/http://test.example.com')}
-
-# route with no collection
->>> _test_route_req(Route('', BaseHandler()), {'REL_REQUEST_URI': 'http://example.com', 'SCRIPT_NAME': '/pywb'})
-{'coll': '',
- 'request_uri': 'http://example.com',
- 'wb_prefix': '/pywb/',
- 'wb_url': None}
-
-# not matching route -- skipped
->>> _test_route_req(Route('web', BaseHandler()), {'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''})
-
-# Test Refer Redirects
->>> _test_redir('http://localhost:8080/', '/diff_path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
-'http://localhost:8080/coll/20131010/http://example.com/diff_path/other.html'
-
->>> _test_redir('http://localhost:8080/', '/../other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
-'http://localhost:8080/coll/20131010/http://example.com/other.html'
-
->>> _test_redir('http://localhost:8080/', '/../../other.html', 'http://localhost:8080/coll/20131010/http://example.com/index.html')
-'http://localhost:8080/coll/20131010/http://example.com/other.html'
-
-# Custom collection
->>> _test_redir('http://localhost:8080/', '/other.html', 'http://localhost:8080/complex/123/20131010/http://example.com/path/page.html', coll='complex/123')
-'http://localhost:8080/complex/123/20131010/http://example.com/other.html'
-
-# With timestamp included
->>> _test_redir('http://localhost:8080/', '/20131010/other.html', 'http://localhost:8080/coll/20131010/http://example.com/index.html')
-'http://localhost:8080/coll/20131010/http://example.com/other.html'
-
-# With timestamp included
->>> _test_redir('http://localhost:8080/', '/20131010/path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/some/index.html')
-'http://localhost:8080/coll/20131010/http://example.com/path/other.html'
-
-# Wrong Host
->>> _test_redir('http://example.com:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
-False
-
-# Right Host
->>> _test_redir('http://example.com:8080/', '/other.html', 'http://example.com:8080/coll/20131010/http://example.com/path/page.html')
-'http://example.com:8080/coll/20131010/http://example.com/other.html'
-
-# With custom SCRIPT_NAME
->>> _test_redir('http://localhost:8080/', '/../other.html', 'http://localhost:8080/extra/coll/20131010/http://example.com/path/page.html', '/extra')
-'http://localhost:8080/extra/coll/20131010/http://example.com/other.html'
-
-# With custom SCRIPT_NAME + timestamp
->>> _test_redir('http://localhost:8080/', '/20131010/other.html', 'http://localhost:8080/extra/coll/20131010/http://example.com/path/page.html', '/extra')
-'http://localhost:8080/extra/coll/20131010/http://example.com/other.html'
-
-# With custom SCRIPT_NAME, bad match
->>> _test_redir('http://localhost:8080/', '/../other.html', 'http://localhost:8080/extra/coll/20131010/http://example.com/path/page.html', '/extr')
-False
-
-# With no collection
->>> _test_redir('http://localhost:8080/', '/other.html', 'http://localhost:8080/2013/http://example.com/path/page.html', coll='')
-'http://localhost:8080/2013/http://example.com/other.html'
-
-# With SCRIPT_NAME but no collection
->>> _test_redir('http://localhost:8080/', '/other.html', 'http://localhost:8080/pywb-access/http://example.com/path/page.html', '/pywb-access', coll='')
-'http://localhost:8080/pywb-access/http://example.com/other.html'
-
-
->>> _test_redir('http://localhost:8080/', '/some/example/other.html', 'http://localhost:8080/user/coll/http://example.com/path/page.html', '/user/coll', coll='')
-'http://localhost:8080/user/coll/http://example.com/some/example/other.html'
-
-## Test ensure_rel_uri_set
-
-# Simple test:
->>> ArchivalRouter.ensure_rel_uri_set({'PATH_INFO': '/pywb/example.com'})
-'/pywb/example.com'
-
-# Test all unecoded special chars and double-quote
-# (double-quote must be encoded but not single quote)
->>> ArchivalRouter.ensure_rel_uri_set({'PATH_INFO': "/pywb/example.com/0~!+$&'()*+,;=:\\\""})
-"/pywb/example.com/0~!+$&'()*+,;=:%22"
-
-"""
-
-from pywb.framework.archivalrouter import Route, ReferRedirect, ArchivalRouter
-from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
-
-import pprint
-
-from six.moves.urllib.parse import urlsplit
-
-def _test_route_req(route, env, abs_path=False):
-    matcher, coll = route.is_handling(env['REL_REQUEST_URI'])
-    if not matcher:
-        return
-
-    the_router = ArchivalRouter([route], abs_path=abs_path)
-    req = the_router.parse_request(route, env, matcher, coll, env['REL_REQUEST_URI'], abs_path)
-
-    varlist = vars(req)
-    the_dict = dict((k, varlist[k]) for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll'))
-    pprint.pprint(the_dict)
-
-
-def _test_redir(match_host, request_uri, referrer, script_name='', coll='coll'):
-    env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer, 'SCRIPT_NAME': script_name}
-
-    env['HTTP_HOST'] = urlsplit(match_host).netloc
-
-    routes = [Route(coll, WbUrlHandler())]
-
-    the_router = ArchivalRouter(routes)
-
-    redir = ReferRedirect()
-    #req = WbRequest.from_uri(request_uri, env)
-    rep = redir(env, the_router)
-    if not rep:
-        return False
-
-    return rep.status_headers.get_header('Location')
-
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
--- a/pywb/framework/test/test_wbrequestresponse.py
+++ b/pywb/framework/test/test_wbrequestresponse.py
@@ -1,178 +1,6 @@
-"""
-# WbRequest Tests
-# =================
-#>>> get_req_from_uri('/save/_embed/example.com/?a=b')
-{'wb_url': ('latest_replay', '', '', 'http://_embed/example.com/?a=b', 'http://_embed/example.com/?a=b'), 'coll': 'save', 'wb_prefix': '/save/', 'request_uri': '/save/_embed/example.com/?a=b'}
-
-#>>> get_req_from_uri('/2345/20101024101112im_/example.com/?b=c')
-{'wb_url': ('replay', '20101024101112', 'im_', 'http://example.com/?b=c', '20101024101112im_/http://example.com/?b=c'), 'coll': '2345', 'wb_prefix': '/2345/', 'request_uri': '/2345/20101024101112im_/example.com/?b=c'}
-
-#>>> get_req_from_uri('/2010/example.com')
-{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'}
-
-# ajax
-#>>> get_req_from_uri('', {'REL_REQUEST_URI': '/2010/example.com', 'HTTP_HOST': 'localhost:8080', 'HTTP_X_REQUESTED_WITH': 'XMLHttpRequest'})
-{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'}
-
-#>>> get_req_from_uri('../example.com')
-{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '', 'wb_prefix': '/', 'request_uri': '../example.com'}
-
-# Abs path
-#>>> get_req_from_uri('/2010/example.com', {'wsgi.url_scheme': 'https', 'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True)
-{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': 'https://localhost:8080/2010/', 'request_uri': '/2010/example.com'}
-
-# No Scheme, default to http (shouldn't happen per WSGI standard)
-#>>> get_req_from_uri('/2010/example.com', {'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True)
-{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': 'http://localhost:8080/2010/', 'request_uri': '/2010/example.com'}
-
-# Referrer extraction
->>> WbUrl(req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080', 'HTTP_REFERER': 'http://localhost:8080/web/2011/blah.example.com/'}).extract_referrer_wburl_str()).url
-'http://blah.example.com/'
-
-# incorrect referer
->>> req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080', 'HTTP_REFERER': 'http://other.example.com/web/2011/blah.example.com/'}).extract_referrer_wburl_str()
-
-
-# no referer
->>> req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080'}).extract_referrer_wburl_str()
-
-# range requests
->>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='bytes=10-100')).extract_range()
-('http://example.com', 10, 100, True)
-
->>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='bytes=0-')).extract_range()
-('http://example.com', 0, '', True)
-
->>> req_from_uri('/web/www.googlevideo.com/videoplayback?id=123&range=0-65535').extract_range()
-('http://www.googlevideo.com/videoplayback?id=123', 0, 65535, False)
-
->>> req_from_uri('/web/www.googlevideo.com/videoplayback?id=123&range=100-200').extract_range()
-('http://www.googlevideo.com/videoplayback?id=123', 100, 200, False)
-
-# invalid range requests
->>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='10-20')).extract_range()
-
->>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='A-5')).extract_range()
-
->>> req_from_uri('/web/www.googlevideo.com/videoplayback?id=123&range=100-').extract_range()
-
-"""
-
-
-from pywb.rewrite.wburl import WbUrl
-from pywb.rewrite.url_rewriter import UrlRewriter
+from pywb.framework.wbrequestresponse import WbResponse
 from warcio.statusandheaders import StatusAndHeaders

-from pywb.framework.wbrequestresponse import WbRequest, WbResponse
-
-
-def get_req_from_uri(request_uri, env={}, use_abs_prefix=False):
-    response = req_from_uri(request_uri, env, use_abs_prefix)
-    varlist = vars(response)
-    the_dict = dict((k, varlist[k]) for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll'))
-    #print(the_dict)
-    return the_dict
-
-def req_from_uri(request_uri, env={}, use_abs_prefix=False):
-    if not request_uri:
-        request_uri = env.get('REL_REQUEST_URI')
-
-    parts = request_uri.split('/', 2)
-
-    # Has coll prefix
-    if len(parts) == 3:
-        rel_prefix = '/' + parts[1] + '/'
-        wb_url_str = parts[2]
-        coll = parts[1]
-    # No Coll Prefix
-    elif len(parts) == 2:
-        rel_prefix = '/'
-        wb_url_str = parts[1]
-        coll = ''
-    else:
-        rel_prefix = '/'
-        wb_url_str = parts[0]
-        coll = ''
-
-    return WbRequest(env,
-                     request_uri=request_uri,
-                     rel_prefix=rel_prefix,
-                     wb_url_str=wb_url_str,
-                     coll=coll,
-                     wburl_class=WbUrl,
-                     urlrewriter_class=UrlRewriter,
-                     use_abs_prefix=use_abs_prefix)
-
-
-def test_req_1():
-    res = get_req_from_uri('/save/_embed/example.com/?a=b')
-
-    assert(repr(res['wb_url']) == "('latest_replay', '', '', 'http://_embed/example.com/?a=b', 'http://_embed/example.com/?a=b')")
-    assert(res['coll'] == 'save')
-    assert(res['wb_prefix'] == '/save/')
-    assert(res['request_uri'] == '/save/_embed/example.com/?a=b')
-
-def test_req_2():
-    res = get_req_from_uri('/2345/20101024101112im_/example.com/?b=c')
-
-    assert(repr(res['wb_url']) == "('replay', '20101024101112', 'im_', 'http://example.com/?b=c', '20101024101112im_/http://example.com/?b=c')")
-    assert(res['coll'] == '2345')
-    assert(res['wb_prefix'] == '/2345/')
-    assert(res['request_uri'] == '/2345/20101024101112im_/example.com/?b=c')
-
-def test_req_3():
-    res = get_req_from_uri('/2010/example.com')
-
-    assert(repr(res['wb_url']) == "('latest_replay', '', '', 'http://example.com', 'http://example.com')")
-    assert(res['coll'] == '2010')
-    assert(res['wb_prefix'] == '/2010/')
-    assert(res['request_uri'] == '/2010/example.com')
-
-
-def test_req_4():
-    # ajax
-    res = get_req_from_uri('', {'REL_REQUEST_URI': '/2010/example.com', 'HTTP_HOST': 'localhost:8080', 'HTTP_X_REQUESTED_WITH': 'XMLHttpRequest'})
-
-    assert(repr(res['wb_url']) == "('latest_replay', '', '', 'http://example.com', 'http://example.com')")
-    assert(res['coll'] == '2010')
-    assert(res['wb_prefix'] == '/2010/')
-    assert(res['request_uri'] == '/2010/example.com')
-
-
-def test_req_5():
-    res = get_req_from_uri('../example.com')
-
-    assert(repr(res['wb_url']) == "('latest_replay', '', '', 'http://example.com', 'http://example.com')")
-    assert(res['coll'] == '')
-    assert(res['wb_prefix'] == '/')
-    assert(res['request_uri'] == '../example.com')
-
-
-
-def test_req_6():
-    # Abs path
-    res = get_req_from_uri('/2010/example.com', {'wsgi.url_scheme': 'https', 'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True)
-
-    assert(repr(res['wb_url']) == "('latest_replay', '', '', 'http://example.com', 'http://example.com')")
-    assert(res['coll'] == '2010')
-    assert(res['wb_prefix'] == 'https://localhost:8080/2010/')
-    assert(res['request_uri'] == '/2010/example.com')
-
-
-def test_req_7():
-    # No Scheme, default to http (shouldn't happen per WSGI standard)
-    res = get_req_from_uri('/2010/example.com', {'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True)
-
-    assert(repr(res['wb_url']) == "('latest_replay', '', '', 'http://example.com', 'http://example.com')")
-    assert(res['coll'] == '2010')
-    assert(res['wb_prefix'] == 'http://localhost:8080/2010/')
-    assert(res['request_uri'] == '/2010/example.com')
-
-
-
-
-
-#Response tests

 def test_resp_1():
    resp = vars(WbResponse.text_response('Test'))
--- a/pywb/framework/test/test_wsgi_wrapper.py
+++ b/pywb/framework/test/test_wsgi_wrapper.py
@@ -1,57 +0,0 @@
-from pywb.framework.wsgi_wrappers import init_app
-
-from pywb.utils.wbexception import AccessException
-
-import webtest
-
-class TestOkApp:
-    def __call__(self, env):
-        def response(env, start_response):
-            start_response('200 OK', [])
-            return [b'Test']
-        return response
-
-class TestErrApp:
-    def __call__(self, env):
-        raise Exception('Test Unexpected Error')
-
-class TestCustomErrApp:
-    def __call__(self, env):
-        raise AccessException('Forbidden Test')
-
-
-def initer(app_class):
-    def init(config=None):
-        return app_class()
-    return init
-
-def test_ok_app():
-    the_app = init_app(initer(TestOkApp), load_yaml=False)
-
-    testapp = webtest.TestApp(the_app)
-    resp = testapp.get('/')
-
-    assert resp.status_int == 200
-    assert b'Test' in resp.body, resp.body
-
-def test_err_app():
-    the_app = init_app(initer(TestErrApp), load_yaml=False)
-
-    testapp = webtest.TestApp(the_app)
-    resp = testapp.get('/abc', expect_errors=True)
-
-    assert resp.status_int == 500
-    assert b'500 Internal Server Error Error: Test Unexpected Error' in resp.body
-
-def test_custom_err_app():
-    the_app = init_app(initer(TestCustomErrApp), load_yaml=False)
-
-    testapp = webtest.TestApp(the_app)
-    resp = testapp.get('/abc', expect_errors=True)
-
-    assert resp.status_int == 403
-    assert b'403 Access Denied Error: Forbidden Test' in resp.body
-
-
-
-
--- a/pywb/framework/wbrequestresponse.py
+++ b/pywb/framework/wbrequestresponse.py
@@ -1,204 +1,8 @@
 from warcio.statusandheaders import StatusAndHeaders
-from pywb.utils.loaders import extract_post_query, append_post_query

-from io import BytesIO
-import pprint
-import re
 import json


-#=================================================================
-class WbRequest(object):
-    """
-    Represents the main pywb request object.
-
-    Contains various info from wsgi env, add additional info
-    about the request, such as coll, relative prefix,
-    host prefix, absolute prefix.
-
-    If a wburl and url rewriter classes are specified, the class
-    also contains the url rewriter.
-
-    """
-    @staticmethod
-    def make_host_prefix(env):
-        try:
-            host = env.get('HTTP_HOST')
-            if not host:
-                host = env['SERVER_NAME'] + ':' + env['SERVER_PORT']
-
-            return env.get('wsgi.url_scheme', 'http') + '://' + host
-        except KeyError:
-            return ''
-
-    def __init__(self, env,
-                 request_uri=None,
-                 rel_prefix='',
-                 wb_url_str='/',
-                 coll='',
-                 host_prefix='',
-                 use_abs_prefix=False,
-                 wburl_class=None,
-                 urlrewriter_class=None,
-                 is_proxy=False,
-                 cookie_scope=None,
-                 rewrite_opts={},
-                 user_metadata={},
-                 ):
-
-        self.env = env
-
-        if request_uri:
-            self.request_uri = request_uri
-        else:
-            self.request_uri = env.get('REL_REQUEST_URI')
-
-        self.method = self.env.get('REQUEST_METHOD')
-
-        self.coll = coll
-
-        self.final_mod = ''
-
-        if not host_prefix:
-            host_prefix = self.make_host_prefix(env)
-
-        self.host_prefix = host_prefix
-        self.rel_prefix = rel_prefix
-
-        if use_abs_prefix:
-            self.wb_prefix = host_prefix + rel_prefix
-        else:
-            self.wb_prefix = rel_prefix
-
-        if not wb_url_str:
-            wb_url_str = '/'
-
-        self.wb_url_str = wb_url_str
-
-        # wb_url present and not root page
-        if wb_url_str != '/' and wburl_class:
-            self.wb_url = wburl_class(wb_url_str)
-            self.urlrewriter = urlrewriter_class(self.wb_url,
-                                                 self.wb_prefix,
-                                                 host_prefix + rel_prefix,
-                                                 rel_prefix,
-                                                 env.get('SCRIPT_NAME', '/'),
-                                                 cookie_scope,
-                                                 rewrite_opts)
-
-            self.urlrewriter.deprefix_url()
-        # no wb_url, just store blank wb_url
-        else:
-            self.wb_url = None
-            self.urlrewriter = None
-
-        self.referrer = env.get('HTTP_REFERER')
-
-        self.options = dict()
-        self.options['is_ajax'] = self._is_ajax()
-        self.options['is_proxy'] = is_proxy or env.get('pywb_proxy_magic')
-
-        self.query_filter = []
-        self.custom_params = {}
-        self.user_metadata = user_metadata
-        self.rewrite_opts = rewrite_opts
-
-        # PERF
-        env['X_PERF'] = {}
-
-        if env.get('HTTP_X_PYWB_NOREDIRECT'):
-            self.custom_params['noredir'] = True
-
-        self._parse_extra()
-
-    def _is_ajax(self):
-        value = self.env.get('HTTP_X_REQUESTED_WITH')
-        value = value or self.env.get('HTTP_X_PYWB_REQUESTED_WITH')
-        if value and value.lower() == 'xmlhttprequest':
-            return True
-
-        return False
-
-    RANGE_ARG_RX = re.compile('.*.googlevideo.com/videoplayback.*([&?]range=(\d+)-(\d+))')
-
-    RANGE_HEADER = re.compile('bytes=(\d+)-(\d+)?')
-
-    def extract_range(self):
-        url = self.wb_url.url
-        use_206 = False
-        start = None
-        end = None
-
-        range_h = self.env.get('HTTP_RANGE')
-
-        if range_h:
-            m = self.RANGE_HEADER.match(range_h)
-            if m:
-                start = m.group(1)
-                end = m.group(2)
-                use_206 = True
-
-        else:
-            m = self.RANGE_ARG_RX.match(url)
-            if m:
-                start = m.group(2)
-                end = m.group(3)
-                url = url[:m.start(1)] + url[m.end(1):]
-                use_206 = False
-
-        if not start:
-            return None
-
-        start = int(start)
-        self.custom_params['noredir'] = True
-
-        if end:
-            end = int(end)
-        else:
-            end = ''
-
-        result = (url, start, end, use_206)
-        return result
-
-    def __repr__(self):
-        varlist = vars(self)
-        varstr = pprint.pformat(varlist)
-        return varstr
-
-    def _parse_extra(self):
-        pass
-
-    def extract_referrer_wburl_str(self):
-        if not self.referrer:
-            return None
-
-        if not self.referrer.startswith(self.host_prefix + self.rel_prefix):
-            return None
-
-        wburl_str = self.referrer[len(self.host_prefix + self.rel_prefix):]
-        return wburl_str
-
-    def normalize_post_query(self):
-        if self.method != 'POST':
-            return
-
-        if not self.wb_url:
-            return
-
-        mime = self.env.get('CONTENT_TYPE', '')
-        length = self.env.get('CONTENT_LENGTH')
-        stream = self.env['wsgi.input']
-
-        buffered_stream = BytesIO()
-
-        post_query = extract_post_query('POST', mime, length, stream,
-                                        buffered_stream=buffered_stream,
-                                        environ=self.env)
-
-        if post_query:
-            self.env['wsgi.input'] = buffered_stream
-            self.wb_url.url = append_post_query(self.wb_url.url, post_query)
-

 #=================================================================
 class WbResponse(object):
--- a/pywb/framework/wsgi_wrappers.py
+++ b/pywb/framework/wsgi_wrappers.py
@@ -1,188 +0,0 @@
-from pywb.utils.wbexception import WbException, NotFoundException
-from pywb.utils.loaders import load_yaml_config
-from pywb.utils.loaders import load_yaml_config
-from warcio.utils import to_native_str
-
-from pywb.framework.wbrequestresponse import WbResponse
-from warcio.statusandheaders import StatusAndHeaders
-
-
-import os
-import logging
-
-
-DEFAULT_PORT = 8080
-
-
-#=================================================================
-class WSGIApp(object):
-    def __init__(self, wb_router, fallback_app=None):
-        self.wb_router = wb_router
-        self.fallback_app = fallback_app
-
-    # Top-level wsgi application
-    def __call__(self, env, start_response):
-        if env['REQUEST_METHOD'] == 'CONNECT':
-            return self.handle_connect(env, start_response)
-        else:
-            return self.handle_methods(env, start_response)
-
-    def handle_connect(self, env, start_response):
-        def ssl_start_response(statusline, headers):
-            ssl_sock = env.get('pywb.proxy_ssl_sock')
-            if not ssl_sock:
-                start_response(statusline, headers)
-                return
-
-            env['pywb.proxy_statusline'] = statusline
-
-            status_line = 'HTTP/1.1 ' + statusline + '\r\n'
-            ssl_sock.write(status_line.encode('iso-8859-1'))
-
-            for name, value in headers:
-                line = name + ': ' + value + '\r\n'
-                ssl_sock.write(line.encode('iso-8859-1'))
-
-        resp_iter = self.handle_methods(env, ssl_start_response)
-
-        ssl_sock = env.get('pywb.proxy_ssl_sock')
-        if not ssl_sock:
-            return resp_iter
-
-        ssl_sock.write(b'\r\n')
-
-        for obj in resp_iter:
-            if obj:
-                ssl_sock.write(obj)
-        ssl_sock.close()
-
-        start_response(env['pywb.proxy_statusline'], [])
-
-        return []
-
-    def handle_methods(self, env, start_response):
-        wb_router = self.wb_router
-        response = None
-
-        try:
-            response = wb_router(env)
-
-            if not response:
-                if self.fallback_app:
-                    return self.fallback_app(env, start_response)
-                else:
-                    msg = 'No handler for "{0}".'.format(env['REL_REQUEST_URI'])
-                    raise NotFoundException(msg)
-
-        except WbException as e:
-            response = self.handle_exception(env, e, False)
-
-        except Exception as e:
-            response = self.handle_exception(env, e, True)
-
-        return response(env, start_response)
-
-    def handle_exception(self, env, exc, print_trace):
-        error_view = None
-
-        if hasattr(self.wb_router, 'error_view'):
-            error_view = self.wb_router.error_view
-
-        if hasattr(exc, 'status'):
-            status = exc.status()
-        else:
-            status = '500 Internal Server Error'
-
-        if hasattr(exc, 'url'):
-            err_url = exc.url
-        else:
-            err_url = None
-
-        if len(exc.args):
-            err_msg = exc.args[0]
-
-        if print_trace:
-            import traceback
-            err_details = traceback.format_exc()
-            print(err_details)
-        else:
-            logging.info(err_msg)
-            err_details = None
-
-        if error_view:
-            if err_url and isinstance(err_url, str):
-                err_url = to_native_str(err_url, 'utf-8')
-            if err_msg and isinstance(err_msg, str):
-                err_msg = to_native_str(err_msg, 'utf-8')
-
-            return error_view.render_response(exc_type=type(exc).__name__,
-                                              err_msg=err_msg,
-                                              err_details=err_details,
-                                              status=status,
-                                              env=env,
-                                              err_url=err_url)
-        else:
-            msg = status + ' Error: '
-            if err_msg:
-                msg += err_msg
-
-            #msg = msg.encode('utf-8', 'ignore')
-            return WbResponse.text_response(msg,
-                                           status=status)
-
-#=================================================================
-DEFAULT_CONFIG_FILE = 'config.yaml'
-
-
-#=================================================================
-def init_app(init_func, load_yaml=True, config_file=None, config=None):
-    try:
-        config = config or {}
-        if load_yaml:
-            # env setting overrides all others
-            env_config = os.environ.get('PYWB_CONFIG_FILE')
-            if env_config:
-                config_file = env_config
-
-            if not config_file:
-                config_file = DEFAULT_CONFIG_FILE
-
-            if os.path.isfile(config_file):
-                config = load_yaml_config(config_file)
-
-        wb_router = init_func(config)
-    except:
-        msg = '*** pywb app init FAILED config from "%s"!\n'
-        logging.exception(msg, init_func.__name__)
-        raise
-    else:
-        msg = '*** pywb app inited with config from "%s"!\n'
-        logging.debug(msg, init_func.__name__)
-
-    return WSGIApp(wb_router)
-
-
-#=================================================================
-def start_wsgi_ref_server(the_app, name, port):  # pragma: no cover
-    from wsgiref.simple_server import make_server, WSGIServer
-    from six.moves.socketserver import ThreadingMixIn
-
-    # disable is_hop_by_hop restrictions
-    import wsgiref.handlers
-    wsgiref.handlers.is_hop_by_hop = lambda x: False
-
-    if port is None:
-        port = DEFAULT_PORT
-
-    logging.info('Starting %s on port %s', name, port)
-
-    class ThreadingWSGIServer(ThreadingMixIn, WSGIServer):
-        pass
-
-    try:
-        httpd = make_server('', port, the_app, ThreadingWSGIServer)
-        httpd.serve_forever()
-    except KeyboardInterrupt as ex:
-        pass
-    finally:
-        logging.info('Stopping %s', name)
--- a/pywb/perms/__init__.py
+++ b/pywb/perms/__init__.py
--- a/pywb/perms/perms_filter.py
+++ b/pywb/perms/perms_filter.py
@@ -1,85 +0,0 @@
-from pywb.utils.wbexception import AccessException
-
-
-#=================================================================
-def make_perms_cdx_filter(perms_policy, wbrequest):
-    """
-    Called internally to convert a perms_policy and a request
-    to a filter which can be applied on the cdx
-    """
-    perms_checker = perms_policy(wbrequest)
-    if not perms_checker:
-        return None
-
-    return _create_cdx_perms_filter(perms_checker)
-
-
-#=================================================================
-def _create_cdx_perms_filter(perms_checker):
-    """
-    Return a function which will filter the cdx given
-    a Perms object.
-    :param perms_checker: a Perms object which implements the
-        allow_url_lookup() and access_check_capture() methods
-    """
-
-    def perms_filter_op(cdx_iter, query):
-        """
-        filter out those cdx records that user doesn't have access to,
-        by consulting :param perms_checker:.
-        :param cdx_iter: cdx record source iterable
-        :param query: request parameters (CDXQuery)
-        :param perms_checker: object implementing permission checker
-        """
-        if not perms_checker.allow_url_lookup(query.key):
-            if query.is_exact:
-                raise AccessException('Excluded')
-
-        for cdx in cdx_iter:
-            cdx = perms_checker.access_check_capture(cdx)
-            if cdx:
-                yield cdx
-
-    return perms_filter_op
-
-
-#================================================================
-def allow_all_perms_policy(wbrequest):
-    """
-    Perms policy which always returns a default Perms object
-    which allows everything.
-
-    The perms object is created per request and may store request
-    state, if necessary.
-
-    The same perms object may be called with multiple queries
-    (such as for each cdx line) per request.
-    """
-    return Perms()
-
-
-#=================================================================
-class Perms(object):
-    """
-    A base perms checker which allows everything
-    """
-
-    def allow_url_lookup(self, key):
-        """
-        Return true/false if urlkey (canonicalized url)
-        should be allowed.
-
-        Default: allow all
-        """
-        return True
-
-    def access_check_capture(self, cdx):
-        """
-        Allow/deny specified cdx capture (dict) to be included
-        in the result.
-        Return None to reject, or modify the cdx to exclude
-        any fields that need to be restricted.
-
-        Default: allow cdx line without modifications
-        """
-        return cdx
--- a/pywb/perms/perms_handler.py
+++ b/pywb/perms/perms_handler.py
@@ -1,67 +0,0 @@
-from pywb.utils.canonicalize import UrlCanonicalizer
-from pywb.utils.wbexception import NotFoundException
-
-from pywb.framework.basehandlers import WbUrlHandler
-from pywb.framework.archivalrouter import ArchivalRouter, Route
-from pywb.framework.wbrequestresponse import WbResponse
-
-BLOCK = '["block"]'
-ALLOW = '["allow"]'
-RESPONSE_TYPE = 'application/json'
-
-NOT_FOUND = 'Please specify a url to check for access'
-
-
-#=================================================================
-class PermsHandler(WbUrlHandler):
-
-    def __init__(self, perms_policy, url_canon):
-        self.perms_policy = perms_policy
-        self.url_canon = url_canon
-
-    def __call__(self, wbrequest):
-        perms_checker = self.perms_policy(wbrequest)
-
-        if wbrequest.wb_url:
-            return self.check_single_url(wbrequest, perms_checker)
-
-#        elif wbrequest.env['REQUEST_METHOD'] == 'POST':
-#            return self.check_bulk(wbrequest, perms_checker)
-
-        else:
-            raise NotFoundException(NOT_FOUND)
-
-    def check_single_url(self, wbrequest, perms_checker):
-        urlkey = self.url_canon(wbrequest.wb_url.url)
-        urlkey = urlkey.encode('utf-8')
-
-        if not perms_checker.allow_url_lookup(urlkey):
-            response_text = BLOCK
-        else:
-            response_text = ALLOW
-
-        #TODO: other types of checking
-        return WbResponse.text_response(response_text,
-                                        content_type=RESPONSE_TYPE)
-#TODO
-#    def check_bulk_urls(self, wbrequest, perms_checker):
-#        pass
-#
-
-
-#=================================================================
-def create_perms_checker_app(config):
-    """
-    Create permissions checker standalone app
-    Running under the '/check-access' route
-    """
-    port = config.get('port')
-
-    perms_policy = config.get('perms_policy')
-
-    canonicalizer = UrlCanonicalizer(config.get('surt_ordered', True))
-
-    handler = PermsHandler(perms_policy, canonicalizer)
-    routes = [Route('check-access', handler)]
-
-    return ArchivalRouter(routes, port=port)
--- a/pywb/urlrewrite/platformhandler.py
+++ b/pywb/urlrewrite/platformhandler.py
@@ -1,99 +0,0 @@
-from gevent.monkey import patch_all; patch_all()
-
-import requests
-
-from pywb.framework.archivalrouter import Route
-
-from pywb.rewrite.rewrite_content import RewriteContent
-from pywb.rewrite.wburl import WbUrl
-from warcio.recordloader import ArcWarcRecordLoader
-from pywb.webapp.live_rewrite_handler import RewriteHandler
-from pywb.utils.canonicalize import canonicalize
-from warcio.timeutils import http_date_to_timestamp
-from pywb.cdx.cdxobject import CDXObject
-
-from io import BytesIO
-
-from pywb.urlrewrite.rewriteinputreq import RewriteInputRequest
-
-from six.moves.urllib.parse import quote
-
-
-# ============================================================================
-class PlatformRoute(Route):
-    def apply_filters(self, wbrequest, matcher):
-        wbrequest.matchdict = matcher.groupdict()
-
-
-# ============================================================================
-class PlatformHandler(RewriteHandler):
-    def __init__(self, config):
-        super(PlatformHandler, self).__init__(config)
-        self.upstream_url = config.get('upstream_url')
-        self.loader = ArcWarcRecordLoader()
-
-        framed = config.get('framed_replay')
-        self.content_rewriter = RewriteContent(is_framed_replay=framed)
-
-    def render_content(self, wbrequest):
-        if wbrequest.wb_url.mod == 'vi_':
-            return self._get_video_info(wbrequest)
-
-        ref_wburl_str = wbrequest.extract_referrer_wburl_str()
-        if ref_wburl_str:
-            wbrequest.env['HTTP_REFERER'] = WbUrl(ref_wburl_str).url
-
-        urlkey = canonicalize(wbrequest.wb_url.url)
-        url = wbrequest.wb_url.url
-
-        inputreq = RewriteInputRequest(wbrequest.env, urlkey, url,
-                                       self.content_rewriter)
-
-        req_data = inputreq.reconstruct_request(url)
-
-        headers = {'Content-Length': len(req_data),
-                   'Content-Type': 'application/request'}
-
-        if wbrequest.wb_url.is_latest_replay():
-            closest = 'now'
-        else:
-            closest = wbrequest.wb_url.timestamp
-
-        upstream_url = self.upstream_url.format(url=quote(url),
-                                                closest=closest,
-                                                #coll=wbrequest.coll,
-                                                **wbrequest.matchdict)
-
-        r = requests.post(upstream_url,
-                          data=BytesIO(req_data),
-                          headers=headers,
-                          stream=True,
-                          allow_redirects=False)
-
-        r.raise_for_status()
-
-        record = self.loader.parse_record_stream(r.raw)
-
-        cdx = CDXObject()
-        cdx['urlkey'] = urlkey
-        cdx['timestamp'] = http_date_to_timestamp(r.headers.get('Memento-Datetime'))
-        cdx['url'] = url
-
-        head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
-        result = self.content_rewriter.rewrite_content(wbrequest.urlrewriter,
-                                               record.http_headers,
-                                               record.stream,
-                                               head_insert_func,
-                                               urlkey,
-                                               cdx)
-
-        status_headers, gen, is_rw = result
-        return self._make_response(wbrequest, *result)
-
-
-if __name__ == "__main__":
-    from gevent.wsgi import WSGIServer
-    from pywb.apps.wayback import application
-
-    server = WSGIServer(('', 8090), application)
-    server.serve_forever()
--- a/pywb/warc/README.md
+++ b/pywb/warc/README.md
@@ -1,32 +0,0 @@
-### pywb.warc
-
-This is the WARC/ARC record loading component of pywb wayback tool suite.
-The package provides the following facilities:
-
-* Resolve relative WARC/ARC filenames to a full path based on configurable resolvers
-
-* Resolve 'revisit' records from provided index to find a full record with headers and payload content
-
-* Load WARC/ARC records either locally or via http using http 1.1 range requests
-
-
-When loading archived content, the format type (WARC vs ARC) and compressed ARCs/WARCs
-are decompressed automatically.
-No assumption is made about format based on filename, content type
-or other external parameters other than the content itself.
-
-### Tests
-
-This package will includes a test suite for loading a variety of WARC and ARC records.
-
-Tests so far:
-
-* Compressed WARC, ARC Records
-* Uncompressed ARC Records
-* Compressed WARC created by wget 1.14
-* Same Url revisit record resolving
-
-
-TODO:
-
-* Different url revisit record resolving
--- a/pywb/webapp/__init__.py
+++ b/pywb/webapp/__init__.py
--- a/pywb/webapp/cdx_api_handler.py
+++ b/pywb/webapp/cdx_api_handler.py
@ -1,62 +0,0 @@
-from pywb.cdx.cdxserver import create_cdx_server
-
-from pywb.utils.wbexception import NotFoundException
-from pywb.framework.basehandlers import BaseHandler
-from pywb.framework.wbrequestresponse import WbResponse
-
-from pywb.webapp.query_handler import QueryHandler
-
-from six.moves.urllib.parse import parse_qs
-import json
-import six
-
-
-#=================================================================
-class CDXAPIHandler(BaseHandler):
-    """
-    Handler which passes wsgi request to cdx server and
-    returns a text-based cdx api
-    """
-    def __init__(self, index_handler):
-        self.index_handler = index_handler
-
-    def __call__(self, wbrequest):
-        params = self.extract_params_from_wsgi_env(wbrequest.env)
-
-        try:
-            cdx_iter = self.index_handler.load_cdx(wbrequest, params)
-        except NotFoundException:
-            msg = 'No Captures found for: ' + params.get('url')
-            if params.get('output') == 'json':
-                msg = json.dumps(dict(error=msg))
-                content_type='application/json'
-            else:
-                content_type='text/plain'
-
-            return WbResponse.text_response(msg, content_type=content_type,
-                                            status='404 Not Found')
-
-        return WbResponse.text_stream(cdx_iter,
-                                      content_type='text/plain')
-
-    @staticmethod
-    def extract_params_from_wsgi_env(env):
-        """ utility function to extract params and create a CDXQuery
-        from a WSGI environment dictionary
-        """
-        params = parse_qs(env['QUERY_STRING'])
-
-        # parse_qs produces arrays for single values
-        # cdx processing expects singleton params for all params,
-        # except filters, so convert here
-        # use first value of the list
-        for name, val in six.iteritems(params):
-            if name != 'filter':
-                params[name] = val[0]
-
-        if 'output' not in params:
-            params['output'] = 'text'
-        elif params['output'] not in ('text', 'json'):
-            params['output'] = 'text'
-
-        return params
--- a/pywb/webapp/handlers.py
+++ b/pywb/webapp/handlers.py
@ -1,195 +1,14 @@
-import pkgutil
 import mimetypes
-import time
-import logging

-from datetime import datetime
-
-from warcio.statusandheaders import StatusAndHeaders
-from warcio.timeutils import datetime_to_timestamp
-
-from pywb.utils.wbexception import NotFoundException
 from pywb.utils.loaders import LocalFileLoader

-from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
 from pywb.framework.wbrequestresponse import WbResponse

-from pywb.warc.blockrecordloader import BlockArcWarcRecordLoader
-from pywb.warc.resolvingloader import ResolvingLoader
-from pywb.warc.pathresolvers import PathResolverMapper
-
-from pywb.webapp.views import J2TemplateView, init_view
-from pywb.webapp.replay_views import ReplayView
-from pywb.framework.memento import MementoResponse
-
-
-#=================================================================
-class SearchPageWbUrlHandler(WbUrlHandler):
-    """
-    Loads a default search page html template to be shown when
-    the wb_url is empty
-    """
-    def __init__(self, config):
-        self.search_view = init_view(config, 'search_html')
-
-        self.is_frame_mode = config.get('framed_replay', False)
-        self.frame_mod = 'tf_'
-        self.replay_mod = ''
-
-        self.response_class = WbResponse
-
-        if self.is_frame_mode:
-            #html = config.get('frame_insert_html', 'templates/frame_insert.html')
-            #self.search_view = J2TemplateView(html, config.get('jinja_env'))
-            self.frame_insert_view = init_view(config, 'frame_insert_html')
-            assert(self.frame_insert_view)
-
-            self.banner_html = config.get('banner_html', 'banner.html')
-
-            if config.get('enable_memento', False):
-                self.response_class = MementoResponse
-
-            if self.is_frame_mode == 'inverse':
-                self.frame_mod = ''
-                self.replay_mod = 'mp_'
-
-        else:
-            self.frame_insert_view = None
-            self.banner_html = None
-
-    def render_search_page(self, wbrequest, **kwargs):
-        return self.search_view.render_response(wbrequest=wbrequest,
-                                                prefix=wbrequest.wb_prefix,
-                                                **kwargs)
-
-    def __call__(self, wbrequest):
-        # root search page
-        if wbrequest.wb_url_str == '/':
-            return self.render_search_page(wbrequest)
-
-        wbrequest.options['replay_mod'] = self.replay_mod
-        wbrequest.options['frame_mod'] = self.frame_mod
-
-        # render top level frame if in frame mode
-        # (not supported in proxy mode)
-        if (self.is_frame_mode and wbrequest.wb_url and
-             not wbrequest.wb_url.is_query() and
-             not wbrequest.options['is_proxy']):
-
-            if wbrequest.wb_url.mod == self.frame_mod:
-                wbrequest.options['is_top_frame'] = True
-                return self.get_top_frame_response(wbrequest)
-            else:
-                wbrequest.options['is_framed'] = True
-                wbrequest.final_mod = self.frame_mod
-        else:
-            wbrequest.options['is_framed'] = False
-
-        try:
-            return self.handle_request(wbrequest)
-        except NotFoundException as nfe:
-            return self.handle_not_found(wbrequest, nfe)
-
-    def get_top_frame_params(self, wbrequest, mod):
-        embed_url = wbrequest.wb_url.to_str(mod=mod)
-
-        if wbrequest.wb_url.timestamp:
-            timestamp = wbrequest.wb_url.timestamp
-        else:
-            timestamp = datetime_to_timestamp(datetime.utcnow())
-
-        params = dict(embed_url=embed_url,
-                      wbrequest=wbrequest,
-                      timestamp=timestamp,
-                      url=wbrequest.wb_url.get_url(),
-                      banner_html=self.banner_html)
-
-        return params
-
-    def get_top_frame_response(self, wbrequest):
-        params = self.get_top_frame_params(wbrequest, mod=self.replay_mod)
-
-        headers = [('Content-Type', 'text/html')]
-        status_headers = StatusAndHeaders('200 OK', headers)
-
-        template_result = self.frame_insert_view.render_to_string(**params)
-        body = template_result.encode('utf-8')
-
-        return self.response_class(status_headers, [body], wbrequest=wbrequest)
-
-
-#=================================================================
-# Standard WB Handler
-#=================================================================
-class WBHandler(SearchPageWbUrlHandler):
-    def __init__(self, query_handler, config=None):
-        super(WBHandler, self).__init__(config)
-
-        self.index_reader = query_handler
-        self.not_found_view = init_view(config, 'not_found_html')
-
-        self.replay = self._init_replay_view(config)
-
-        self.fallback_handler = None
-        self.fallback_name = config.get('fallback')
-
-    def _init_replay_view(self, config):
-        cookie_maker = config.get('cookie_maker')
-        record_loader = BlockArcWarcRecordLoader(cookie_maker=cookie_maker)
-
-        paths = config.get('archive_paths')
-
-        resolving_loader = ResolvingLoader(PathResolverMapper()(paths),
-                                           record_loader=record_loader)
-
-        return ReplayView(resolving_loader, config)
-
-    def resolve_refs(self, handler_dict):
-        if self.fallback_name:
-            self.fallback_handler = handler_dict.get(self.fallback_name)
-            logging.debug('Fallback Handler: ' + self.fallback_name)
-
-    def handle_request(self, wbrequest):
-        cdx_lines, output = self.index_reader.load_for_request(wbrequest)
-
-        if output != 'text' and wbrequest.wb_url.is_replay():
-            return self.handle_replay(wbrequest, cdx_lines)
-        else:
-            return self.handle_query(wbrequest, cdx_lines, output)
-
-    def handle_query(self, wbrequest, cdx_lines, output):
-        return self.index_reader.make_cdx_response(wbrequest,
-                                                   cdx_lines,
-                                                   output)
-
-    def handle_replay(self, wbrequest, cdx_lines):
-        cdx_callback = self.index_reader.cdx_load_callback(wbrequest)
-
-        return self.replay.render_content(wbrequest,
-                                          cdx_lines,
-                                          cdx_callback)
-
-    def handle_not_found(self, wbrequest, nfe):
-        # check fallback: only for replay queries and not for identity
-        if (self.fallback_handler and
-            not wbrequest.wb_url.is_query() and
-            not wbrequest.wb_url.is_identity):
-            return self.fallback_handler(wbrequest)
-
-        # if capture query, just return capture page
-        if wbrequest.wb_url.is_query():
-            output = self.index_reader.get_output_type(wbrequest.wb_url)
-            return self.index_reader.make_cdx_response(wbrequest, iter([]), output)
-        else:
-            return self.not_found_view.render_response(status='404 Not Found',
-                                                       wbrequest=wbrequest,
-                                                       url=wbrequest.wb_url.url)
-

 #=================================================================
 # Static Content Handler
 #=================================================================
-class StaticHandler(BaseHandler):
+class StaticHandler(object):
    def __init__(self, static_path):
        mimetypes.init()

@ -234,15 +53,3 @@ class StaticHandler(BaseHandler):
                                    wbrequest.wb_url_str)


-#=================================================================
-# Debug Handlers
-#=================================================================
-class DebugEchoEnvHandler(BaseHandler):  # pragma: no cover
-    def __call__(self, wbrequest):
-        return WbResponse.text_response(str(wbrequest.env))
-
-
-#=================================================================
-class DebugEchoHandler(BaseHandler):  # pragma: no cover
-    def __call__(self, wbrequest):
-        return WbResponse.text_response(str(wbrequest))
--- a/pywb/webapp/live_rewrite_handler.py
+++ b/pywb/webapp/live_rewrite_handler.py
@ -1,241 +0,0 @@
-from pywb.framework.wbrequestresponse import WbResponse
-from pywb.framework.cache import create_cache
-
-from pywb.rewrite.rewrite_live import LiveRewriter
-from pywb.rewrite.wburl import WbUrl
-
-from pywb.webapp.handlers import StaticHandler, SearchPageWbUrlHandler
-from pywb.webapp.views import HeadInsertView
-
-from pywb.utils.wbexception import LiveResourceException
-
-import json
-import hashlib
-
-
-#=================================================================
-class RewriteHandler(SearchPageWbUrlHandler):
-
-    LIVE_COOKIE = 'pywb.timestamp={0}; max-age=60'
-
-    YT_DL_TYPE = 'application/vnd.youtube-dl_formats+json'
-
-    def __init__(self, config):
-        super(RewriteHandler, self).__init__(config)
-
-        proxyhostport = config.get('proxyhostport')
-
-        live_rewriter_cls = config.get('live_rewriter_cls', LiveRewriter)
-
-        self.live_fetcher = live_rewriter_cls(is_framed_replay=self.is_frame_mode,
-                                              proxies=proxyhostport)
-
-        self.recording = self.live_fetcher.is_recording()
-
-        self.head_insert_view = HeadInsertView.init_from_config(config)
-
-        self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)
-
-        self.verify = config.get('verify_ssl', True)
-
-        self.ydl = None
-
-        self._cache = None
-
-    def handle_request(self, wbrequest):
-        if wbrequest.wb_url.is_query():
-            type_ = wbrequest.wb_url.LATEST_REPLAY
-            url = wbrequest.urlrewriter.get_new_url(type=type_, timestamp='')
-            return WbResponse.redir_response(url)
-
-        if wbrequest.options['is_ajax']:
-            wbrequest.urlrewriter.rewrite_opts['is_ajax'] = True
-
-        try:
-            return self.render_content(wbrequest)
-
-        except Exception as exc:
-            import traceback
-            err_details = traceback.format_exc()
-            print(err_details)
-
-            url = wbrequest.wb_url.url
-            msg = 'Could not load the url from the live web: ' + url
-            raise LiveResourceException(msg=msg, url=url)
-
-    def _live_request_headers(self, wbrequest):
-        return {}
-
-    def _skip_recording(self, wbrequest):
-        return False
-
-    def render_content(self, wbrequest):
-        if wbrequest.wb_url.mod == 'vi_':
-            return self._get_video_info(wbrequest)
-
-        head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
-        req_headers = self._live_request_headers(wbrequest)
-
-        ref_wburl_str = wbrequest.extract_referrer_wburl_str()
-        if ref_wburl_str:
-            wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
-
-        skip_recording = self._skip_recording(wbrequest)
-
-        use_206 = False
-        url = None
-        rangeres = None
-
-        readd_range = False
-        cache_key = None
-
-        if self.recording and not skip_recording:
-            rangeres = wbrequest.extract_range()
-
-            if rangeres:
-                url, start, end, use_206 = rangeres
-
-                # if bytes=0- Range request,
-                # simply remove the range and still proxy
-                if start == 0 and not end and use_206:
-                    wbrequest.wb_url.url = url
-                    del wbrequest.env['HTTP_RANGE']
-                    readd_range = True
-                else:
-                    # disables proxy
-                    skip_recording = True
-
-                    # sets cache_key only if not already cached
-                    cache_key = self._get_cache_key('r:', url)
-
-        result = self.live_fetcher.fetch_request(wbrequest.wb_url.url,
-                                             wbrequest.urlrewriter,
-                                             head_insert_func=head_insert_func,
-                                             req_headers=req_headers,
-                                             env=wbrequest.env,
-                                             skip_recording=skip_recording,
-                                             verify=self.verify)
-
-        wbresponse = self._make_response(wbrequest, *result)
-
-        if readd_range:
-            content_length = (wbresponse.status_headers.
-                              get_header('Content-Length'))
-            try:
-                content_length = int(content_length)
-                wbresponse.status_headers.add_range(0, content_length,
-                                                    content_length)
-            except (ValueError, TypeError):
-                pass
-
-        if self.recording and cache_key:
-            self._add_rec_ping(cache_key, url, wbrequest, wbresponse)
-
-        if rangeres:
-            referrer = wbrequest.env.get('REL_REFERER')
-
-            # also ping video info
-            if referrer:
-                try:
-                    resp = self._get_video_info(wbrequest,
-                                                info_url=referrer,
-                                                video_url=url)
-                except:
-                    print('Error getting video info')
-
-        return wbresponse
-
-    def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
-        # if cookie set, pass recorded timestamp info via cookie
-        # so that client side may be able to access it
-        # used by framed mode to update frame banner
-        if self.live_cookie:
-            cdx = wbrequest.env.get('pywb.cdx')
-            if cdx:
-                value = self.live_cookie.format(cdx['timestamp'])
-                status_headers.headers.append(('Set-Cookie', value))
-
-        return WbResponse(status_headers, gen)
-
-    def _get_cache_key(self, prefix, url):
-        if not self._cache:
-            self._cache = create_cache()
-
-        key = self.create_cache_key(prefix, url)
-
-        if key in self._cache:
-            return None
-
-        return key
-
-    @staticmethod
-    def create_cache_key(prefix, url):
-        hash_ = hashlib.md5()
-        hash_.update(url.encode('utf-8'))
-        key = hash_.hexdigest()
-        key = prefix + key
-        return key
-
-    def _add_rec_ping(self, key, url, wbrequest, wbresponse):
-        def do_ping():
-            headers = self._live_request_headers(wbrequest)
-            headers['Connection'] = 'close'
-
-            try:
-                # mark as pinged
-                self._cache[key] = '1'
-
-                self.live_fetcher.fetch_async(url, headers)
-
-            except:
-                del self._cache[key]
-                raise
-
-        def wrap_buff_gen(gen):
-            for x in gen:
-                yield x
-
-            try:
-                do_ping()
-            except:
-                pass
-
-        #do_ping()
-        wbresponse.body = wrap_buff_gen(wbresponse.body)
-        return wbresponse
-
-    def _get_video_info(self, wbrequest, info_url=None, video_url=None):
-        if not video_url:
-            video_url = wbrequest.wb_url.url
-
-        if not info_url:
-            info_url = wbrequest.wb_url.url
-
-        cache_key = None
-        if self.recording:
-            cache_key = self._get_cache_key('v:', video_url)
-
-        info = self.live_fetcher.get_video_info(video_url)
-        if info is None:  #pragma: no cover
-            msg = ('youtube-dl is not installed, pip install youtube-dl to ' +
-                   'enable improved video proxy')
-
-            return WbResponse.text_response(text=msg, status='404 Not Found')
-
-        #if info and info.formats and len(info.formats) == 1:
-
-        content_type = self.YT_DL_TYPE
-        metadata = json.dumps(info)
-
-        if (self.recording and cache_key):
-            headers = self._live_request_headers(wbrequest)
-            headers['Content-Type'] = content_type
-
-            if info_url.startswith('https://'):
-                info_url = info_url.replace('https', 'http', 1)
-
-            response = self.live_fetcher.add_metadata(info_url, headers, metadata)
-
-            self._cache[cache_key] = '1'
-
-        return WbResponse.text_response(metadata, content_type=content_type)
--- a/pywb/webapp/pywb_init.py
+++ b/pywb/webapp/pywb_init.py
@ -1,387 +0,0 @@
-from pywb.utils.loaders import load_yaml_config
-
-from pywb.framework.archivalrouter import ArchivalRouter, Route
-from pywb.framework.proxy import ProxyArchivalRouter
-from pywb.framework.wbrequestresponse import WbRequest
-from pywb.framework.memento import MementoRequest
-from pywb.framework.basehandlers import BaseHandler
-
-from pywb.webapp.views import J2TemplateView
-from pywb.webapp.views import J2HtmlCapturesView, init_view
-
-from pywb.webapp.live_rewrite_handler import RewriteHandler
-
-from pywb.webapp.query_handler import QueryHandler
-from pywb.webapp.handlers import WBHandler
-from pywb.webapp.handlers import StaticHandler
-from pywb.webapp.handlers import DebugEchoHandler, DebugEchoEnvHandler
-from pywb.webapp.cdx_api_handler import CDXAPIHandler
-
-from pywb import DEFAULT_CONFIG
-
-import os
-import logging
-import six
-
-
-#=================================================================
-class DictChain(object):
-    def __init__(self, *dicts):
-        self.dicts = dicts
-
-    def get(self, key, default_val=None):
-        for d in self.dicts:
-            val = d.get(key)
-            if val is not None:
-                return val
-        return default_val
-
-    def __contains__(self, key):
-        return self.get(key) is not None
-
-    def __getitem__(self, key):
-        return self.get(key)
-
-    def __setitem__(self, key, value):
-        self.dicts[0][key] = value
-
-
-#=================================================================
-def create_wb_handler(query_handler, config):
-    wb_handler_class = config.get('wb_handler_class', WBHandler)
-
-    wb_handler = wb_handler_class(
-        query_handler,
-        config=config,
-    )
-
-    return wb_handler
-
-
-#=================================================================
-def create_live_handler(config):
-    wb_handler_class = config.get('wb_handler_class', RewriteHandler)
-
-    live_handler = wb_handler_class(config)
-
-    return live_handler
-
-
-#=================================================================
-def init_route_config(value, config):
-    if isinstance(value, str) or isinstance(value, list):
-        value = dict(index_paths=value)
-
-    route_config = DictChain(value, config)
-    return route_config
-
-
-#=================================================================
-def init_collection(route_config):
-    ds_rules_file = route_config.get('domain_specific_rules', None)
-
-    html_view = init_view(route_config, 'query_html', J2HtmlCapturesView)
-
-    server_cls = route_config.get('server_cls')
-
-    query_handler = QueryHandler.init_from_config(route_config,
-                                                  ds_rules_file,
-                                                  html_view,
-                                                  server_cls)
-
-    return query_handler
-
-
-#=================================================================
-def add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler,
-                        route_class=Route):
-    # if bool, use -cdx suffix, else use custom string
-    # as the suffix
-    if isinstance(cdx_api_suffix, bool):
-        name += '-cdx'
-    else:
-        name += str(cdx_api_suffix)
-
-    logging.debug('Adding CDX API Handler: ' + name)
-    routes.append(route_class(name, CDXAPIHandler(query_handler)))
-
-
-#=================================================================
-def create_cdx_server_app(passed_config):
-    """
-    Create a cdx server api-only app
-    For each collection, create a /<coll>-cdx access point
-    which follows the cdx api
-    """
-
-    defaults = load_yaml_config(DEFAULT_CONFIG)
-
-    config = DictChain(passed_config, defaults)
-
-    collections = config.get('collections', {})
-
-    static_routes = {}
-
-    # collections based on file system
-    if config.get('enable_auto_colls', True):
-        colls_loader_cls = config.get('colls_loader_cls', DirectoryCollsLoader)
-        dir_loader = colls_loader_cls(config, static_routes, collections)
-        dir_loader()
-        #collections.update(dir_loader())
-
-    routes = []
-
-    for name, value in six.iteritems(collections):
-        route_config = init_route_config(value, config)
-        query_handler = init_collection(route_config)
-
-        cdx_api_suffix = route_config.get('enable_cdx_api', True)
-
-        add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler)
-
-    return ArchivalRouter(routes)
-
-
-#=================================================================
-class DirectoryCollsLoader(object):
-    def __init__(self, config, static_routes, colls):
-        self.config = config
-        self.static_routes = static_routes
-        self.colls = colls
-
-    def __call__(self):
-        colls = self.colls
-
-        static_dir = self.config.get('paths')['static_path']
-        static_shared_prefix = self.config.get('static_shared_prefix')
-
-        if static_dir and static_shared_prefix and os.path.isdir(static_dir):
-            static_dir = os.path.abspath(static_dir) + os.path.sep
-            self.static_routes[static_shared_prefix] = static_dir
-
-        root_dir = self.config.get('collections_root', '')
-        if not root_dir or not os.path.isdir(root_dir):
-            return colls
-
-        for name in os.listdir(root_dir):
-            full = os.path.join(root_dir, name)
-            if not os.path.isdir(full):
-                continue
-
-            coll_config = self.load_coll_dir(full, name)
-            if coll_config:
-                # if already exists, override existing config with coll specific
-                if name in colls:
-                    colls[name].update(coll_config)
-                else:
-                    colls[name] = coll_config
-
-        return colls
-
-    def _norm_path(self, root_dir, path):
-        result = os.path.normpath(os.path.join(root_dir, path))
-        return result
-
-    def _add_dir_if_exists(self, coll, root_dir, dir_key, required=False):
-        curr_val = coll.get(dir_key)
-        if curr_val:
-            # add collection path only if relative path, and not a url
-            if '://' not in curr_val and not os.path.isabs(curr_val):
-                coll[dir_key] = self._norm_path(root_dir, curr_val) + os.path.sep
-            return False
-
-        thedir = self.config.get('paths')[dir_key]
-
-        fulldir = os.path.join(root_dir, thedir)
-
-        if os.path.isdir(fulldir):
-            fulldir = os.path.abspath(fulldir) + os.path.sep
-            coll[dir_key] = fulldir
-            return True
-        elif required:
-            msg = 'Dir "{0}" does not exist for "{1}"'.format(fulldir, dir_key)
-            raise Exception(msg)
-        else:
-            return False
-
-    def load_yaml_file(self, root_dir, filename):
-        filename = os.path.join(root_dir, filename)
-        if os.path.isfile(filename):
-            return load_yaml_config(filename)
-        else:
-            return {}
-
-    def load_coll_dir(self, root_dir, name):
-        # Load config.yaml
-        coll_config = self.load_yaml_file(root_dir, 'config.yaml')
-
-        # Load metadata.yaml
-        metadata = self.load_yaml_file(root_dir, 'metadata.yaml')
-        coll_config['metadata'] = metadata
-
-        self._add_dir_if_exists(coll_config, root_dir, 'index_paths', True)
-
-        # inherit these properties from base, in case archive_paths is shared
-        shared_config = DictChain(coll_config, self.config)
-        self._add_dir_if_exists(shared_config, root_dir, 'archive_paths', True)
-
-        if self._add_dir_if_exists(coll_config, root_dir, 'static_path', False):
-            self.static_routes['static/' + name] = coll_config['static_path']
-
-        # Custom templates dir
-        templates_dir = self.config.get('paths').get('templates_dir')
-        if templates_dir:
-            template_dir = os.path.join(root_dir, templates_dir)
-
-        # Check all templates
-        template_files = self.config.get('paths')['template_files']
-        for tname, tfile in six.iteritems(template_files):
-            if tname in coll_config:
-                # Already set
-                coll_config[tname] = self._norm_path(root_dir, coll_config[tname])
-
-            # If templates override dir
-            elif templates_dir:
-                full = os.path.join(template_dir, tfile)
-                if os.path.isfile(full):
-                    coll_config[tname] = full
-
-        return coll_config
-
-
-#=================================================================
-def create_wb_router(passed_config=None):
-    passed_config = passed_config or {}
-
-    defaults = load_yaml_config(DEFAULT_CONFIG)
-
-    config = DictChain(passed_config, defaults)
-
-    routes = []
-
-    port = config.get('port')
-
-    collections = config.get('collections', {})
-
-    static_routes = config.get('static_routes', {})
-
-    root_route = None
-
-    # collections based on file system
-    if config.get('enable_auto_colls', True):
-        colls_loader_cls = config.get('colls_loader_cls', DirectoryCollsLoader)
-        dir_loader = colls_loader_cls(config, static_routes, collections)
-        dir_loader()
-        #collections.update(dir_loader())
-
-    if config.get('enable_memento', False):
-        request_class = MementoRequest
-    else:
-        request_class = WbRequest
-
-    # store live and replay handlers
-    handler_dict = {}
-
-    # setup template globals
-    templates_dirs = config['templates_dirs']
-    jinja_env = J2TemplateView.init_shared_env(paths=templates_dirs,
-                                               packages=config['template_packages'])
-
-    jinja_env.globals.update(config.get('template_globals', {}))
-
-    for static_name, static_path in six.iteritems(static_routes):
-        routes.append(Route(static_name, StaticHandler(static_path)))
-
-    for name, value in six.iteritems(collections):
-        if isinstance(value, BaseHandler):
-            handler_dict[name] = value
-            new_route = Route(name, value, config=config)
-            if name != '':
-                routes.append(new_route)
-            else:
-                root_route = new_route
-            continue
-
-        route_config = init_route_config(value, config)
-        route_class = route_config.get('route_class', Route)
-
-        if route_config.get('index_paths') == '$liveweb':
-            live = create_live_handler(route_config)
-            handler_dict[name] = live
-            new_route = route_class(name, live, config=route_config)
-            if name != '':
-                routes.append(new_route)
-            else:
-                root_route = new_route
-            continue
-
-        query_handler = init_collection(route_config)
-
-        wb_handler = create_wb_handler(
-            query_handler=query_handler,
-            config=route_config,
-        )
-
-        handler_dict[name] = wb_handler
-
-        logging.debug('Adding Collection: ' + name)
-
-        new_route = route_class(name, wb_handler,
-                                config=route_config,
-                                request_class=request_class)
-
-        if name != '':
-            routes.append(new_route)
-        else:
-            root_route = new_route
-
-        # cdx query handler
-        cdx_api_suffix = route_config.get('enable_cdx_api', False)
-
-        if cdx_api_suffix:
-            add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler,
-                                route_class=route_class)
-
-    if config.get('debug_echo_env', False):
-        routes.append(Route('echo_env', DebugEchoEnvHandler()))
-
-    if config.get('debug_echo_req', False):
-        routes.append(Route('echo_req', DebugEchoHandler()))
-
-    if root_route:
-        routes.append(root_route)
-
-    # resolve any cross handler references
-    for route in routes:
-        if hasattr(route.handler, 'resolve_refs'):
-            route.handler.resolve_refs(handler_dict)
-
-    # default to regular archival mode
-    router = ArchivalRouter
-
-    if config.get('enable_http_proxy', False):
-        router = ProxyArchivalRouter
-
-        view = init_view(config, 'proxy_select_html')
-
-        if 'proxy_options' not in passed_config:
-            passed_config['proxy_options'] = {}
-
-        if view:
-            passed_config['proxy_options']['proxy_select_view'] = view
-
-        view = init_view(config, 'proxy_cert_download_html')
-
-        if view:
-            passed_config['proxy_options']['proxy_cert_download_view'] = view
-
-    # Finally, create wb router
-    return router(
-        routes,
-        port=port,
-        abs_path=config.get('absolute_paths', True),
-        home_view=init_view(config, 'home_html'),
-        error_view=init_view(config, 'error_html'),
-        info_view=init_view(config, 'info_json'),
-        config=config
-    )
--- a/pywb/webapp/query_handler.py
+++ b/pywb/webapp/query_handler.py
@ -1,172 +0,0 @@
-from pywb.utils.dsrules import DEFAULT_RULES_FILE
-
-from pywb.perms.perms_filter import make_perms_cdx_filter
-from pywb.framework.wbrequestresponse import WbResponse
-from pywb.cdx.cdxserver import create_cdx_server
-from pywb.webapp.views import MementoTimemapView
-
-
-#=================================================================
-class QueryHandler(object):
-    """
-    Main interface for querying the index (currently only CDX) from a
-    source server (currently a cdx server)
-
-    Creates an appropriate query based on wbrequest type info and outputs
-    a returns a view for the cdx, either a raw cdx iter, an html view,
-    etc...
-    """
-
-    def __init__(self, cdx_server, html_query_view=None, perms_policy=None):
-        self.cdx_server = cdx_server
-        self.perms_policy = perms_policy
-
-        self.views = {}
-        if html_query_view:
-            self.views['html'] = html_query_view
-
-        self.views['timemap'] = MementoTimemapView()
-
-    @staticmethod
-    def init_from_config(config,
-                         ds_rules_file=DEFAULT_RULES_FILE,
-                         html_view=None,
-                         server_cls=None):
-
-        perms_policy = None
-
-        if hasattr(config, 'get'):
-            perms_policy = config.get('perms_policy')
-            server_cls = config.get('server_cls', server_cls)
-
-        cdx_server = create_cdx_server(config, ds_rules_file, server_cls)
-
-        return QueryHandler(cdx_server, html_view, perms_policy)
-
-    def get_output_type(self, wb_url):
-        # cdx server only supports text and cdxobject for now
-        if wb_url.mod == 'cdx_':
-            output = 'text'
-        elif wb_url.mod == 'timemap':
-            output = 'timemap'
-        elif wb_url.is_query():
-            output = 'html'
-        else:
-            output = 'cdxobject'
-
-        return output
-
-    def load_for_request(self, wbrequest):
-        wbrequest.normalize_post_query()
-
-        wb_url = wbrequest.wb_url
-        output = self.get_output_type(wb_url)
-
-        # init standard params
-        params = self.get_query_params(wb_url)
-
-        params['allowFuzzy'] = True
-        params['url'] = wb_url.url
-        params['output'] = output
-
-        params['filter'].append('!mimetype:-')
-
-        # get metadata
-        if wb_url.mod == 'vi_':
-            # matching metadata explicitly with special scheme
-            schema, rest = wb_url.url.split('://', 1)
-            params['url'] = 'metadata://' + rest
-            params['filter'].append('~original:metadata://')
-
-        cdx_iter = self.load_cdx(wbrequest, params)
-        return cdx_iter, output
-
-    def load_cdx(self, wbrequest, params):
-        if wbrequest:
-            # add any custom filter from the request
-            if wbrequest.query_filter:
-                filters = params.get('filter')
-                if filters:
-                    filters.extend(wbrequest.query_filter)
-                else:
-                    params['filter'] = wbrequest.query_filter
-
-            params['coll'] = wbrequest.coll
-            if wbrequest.custom_params:
-                params.update(wbrequest.custom_params)
-
-        if self.perms_policy:
-            perms_op = make_perms_cdx_filter(self.perms_policy, wbrequest)
-            if perms_op:
-                params['custom_ops'] = [perms_op]
-
-        cdx_iter = self.cdx_server.load_cdx(**params)
-        return cdx_iter
-
-    def make_cdx_response(self, wbrequest, cdx_iter, output, **kwargs):
-        # if not text, the iterator is assumed to be CDXObjects
-        if output and output != 'text':
-            view = self.views.get(output)
-            if view:
-                return view.render_response(wbrequest, cdx_iter, **kwargs)
-
-        return WbResponse.text_stream(cdx_iter)
-
-    def cdx_load_callback(self, wbrequest):
-        def load_cdx(params):
-            params['output'] = 'cdxobject'
-            return self.load_cdx(wbrequest, params)
-
-        return load_cdx
-
-    def get_query_params(self,
-                         wburl, limit=150000,
-                         collapse_time=None,
-                         replay_closest=100):
-
-        #if wburl.type == wburl.URL_QUERY:
-        #    raise NotImplementedError('Url Query Not Yet Supported')
-
-        return {
-            wburl.QUERY:
-                {'collapseTime': collapse_time,
-                 'filter': ['!statuscode:(500|502|504)'],
-                 'from': wburl.timestamp,
-                 'to': wburl.end_timestamp,
-                 'limit': limit,
-                 'matchType': 'exact',
-                },
-
-            wburl.URL_QUERY:
-                {'collapse': 'urlkey',
-                 'matchType': 'prefix',
-                 'showGroupCount': True,
-                 'showUniqCount': True,
-                 'lastSkipTimestamp': True,
-                 'limit': limit,
-                 'fl': ('urlkey,original,timestamp,' +
-                        'endtimestamp,groupcount,uniqcount'),
-                 'filter': [],
-                },
-
-            wburl.REPLAY:
-                {'sort': 'closest',
-                 'filter': ['!statuscode:(500|502|504)'],
-                 'limit': replay_closest,
-                 'closest': wburl.timestamp,
-                 'resolveRevisits': True,
-                 'matchType': 'exact',
-                },
-
-            wburl.LATEST_REPLAY:
-                {'sort': 'reverse',
-       # Not appropriate as default
-       # Should be an option to configure status code filtering in general
-       #         'filter': ['statuscode:[23]..|-'],
-                 'filter': [],
-                 'limit': '1',
-                 'resolveRevisits': True,
-                 'matchType': 'exact',
-                }
-
-        }[wburl.type]
--- a/pywb/webapp/rangecache.py
+++ b/pywb/webapp/rangecache.py
@ -1,92 +0,0 @@
-from warcio.statusandheaders import StatusAndHeaders
-from warcio.limitreader import LimitReader
-
-from pywb.framework.cache import create_cache
-
-from tempfile import NamedTemporaryFile, mkdtemp
-
-import yaml
-import os
-from shutil import rmtree
-
-import atexit
-
-
-#=================================================================
-class RangeCache(object):
-    def __init__(self):
-        self.cache = create_cache()
-        self.temp_dir = None
-        atexit.register(self.cleanup)
-
-    def cleanup(self):
-        if self.temp_dir:  # pragma: no cover
-            print('Removing: ' + self.temp_dir)
-            rmtree(self.temp_dir, True)
-            self.temp_dir = None
-
-    def handle_range(self, wbrequest, key, wbresponse_func,
-                     url, start, end, use_206):
-        # key must be set
-        assert(key)
-        if key not in self.cache:
-            wbrequest.custom_params['noredir'] = True
-            response = wbresponse_func()
-
-            # only cache 200 responses
-            if not response.status_headers.get_statuscode().startswith('200'):
-                return response.status_headers, response.body
-
-            if not self.temp_dir:
-                self.temp_dir = mkdtemp(prefix='_pywbcache')
-            else:
-                pass
-                #self._check_dir_size(self.temp_dir)
-
-            with NamedTemporaryFile(delete=False, dir=self.temp_dir) as fh:
-                for obj in response.body:
-                    fh.write(obj)
-
-                name = fh.name
-
-            spec = dict(name=fh.name,
-                        headers=response.status_headers.headers)
-
-            self.cache[key] = yaml.dump(spec)
-        else:
-            spec = yaml.load(self.cache[key])
-
-            spec['headers'] = [tuple(x) for x in spec['headers']]
-
-        filelen = os.path.getsize(spec['name'])
-
-        maxlen = filelen - start
-
-        if end:
-            maxlen = min(maxlen, end - start + 1)
-
-        def read_range():
-            with open(spec['name'], 'rb') as fh:
-                fh.seek(start)
-                fh = LimitReader.wrap_stream(fh, maxlen)
-                while True:
-                    buf = fh.read()
-                    if not buf:
-                        break
-
-                    yield buf
-
-        status_headers = StatusAndHeaders('200 OK', spec['headers'])
-
-        if use_206:
-            StatusAndHeaders.add_range(status_headers, start,
-                                       maxlen,
-                                       filelen)
-
-        status_headers.replace_header('Content-Length', str(maxlen))
-
-        return status_headers, read_range()
-
-
-#=================================================================
-range_cache = RangeCache()
--- a/pywb/webapp/replay_views.py
+++ b/pywb/webapp/replay_views.py
@ -1,392 +0,0 @@
-import re
-import logging
-
-from io import BytesIO
-from six.moves.urllib.parse import urlsplit
-from itertools import chain
-
-from warcio.statusandheaders import StatusAndHeaders
-from warcio.limitreader import LimitReader
-from warcio.timeutils import timestamp_now
-from warcio.recordloader import ArchiveLoadFailed
-
-from pywb.utils.wbexception import WbException, NotFoundException
-
-from pywb.framework.wbrequestresponse import WbResponse
-from pywb.framework.memento import MementoResponse
-
-from pywb.rewrite.rewrite_content import RewriteContent
-
-from pywb.webapp.views import HeadInsertView
-
-from pywb.webapp.rangecache import range_cache
-
-
-#=================================================================
-class CaptureException(WbException):
-    """
-    raised to indicate an issue with a specific capture
-    and will be caught and result in a retry, if possible
-    if not, will result in a 502
-    """
-    def status(self):
-        return '502 Internal Server Error'
-
-
-#=================================================================
-class ReplayView(object):
-    STRIP_SCHEME_WWW = re.compile('^([\w]+:[/]*(?:www[\d]*\.)?)?(.*?)$', re.MULTILINE)
-
-    def __init__(self, content_loader, config):
-        self.content_loader = content_loader
-
-        framed = config.get('framed_replay')
-        self.content_rewriter = RewriteContent(is_framed_replay=framed)
-
-        self.head_insert_view = HeadInsertView.init_from_config(config)
-
-        self.buffer_response = config.get('buffer_response', True)
-        self.buffer_max_size = config.get('buffer_max_size', 16384)
-
-        self.redir_to_exact = config.get('redir_to_exact', True)
-
-        memento = config.get('enable_memento', False)
-        if memento:
-            self.response_class = MementoResponse
-        else:
-            self.response_class = WbResponse
-
-        self.enable_range_cache = config.get('enable_ranges', True)
-
-        self._reporter = config.get('reporter')
-
-    def render_content(self, wbrequest, cdx_lines, cdx_loader):
-        last_e = None
-        first = True
-
-        #cdx_lines = args[0]
-        #cdx_loader = args[1]
-
-        # List of already failed w/arcs
-        failed_files = []
-
-        response = None
-
-        # Iterate over the cdx until find one that works
-        # The cdx should already be sorted in
-        # closest-to-timestamp order (from the cdx server)
-        for cdx in cdx_lines:
-            try:
-                # optimize: can detect if redirect is needed just from the cdx,
-                # no need to load w/arc data if requiring exact match
-                if first:
-                    redir_response = self._redirect_if_needed(wbrequest, cdx)
-                    if redir_response:
-                        return redir_response
-
-                    first = False
-
-                response = self.cached_replay_capture(wbrequest,
-                                                      cdx,
-                                                      cdx_loader,
-                                                      failed_files)
-
-            except (CaptureException, ArchiveLoadFailed) as ce:
-                #import traceback
-                #traceback.print_exc()
-                logging.debug(ce)
-                last_e = ce
-                pass
-
-            if response:
-                return response
-
-        if not last_e:
-            # can only get here if cdx_lines is empty somehow
-            # should be filtered out before hand, but if not
-            msg = 'No Captures found for: ' + wbrequest.wb_url.url
-            last_e = NotFoundException(msg)
-
-        raise last_e
-
-    def cached_replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
-        def get_capture():
-            return self.replay_capture(wbrequest,
-                                       cdx,
-                                       cdx_loader,
-                                       failed_files)
-
-        if not self.enable_range_cache:
-            return get_capture()
-
-        range_info = wbrequest.extract_range()
-
-        if not range_info:
-            return get_capture()
-
-        range_status, range_iter = (range_cache.
-            handle_range(wbrequest,
-                         cdx.get('digest', cdx['urlkey']),
-                         get_capture,
-                         *range_info))
-
-        response = self.response_class(range_status,
-                                       range_iter,
-                                       wbrequest=wbrequest,
-                                       cdx=cdx)
-        return response
-
-    def replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
-        (status_headers, stream) = (self.content_loader(cdx,
-                                                        failed_files,
-                                                        cdx_loader,
-                                                        wbrequest))
-
-        # check and reject self-redirect
-        self._reject_self_redirect(wbrequest, cdx, status_headers)
-
-        # check if redir is needed
-        redir_response = self._redirect_if_needed(wbrequest, cdx)
-        if redir_response:
-            return redir_response
-
-        #length = status_headers.get_header('content-length')
-        #stream = LimitReader.wrap_stream(stream, length)
-
-        # one more check for referrer-based self-redirect
-        # TODO: evaluate this, as refreshing in browser may sometimes cause
-        # referrer to be set to the same page, incorrectly skipping a capture
-        # self._reject_referrer_self_redirect(wbrequest)
-
-        urlrewriter = wbrequest.urlrewriter
-
-        # if using url rewriter, use original url for rewriting purposes
-        if wbrequest and wbrequest.wb_url:
-            wbrequest.wb_url.url = cdx['url']
-
-        if wbrequest.options['is_ajax']:
-            wbrequest.urlrewriter.rewrite_opts['is_ajax'] = True
-
-        head_insert_func = None
-        if self.head_insert_view:
-            head_insert_func = (self.head_insert_view.
-                                create_insert_func(wbrequest))
-
-        result = (self.content_rewriter.
-                  rewrite_content(urlrewriter,
-                                  status_headers=status_headers,
-                                  stream=stream,
-                                  head_insert_func=head_insert_func,
-                                  urlkey=cdx['urlkey'],
-                                  cdx=cdx,
-                                  env=wbrequest.env))
-
-        (status_headers, response_iter, is_rewritten) = result
-
-        # buffer response if buffering enabled
-        if self.buffer_response:
-            content_len = status_headers.get_header('content-length')
-            try:
-                content_len = int(content_len)
-            except:
-                content_len = 0
-
-            if content_len <= 0:
-                max_size = self.buffer_max_size
-                response_iter = self.buffered_response(status_headers,
-                                                       response_iter,
-                                                       max_size)
-
-        # Set Content-Location if not exact capture
-        if not self.redir_to_exact:
-            mod = wbrequest.options.get('replay_mod', wbrequest.wb_url.mod)
-            canon_url = (wbrequest.urlrewriter.
-                         get_new_url(timestamp=cdx['timestamp'],
-                                     url=cdx['url'],
-                                     mod=mod))
-
-            status_headers.headers.append(('Content-Location', canon_url))
-
-        if wbrequest.wb_url.mod == 'vi_':
-            status_headers.headers.append(('access-control-allow-origin', '*'))
-
-        response = self.response_class(status_headers,
-                                       response_iter,
-                                       wbrequest=wbrequest,
-                                       cdx=cdx)
-
-        # notify reporter callback, if any
-        if self._reporter:
-            self._reporter(wbrequest, cdx, response)
-
-        return response
-
-    # Buffer rewrite iterator and return a response from a string
-    def buffered_response(self, status_headers, iterator, max_size):
-        out = BytesIO()
-        size = 0
-        read_all = True
-
-        try:
-            for buff in iterator:
-                buff = bytes(buff)
-                size += len(buff)
-                out.write(buff)
-                if max_size > 0 and size > max_size:
-                    read_all = False
-                    break
-
-        finally:
-            content = out.getvalue()
-            out.close()
-
-        if read_all:
-            content_length_str = str(len(content))
-
-            # remove existing content length
-            status_headers.replace_header('Content-Length',
-                                          content_length_str)
-            return [content]
-        else:
-            status_headers.remove_header('Content-Length')
-            return chain(iter([content]), iterator)
-
-    def _redirect_if_needed(self, wbrequest, cdx):
-        if not self.redir_to_exact:
-            return None
-
-        if wbrequest.options['is_proxy']:
-            return None
-
-        if wbrequest.custom_params.get('noredir'):
-            return None
-
-        is_timegate = (wbrequest.options.get('is_timegate', False))
-        if not is_timegate:
-            is_timegate = wbrequest.wb_url.is_latest_replay()
-
-        redir_needed = is_timegate or (cdx['timestamp'] != wbrequest.wb_url.timestamp)
-
-        if not redir_needed:
-            return None
-
-        if self.enable_range_cache and wbrequest.extract_range():
-            return None
-
-        #if is_timegate:
-        #    timestamp = timestamp_now()
-        #else:
-        timestamp = cdx['timestamp']
-
-        new_url = (wbrequest.urlrewriter.
-                   get_new_url(timestamp=timestamp,
-                               url=cdx['url']))
-
-        if wbrequest.method == 'POST':
-            #   FF shows a confirm dialog, so can't use 307 effectively
-            #   was: statusline = '307 Same-Method Internal Redirect'
-            return None
-        elif is_timegate:
-            statusline = '302 Found'
-        else:
-            # clear cdx line to indicate internal redirect
-            statusline = '302 Internal Redirect'
-            cdx = None
-
-        status_headers = StatusAndHeaders(statusline,
-                                          [('Location', new_url)])
-
-        return self.response_class(status_headers,
-                                   wbrequest=wbrequest,
-                                   cdx=cdx,
-                                   memento_is_redir=True)
-
-    def _reject_self_redirect(self, wbrequest, cdx, status_headers):
-        """
-        Check if response is a 3xx redirect to the same url
-        If so, reject this capture to avoid causing redirect loop
-        """
-        if not status_headers.statusline.startswith('3'):
-            return
-
-        # skip all 304s
-        if (status_headers.statusline.startswith('304') and
-             not wbrequest.wb_url.is_identity):
-
-            raise CaptureException('Skipping 304 Modified: ' + str(cdx))
-
-        request_url = wbrequest.wb_url.url.lower()
-        location_url = status_headers.get_header('Location')
-        if not location_url:
-            return
-
-        location_url = location_url.lower()
-        if location_url.startswith('/'):
-            host = urlsplit(cdx['url']).netloc
-            location_url = host + location_url
-
-        if (ReplayView.strip_scheme_www(request_url) ==
-             ReplayView.strip_scheme_www(location_url)):
-            raise CaptureException('Self Redirect: ' + str(cdx))
-
-    # TODO: reevaluate this, as it may reject valid refreshes of a page
-    def _reject_referrer_self_redirect(self, wbrequest):  # pragma: no cover
-        """
-        Perform final check for referrer based self-redirect.
-        This method should be called after verifying that
-        the request timestamp == capture timestamp
-
-        If referrer is same as current url,
-        reject this response and try another capture.
-        """
-        if not wbrequest.referrer:
-            return
-
-        # build full url even if using relative-rewriting
-        request_url = (wbrequest.host_prefix +
-                       wbrequest.rel_prefix + str(wbrequest.wb_url))
-
-        if (ReplayView.strip_scheme_www(request_url) ==
-             ReplayView.strip_scheme_www(wbrequest.referrer)):
-            raise CaptureException('Self Redirect via Referrer: ' +
-                                   str(wbrequest.wb_url))
-
-    @staticmethod
-    def strip_scheme_www(url):
-        """
-        >>> ReplayView.strip_scheme_www('https://example.com') ==\
-            ReplayView.strip_scheme_www('http://example.com')
-        True
-
-        >>> ReplayView.strip_scheme_www('https://example.com') ==\
-            ReplayView.strip_scheme_www('http:/example.com')
-        True
-
-        >>> ReplayView.strip_scheme_www('https://example.com') ==\
-            ReplayView.strip_scheme_www('example.com')
-        True
-
-        >>> ReplayView.strip_scheme_www('https://example.com') ==\
-            ReplayView.strip_scheme_www('http://www2.example.com')
-        True
-
-        >>> ReplayView.strip_scheme_www('about://example.com') ==\
-            ReplayView.strip_scheme_www('example.com')
-        True
-
-        >>> ReplayView.strip_scheme_www('http://') ==\
-            ReplayView.strip_scheme_www('')
-        True
-
-        >>> ReplayView.strip_scheme_www('#!@?') ==\
-            ReplayView.strip_scheme_www('#!@?')
-        True
-        """
-        m = ReplayView.STRIP_SCHEME_WWW.match(url)
-        match = m.group(2)
-        return match
-
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
--- a/pywb/webapp/test/test_view_filters.py
+++ b/pywb/webapp/test/test_view_filters.py
@ -1,20 +0,0 @@
-"""
->>> format_ts('20141226101000')
-'Fri, Dec 26 2014 10:10:00'
-
->>> format_ts('20141226101000', '%s')
-1419588600
-
->>> is_wb_handler(DebugEchoHandler())
-False
-
-
-"""
-
-from pywb.webapp.views import format_ts, is_wb_handler
-from pywb.webapp.handlers import DebugEchoHandler
-
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
--- a/pywb/webapp/views.py
+++ b/pywb/webapp/views.py
@ -1,222 +0,0 @@
-from warcio.timeutils import timestamp_to_datetime, timestamp_to_sec
-from pywb.framework.wbrequestresponse import WbResponse
-from pywb.framework.memento import make_timemap, LINK_FORMAT
-
-from six.moves.urllib.parse import urlsplit
-
-import logging
-import json
-import os
-
-from jinja2 import Environment
-from jinja2 import FileSystemLoader, PackageLoader, ChoiceLoader
-
-
-FILTERS = {}
-
-
-#=================================================================
-class template_filter(object):
-    """
-    Decorator for registering a function as a jinja2 filter
-    If optional argument is supplied, it is used as the filter name
-    Otherwise, the func name is the filter name
-    """
-    def __init__(self, param=None):
-        self.name = param
-
-    def __call__(self, func):
-        name = self.name
-        if not name:
-            name = func.__name__
-
-        FILTERS[name] = func
-        return func
-
-
-#=================================================================
-# Filters
-@template_filter()
-def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'):
-    if format_ == '%s':
-        return timestamp_to_sec(value)
-    else:
-        value = timestamp_to_datetime(value)
-        return value.strftime(format_)
-
-
-@template_filter('urlsplit')
-def get_urlsplit(url):
-    split = urlsplit(url)
-    return split
-
-
-@template_filter()
-def is_wb_handler(obj):
-    if not hasattr(obj, 'handler'):
-        return False
-
-    return obj.handler.__class__.__name__ == "WBHandler"
-
-
-@template_filter()
-def tojson(obj):
-    return json.dumps(obj)
-
-
-#=================================================================
-class FileOnlyPackageLoader(PackageLoader):
-    def get_source(self, env, template):
-        dir_, file_ = os.path.split(template)
-        return super(FileOnlyPackageLoader, self).get_source(env, file_)
-
-
-#=================================================================
-class RelEnvironment(Environment):
-    """Override join_path() to enable relative template paths."""
-    def join_path(self, template, parent):
-        return os.path.join(os.path.dirname(parent), template)
-
-
-#=================================================================
-class J2TemplateView(object):
-    shared_jinja_env = None
-
-    def __init__(self, filename):
-        self.template_file = filename
-        self.jinja_env = self.init_shared_env()
-
-    @staticmethod
-    def init_shared_env(paths=['templates', '.', '/'],
-                        packages=['pywb'],
-                        overlay_env=None):
-
-        if J2TemplateView.shared_jinja_env:
-            return J2TemplateView.shared_jinja_env
-
-        loaders = J2TemplateView._add_loaders(paths, packages)
-        loader = ChoiceLoader(loaders)
-
-        if overlay_env:
-            jinja_env = overlay_env.overlay(loader=loader, trim_blocks=True)
-        else:
-            jinja_env = RelEnvironment(loader=loader, trim_blocks=True)
-
-        jinja_env.filters.update(FILTERS)
-        J2TemplateView.shared_jinja_env = jinja_env
-        return jinja_env
-
-    @staticmethod
-    def _add_loaders(paths, packages):
-        loaders = []
-        # add loaders for paths
-        for path in paths:
-            loaders.append(FileSystemLoader(path))
-
-        # add loaders for all specified packages
-        for package in packages:
-            loaders.append(FileOnlyPackageLoader(package))
-
-        return loaders
-
-    def render_to_string(self, **kwargs):
-        template = self.jinja_env.get_template(self.template_file)
-
-        wbrequest = kwargs.get('wbrequest')
-        if wbrequest:
-            params = wbrequest.env.get('pywb.template_params')
-            if params:
-                kwargs.update(params)
-
-        template_result = template.render(**kwargs)
-
-        return template_result
-
-    def render_response(self, **kwargs):
-        template_result = self.render_to_string(**kwargs)
-        status = kwargs.get('status', '200 OK')
-        content_type = kwargs.get('content_type', 'text/html; charset=utf-8')
-        return WbResponse.text_response(template_result,
-                                        status=status,
-                                        content_type=content_type)
-
-
-#=================================================================
-def init_view(config, key, view_class=J2TemplateView):
-    filename = config.get(key)
-    if not filename:
-        return None
-
-    logging.debug('Adding {0}: {1}'.format(key, filename))
-    return view_class(filename)
-
-
-#=================================================================
-class HeadInsertView(J2TemplateView):
-    def create_insert_func(self, wbrequest,
-                           include_ts=True):
-
-        if wbrequest.options['is_ajax']:
-            return None
-
-        url = wbrequest.wb_url.get_url()
-
-        top_url = wbrequest.wb_prefix
-        top_url += wbrequest.wb_url.to_str(mod=wbrequest.final_mod)
-
-        include_wombat = not wbrequest.wb_url.is_banner_only
-
-        def make_head_insert(rule, cdx):
-            cdx['url'] = url
-            return (self.render_to_string(wbrequest=wbrequest,
-                                          cdx=cdx,
-                                          top_url=top_url,
-                                          include_ts=include_ts,
-                                          include_wombat=include_wombat,
-                                          banner_html=self.banner_html,
-                                          rule=rule))
-        return make_head_insert
-
-    @staticmethod
-    def init_from_config(config):
-        view = config.get('head_insert_view')
-        if not view:
-            html = config.get('head_insert_html', 'templates/head_insert.html')
-
-            if html:
-                banner_html = config.get('banner_html', 'banner.html')
-                view = HeadInsertView(html)
-                logging.debug('Adding HeadInsert: {0}, Banner {1}'.
-                              format(html, banner_html))
-
-                view.banner_html = banner_html
-
-        return view
-
-
-#=================================================================
-# query views
-#=================================================================
-class J2HtmlCapturesView(J2TemplateView):
-    def render_response(self, wbrequest, cdx_lines, **kwargs):
-        def format_cdx_lines():
-            for cdx in cdx_lines:
-                cdx['_orig_url'] = cdx['url']
-                cdx['url'] = wbrequest.wb_url.get_url(url=cdx['url'])
-                yield cdx
-
-        return J2TemplateView.render_response(self,
-                                    cdx_lines=list(format_cdx_lines()),
-                                    url=wbrequest.wb_url.get_url(),
-                                    type=wbrequest.wb_url.type,
-                                    prefix=wbrequest.wb_prefix,
-                                    **kwargs)
-
-
-#=================================================================
-class MementoTimemapView(object):
-    def render_response(self, wbrequest, cdx_lines, **kwargs):
-        memento_lines = make_timemap(wbrequest, cdx_lines)
-
-        return WbResponse.text_stream(memento_lines,
-                                      content_type=LINK_FORMAT)
--- a/tests_disabled/test_auto_colls.py
+++ b/tests_disabled/test_auto_colls.py