diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..63400c07 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,8 @@ +[run] +omit = + */test/* + */tests/* + +[report] +exclude_lines = + if __name__ == .__main__.: diff --git a/.travis.yml b/.travis.yml index 81d946f7..bab78128 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,14 @@ python: # command to install dependencies install: - "python setup.py -q install" + - "pip install python-coveralls" + - "pip install pytest-cov" # command to run tests #script: nosetests --with-doctest #script: py.test run-tests.py ./pywb/ --doctest-modules --ignore=setup.py -script: py.test -v --doctest-module ./tests/*.py ./pywb/ +#script: py.test -v --doctest-module ./tests/*.py ./pywb/ +script: + py.test --cov-config .coveragerc --cov pywb -v --doctest-module ./pywb/ tests/ + +after_success: + coveralls diff --git a/pywb/archivalrouter.py b/pywb/archivalrouter.py index 354edddd..4d28b57e 100644 --- a/pywb/archivalrouter.py +++ b/pywb/archivalrouter.py @@ -3,13 +3,13 @@ import re from wbrequestresponse import WbRequest, WbResponse from pywb.rewrite.url_rewriter import UrlRewriter -from pywb.rewrite.wburl import WbUrl + #================================================================= # ArchivalRouter -- route WB requests in archival mode #================================================================= class ArchivalRouter: - def __init__(self, routes, hostpaths = None, abs_path = True, home_view = None, error_view = None): + def __init__(self, routes, hostpaths=None, abs_path=True, home_view=None, error_view=None): self.routes = routes self.fallback = ReferRedirect(hostpaths) self.abs_path = abs_path @@ -69,24 +69,25 @@ class Route: if not matcher: return None - rel_prefix = matcher.group(0) + matched_str = matcher.group(0) - if rel_prefix: - wb_prefix = env['SCRIPT_NAME'] + '/' + rel_prefix + '/' - wb_url_str = request_uri[len(rel_prefix) + 2:] # remove the '/' + rel_prefix part of uri + if matched_str: + 
rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/' + wb_url_str = request_uri[len(matched_str) + 2:] # remove the '/' + rel_prefix part of uri else: - wb_prefix = env['SCRIPT_NAME'] + '/' + rel_prefix = env['SCRIPT_NAME'] + '/' wb_url_str = request_uri[1:] # the request_uri is the wb_url, since no coll coll = matcher.group(self.coll_group) wbrequest = WbRequest(env, - request_uri = request_uri, - wb_url_str = wb_url_str, - wb_prefix = wb_prefix, - coll = coll, - host_prefix = WbRequest.make_host_prefix(env) if use_abs_prefix else '', - wburl_class = self.handler.get_wburl_type()) + request_uri=request_uri, + wb_url_str=wb_url_str, + rel_prefix=rel_prefix, + coll=coll, + use_abs_prefix=use_abs_prefix, + wburl_class = self.handler.get_wburl_type(), + urlrewriter_class=UrlRewriter) # Allow for applying of additional filters diff --git a/pywb/cdx/canonicalize.py b/pywb/cdx/canonicalize.py index e0adb5c1..e2f818b9 100644 --- a/pywb/cdx/canonicalize.py +++ b/pywb/cdx/canonicalize.py @@ -2,6 +2,7 @@ """ import surt +import urlparse from cdxobject import CDXException @@ -69,6 +70,109 @@ index.html?a=b?c=)/') return surt +#================================================================= +def calc_search_range(url, match_type, surt_ordered=True, url_canon=None): + """ + Canonicalize a url (either with custom canonicalizer or + standard canonicalizer with or without surt) + + Then, compute a start and end search url search range + for a given match type. 
+ + Support match types: + * exact + * prefix + * host + * domain (only available when for surt ordering) + + Examples below: + + # surt ranges + >>> calc_search_range('http://example.com/path/file.html', 'exact') + ('com,example)/path/file.html', 'com,example)/path/file.html!') + + >>> calc_search_range('http://example.com/path/file.html', 'prefix') + ('com,example)/path/file.html', 'com,example)/path/file.htmm') + + >>> calc_search_range('http://example.com/path/file.html', 'host') + ('com,example)/', 'com,example*') + + >>> calc_search_range('http://example.com/path/file.html', 'domain') + ('com,example)/', 'com,example-') + + special case for tld domain range + >>> calc_search_range('com', 'domain') + ('com,', 'com-') + + # non-surt ranges + >>> calc_search_range('http://example.com/path/file.html', 'exact', False) + ('example.com/path/file.html', 'example.com/path/file.html!') + + >>> calc_search_range('http://example.com/path/file.html', 'prefix', False) + ('example.com/path/file.html', 'example.com/path/file.htmm') + + >>> calc_search_range('http://example.com/path/file.html', 'host', False) + ('example.com/', 'example.com0') + + # domain range not supported + >>> calc_search_range('http://example.com/path/file.html', 'domain', False) + Traceback (most recent call last): + Exception: matchType=domain unsupported for non-surt + """ + def inc_last_char(x): + return x[0:-1] + chr(ord(x[-1]) + 1) + + if not url_canon: + # make new canon + url_canon = UrlCanonicalizer(surt_ordered) + else: + # ensure surt order matches url_canon + surt_ordered = url_canon.surt_ordered + + start_key = url_canon(url) + + if match_type == 'exact': + end_key = start_key + '!' 
+ + elif match_type == 'prefix': + # add trailing slash if url has it + if url.endswith('/') and not start_key.endswith('/'): + start_key += '/' + + end_key = inc_last_char(start_key) + + elif match_type == 'host': + if surt_ordered: + host = start_key.split(')/')[0] + + start_key = host + ')/' + end_key = host + '*' + else: + host = urlparse.urlsplit(url).netloc + + start_key = host + '/' + end_key = host + '0' + + elif match_type == 'domain': + if not surt_ordered: + raise Exception('matchType=domain unsupported for non-surt') + + host = start_key.split(')/')[0] + + # if tld, use com, as start_key + # otherwise, stick with com,example)/ + if not ',' in host: + start_key = host + ',' + else: + start_key = host + ')/' + + end_key = host + '-' + else: + raise Exception('Invalid match_type: ' + match_type) + + return (start_key, end_key) + + if __name__ == "__main__": import doctest doctest.testmod() diff --git a/pywb/cdx/cdxobject.py b/pywb/cdx/cdxobject.py index 203cb7ef..4eba8025 100644 --- a/pywb/cdx/cdxobject.py +++ b/pywb/cdx/cdxobject.py @@ -77,3 +77,34 @@ class CDXObject(OrderedDict): li = itertools.imap(lambda (n, val): val, self.items()) return ' '.join(li) + + +#================================================================= +class IDXObject(OrderedDict): + + FORMAT = ['urlkey', 'part', 'offset', 'length', 'lineno'] + NUM_REQ_FIELDS = len(FORMAT) - 1 # lineno is an optional field + + def __init__(self, idxline): + OrderedDict.__init__(self) + + idxline = idxline.rstrip() + fields = idxline.split('\t') + + if len(fields) < self.NUM_REQ_FIELDS: + msg = 'invalid idx format: {0} fields found, {1} required' + raise Exception(msg.format(len(fields), self.NUM_REQ_FIELDS)) + + for header, field in itertools.izip(self.FORMAT, fields): + self[header] = field + + self['offset'] = int(self['offset']) + self['length'] = int(self['length']) + lineno = self.get('lineno') + if lineno: + self['lineno'] = int(lineno) + + self.idxline = idxline + + def __str__(self): + 
return self.idxline diff --git a/pywb/cdx/cdxops.py b/pywb/cdx/cdxops.py index 58bd920b..247f3d18 100644 --- a/pywb/cdx/cdxops.py +++ b/pywb/cdx/cdxops.py @@ -1,4 +1,4 @@ -from cdxobject import CDXObject, AccessException +from cdxobject import CDXObject, IDXObject, AccessException from pywb.utils.timeutils import timestamp_to_sec import bisect @@ -56,7 +56,7 @@ def cdx_text_out(cdx, fields): def cdx_load_and_filter(sources, params): cdx_iter = load_cdx_streams(sources, params) - cdx_iter = make_cdx_iter(cdx_iter) + cdx_iter = make_obj_iter(cdx_iter, params) if params.get('proxyAll'): return cdx_iter @@ -102,9 +102,15 @@ def load_cdx_streams(sources, params): #================================================================= -# convert text cdx stream to CDXObject -def make_cdx_iter(text_iter): - return itertools.imap(lambda line: CDXObject(line), text_iter) +# convert text cdx stream to CDXObject/IDXObject +def make_obj_iter(text_iter, params): + # already converted + if params.get('showPagedIndex'): + cls = IDXObject + else: + cls = CDXObject + + return itertools.imap(lambda line: cls(line), text_iter) #================================================================= diff --git a/pywb/cdx/cdxserver.py b/pywb/cdx/cdxserver.py index 69f19d21..1a68f7e4 100644 --- a/pywb/cdx/cdxserver.py +++ b/pywb/cdx/cdxserver.py @@ -1,10 +1,13 @@ -from canonicalize import UrlCanonicalizer +from canonicalize import UrlCanonicalizer, calc_search_range from cdxops import cdx_load -from cdxsource import CDXSource, CDXFile, RemoteCDXSource +from cdxsource import CDXSource, CDXFile, RemoteCDXSource, RedisCDXSource +from zipnum import ZipNumCluster from cdxobject import CDXObject, CaptureNotFoundException, CDXException from cdxdomainspecific import load_domain_specific_cdx_rules +from pywb.utils.loaders import is_http + from itertools import chain import logging import os @@ -14,8 +17,23 @@ import urlparse #================================================================= class 
BaseCDXServer(object): def __init__(self, **kwargs): - self.url_canon = kwargs.get('url_canon', UrlCanonicalizer()) - self.fuzzy_query = kwargs.get('fuzzy_query') + ds_rules = kwargs.get('ds_rules') + surt_ordered = kwargs.get('surt_ordered', True) + + # load from domain-specific rules + if ds_rules: + self.url_canon, self.fuzzy_query = ( + load_domain_specific_cdx_rules(ds_rules, surt_ordered)) + # or custom passed in canonicalizer + else: + self.url_canon = kwargs.get('url_canon') + self.fuzzy_query = kwargs.get('fuzzy_query') + + # set default canonicalizer if none set thus far + if not self.url_canon: + self.url_canon = UrlCanonicalizer(surt_ordered) + + # set perms checker, if any self.perms_checker = kwargs.get('perms_checker') def _check_cdx_iter(self, cdx_iter, params): @@ -66,7 +84,7 @@ class CDXServer(BaseCDXServer): def __init__(self, paths, **kwargs): super(CDXServer, self).__init__(**kwargs) - self.sources = create_cdx_sources(paths) + self.sources = create_cdx_sources(paths, kwargs.get('config')) def load_cdx(self, **params): # if key not set, assume 'url' is set and needs canonicalization @@ -77,7 +95,14 @@ class CDXServer(BaseCDXServer): msg = 'A url= param must be specified to query the cdx server' raise CDXException(msg) - params['key'] = self.url_canon(url) + #params['key'] = self.url_canon(url) + match_type = params.get('matchType', 'exact') + + key, end_key = calc_search_range(url=url, + match_type=match_type, + url_canon=self.url_canon) + params['key'] = key + params['end_key'] = end_key cdx_iter = cdx_load(self.sources, params, self.perms_checker) @@ -124,36 +149,29 @@ def create_cdx_server(config, ds_rules_file=None): paths = config.get('index_paths') surt_ordered = config.get('surt_ordered', True) perms_checker = config.get('perms_checker') + pass_config = config else: paths = config surt_ordered = True perms_checker = None + pass_config = None logging.debug('CDX Surt-Ordered? 
' + str(surt_ordered)) - if ds_rules_file: - canon, fuzzy = load_domain_specific_cdx_rules(ds_rules_file, - surt_ordered) - else: - canon, fuzzy = None, None - - if not canon: - canon = UrlCanonicalizer(surt_ordered) - - if (isinstance(paths, str) and - any(paths.startswith(x) for x in ['http://', 'https://'])): + if isinstance(paths, str) and is_http(paths): server_cls = RemoteCDXServer else: server_cls = CDXServer return server_cls(paths, - url_canon=canon, - fuzzy_query=fuzzy, + config=pass_config, + surt_ordered=surt_ordered, + ds_rules=ds_rules_file, perms_checker=perms_checker) #================================================================= -def create_cdx_sources(paths): +def create_cdx_sources(paths, config=None): sources = [] if not isinstance(paths, list): @@ -161,13 +179,13 @@ def create_cdx_sources(paths): for path in paths: if isinstance(path, CDXSource): - add_cdx_source(sources, path) + add_cdx_source(sources, path, config) elif isinstance(path, str): if os.path.isdir(path): for file in os.listdir(path): - add_cdx_source(sources, path + file) + add_cdx_source(sources, path + file, config) else: - add_cdx_source(sources, path) + add_cdx_source(sources, path, config) if len(sources) == 0: logging.exception('No CDX Sources Found from: ' + str(sources)) @@ -176,9 +194,9 @@ def create_cdx_sources(paths): #================================================================= -def add_cdx_source(sources, source): +def add_cdx_source(sources, source, config): if not isinstance(source, CDXSource): - source = create_cdx_source(source) + source = create_cdx_source(source, config) if not source: return @@ -187,19 +205,20 @@ def add_cdx_source(sources, source): #================================================================= -def create_cdx_source(filename): - if filename.startswith('http://') or filename.startswith('https://'): +def create_cdx_source(filename, config): + if is_http(filename): return RemoteCDXSource(filename) + if 
filename.startswith('redis://'): + return RedisCDXSource(filename, config) + if filename.endswith('.cdx'): return CDXFile(filename) + if filename.endswith('.summary'): + return ZipNumCluster(filename, config) + return None - #TODO: support zipnum - #elif filename.endswith('.summary') - # return ZipNumCDXSource(filename) - #elif filename.startswith('redis://') - # return RedisCDXSource(filename) #================================================================= diff --git a/pywb/cdx/cdxsource.py b/pywb/cdx/cdxsource.py index a8c92be5..783cf36b 100644 --- a/pywb/cdx/cdxsource.py +++ b/pywb/cdx/cdxsource.py @@ -1,9 +1,9 @@ -from pywb.utils.binsearch import iter_exact, iter_prefix +from pywb.utils.binsearch import iter_range from pywb.utils.loaders import SeekableTextFileReader import urllib import urllib2 - +import itertools #================================================================= class CDXSource(object): @@ -24,17 +24,7 @@ class CDXFile(CDXSource): def load_cdx(self, params): source = SeekableTextFileReader(self.filename) - - match_type = params.get('matchType') - - if match_type == 'prefix': - iter_func = iter_prefix - else: - iter_func = iter_exact - - key = params.get('key') - - return iter_func(source, key) + return iter_range(source, params.get('key'), params.get('end_key')) def __str__(self): return 'CDX File - ' + self.filename @@ -90,3 +80,35 @@ class RemoteCDXSource(CDXSource): def __str__(self): return 'Remote CDX Server: ' + self.remote_url + + +#================================================================= +class RedisCDXSource(CDXSource): + DEFAULT_KEY_PREFIX = 'c:' + + def __init__(self, redis_url, config=None): + import redis + self.redis = redis.StrictRedis.from_url(redis_url) + + self.key_prefix = self.DEFAULT_KEY_PREFIX + if config: + self.key_prefix = config.get('redis_key_prefix', self.key_prefix) + + + def load_cdx(self, params): + """ + Load cdx from redis cache, from an ordered list + + Currently, there is no support for range 
queries + Only 'exact' matchType is supported + """ + key = params['key'] + + # ensure only url/surt is part of key + key = key.split(' ')[0] + cdx_list = self.redis.zrange(self.key_prefix + key, 0, -1) + + # key is not part of list, so prepend to each line + key += ' ' + cdx_list = itertools.imap(lambda x: key + x, cdx_list) + return cdx_list diff --git a/pywb/cdx/test/cdxserver_test.py b/pywb/cdx/test/cdxserver_test.py index 2d023729..0e799ce9 100644 --- a/pywb/cdx/test/cdxserver_test.py +++ b/pywb/cdx/test/cdxserver_test.py @@ -132,8 +132,8 @@ org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db tex ('filename', 'dupes.warc.gz')] # NOTE: external dependency -- need self-contained test ->>> x = CDXServer('http://web.archive.org/cdx/search/cdx').load_cdx(url = 'example.com', output = 'raw', limit = '2') ->>> pprint.pprint(x.next().items()) +#>>> x = CDXServer('http://web.archive.org/cdx/search/cdx').load_cdx(url = 'example.com', output = 'raw', limit = '2') +#>>> pprint.pprint(x.next().items()) [('urlkey', 'com,example)/'), ('timestamp', '20020120142510'), ('original', 'http://example.com:80/'), diff --git a/pywb/cdx/zipnum.py b/pywb/cdx/zipnum.py new file mode 100644 index 00000000..847c660f --- /dev/null +++ b/pywb/cdx/zipnum.py @@ -0,0 +1,203 @@ +import os +import collections +import itertools +import logging +from cStringIO import StringIO +import datetime + +from cdxsource import CDXSource +from cdxobject import IDXObject + +from pywb.utils.loaders import BlockLoader +from pywb.utils.loaders import SeekableTextFileReader +from pywb.utils.bufferedreaders import gzip_decompressor +from pywb.utils.binsearch import iter_range, linearsearch + + +#================================================================= +class ZipBlocks: + def __init__(self, part, offset, length, count): + self.part = part + self.offset = offset + self.length = length + self.count = count + + +#================================================================= +def 
readline_to_iter(stream): + try: + count = 0 + buff = stream.readline() + while buff: + count += 1 + yield buff + buff = stream.readline() + + finally: + stream.close() + + +#================================================================= +class ZipNumCluster(CDXSource): + DEFAULT_RELOAD_INTERVAL = 10 # in minutes + DEFAULT_MAX_BLOCKS = 50 + + def __init__(self, summary, config=None): + + loc = None + cookie_maker = None + self.max_blocks = self.DEFAULT_MAX_BLOCKS + reload_ival = self.DEFAULT_RELOAD_INTERVAL + + if config: + loc = config.get('zipnum_loc') + cookie_maker = config.get('cookie_maker') + + self.max_blocks = config.get('max_blocks', self.max_blocks) + + reload_ival = config.get('reload_interval', reload_ival) + + if not loc: + splits = os.path.splitext(summary) + loc = splits[0] + '.loc' + + self.summary = summary + self.loc_filename = loc + + # initial loc map + self.loc_map = {} + self.loc_mtime = 0 + self.load_loc() + + # reload interval + self.loc_update_time = datetime.datetime.now() + self.reload_interval = datetime.timedelta(minutes=reload_ival) + + self.blk_loader = BlockLoader(cookie_maker=cookie_maker) + + def load_loc(self): + # check modified time of current file before loading + new_mtime = os.path.getmtime(self.loc_filename) + if (new_mtime == self.loc_mtime): + return + + # update loc file mtime + self.loc_mtime = new_mtime + + logging.debug('Loading loc from: ' + self.loc_filename) + with open(self.loc_filename) as fh: + for line in fh: + parts = line.rstrip().split('\t') + self.loc_map[parts[0]] = parts[1:] + + @staticmethod + def reload_timed(timestamp, val, delta, func): + now = datetime.datetime.now() + if now - timestamp >= delta: + func() + return now + return None + + def reload_loc(self): + reload_time = self.reload_timed(self.loc_update_time, + self.loc_map, + self.reload_interval, + self.load_loc) + + if reload_time: + self.loc_update_time = reload_time + + def lookup_loc(self, part): + return self.loc_map[part] + + def 
load_cdx(self, params): + self.reload_loc() + + reader = SeekableTextFileReader(self.summary) + + idx_iter = iter_range(reader, + params['key'], + params['end_key'], + prev_size=1) + + if params.get('showPagedIndex'): + params['proxyAll'] = True + return idx_iter + else: + blocks = self.idx_to_cdx(idx_iter, params) + + def gen_cdx(): + for blk in blocks: + for cdx in blk: + yield cdx + + return gen_cdx() + + def idx_to_cdx(self, idx_iter, params): + blocks = None + ranges = [] + + for idx in idx_iter: + idx = IDXObject(idx) + + if (blocks and blocks.part == idx['part'] and + blocks.offset + blocks.length == idx['offset'] and + blocks.count < self.max_blocks): + + blocks.length += idx['length'] + blocks.count += 1 + ranges.append(idx['length']) + + else: + if blocks: + yield self.block_to_cdx_iter(blocks, ranges, params) + + blocks = ZipBlocks(idx['part'], + idx['offset'], + idx['length'], + 1) + + ranges = [blocks.length] + + if blocks: + yield self.block_to_cdx_iter(blocks, ranges, params) + + def block_to_cdx_iter(self, blocks, ranges, params): + last_exc = None + last_traceback = None + + for location in self.lookup_loc(blocks.part): + try: + return self.load_blocks(location, blocks, ranges, params) + except Exception as exc: + last_exc = exc + import sys + last_traceback = sys.exc_info()[2] + + if last_exc: + raise exc, None, last_traceback + else: + raise Exception('No Locations Found for: ' + block.part) + + def load_blocks(self, location, blocks, ranges, params): + + if (logging.getLogger().getEffectiveLevel() <= logging.DEBUG): + msg = 'Loading {b.count} blocks from {loc}:{b.offset}+{b.length}' + logging.debug(msg.format(b=blocks, loc=location)) + + reader = self.blk_loader.load(location, blocks.offset, blocks.length) + + def decompress_block(range_): + decomp = gzip_decompressor() + buff = decomp.decompress(reader.read(range_)) + return readline_to_iter(StringIO(buff)) + + iter_ = itertools.chain(*itertools.imap(decompress_block, ranges)) + + # start bound 
+ iter_ = linearsearch(iter_, params['key']) + + # end bound + end = params['end_key'] + iter_ = itertools.takewhile(lambda line: line < end, iter_) + return iter_ diff --git a/pywb/handlers.py b/pywb/handlers.py index 4be855e3..c82db7fe 100644 --- a/pywb/handlers.py +++ b/pywb/handlers.py @@ -10,19 +10,28 @@ from wbexceptions import WbException, NotFoundException from views import TextCapturesView -class BaseHandler: - @staticmethod - def get_wburl_type(): - return WbUrl - +#================================================================= +class BaseHandler(object): def __call__(self, wbrequest): return wbrequest + def get_wburl_type(self): + return None + + +#================================================================= +class WbUrlHandler(BaseHandler): + def get_wburl_type(self): + return WbUrl + + #================================================================= # Standard WB Handler #================================================================= -class WBHandler(BaseHandler): - def __init__(self, index_reader, replay, html_view = None, search_view = None): +class WBHandler(WbUrlHandler): + def __init__(self, index_reader, replay, + html_view=None, search_view=None): + self.index_reader = index_reader self.replay = replay @@ -31,7 +40,6 @@ class WBHandler(BaseHandler): self.html_view = html_view self.search_view = search_view - def __call__(self, wbrequest): if wbrequest.wb_url_str == '/': return self.render_search_page(wbrequest) @@ -61,6 +69,7 @@ class WBHandler(BaseHandler): def __str__(self): return 'WBHandler: ' + str(self.index_reader) + ', ' + str(self.replay) + #================================================================= # CDX-Server Handler -- pass all params to cdx server #================================================================= @@ -75,11 +84,6 @@ class CDXHandler(BaseHandler): return self.view.render_response(wbrequest, cdx_lines) - - @staticmethod - def get_wburl_type(): - return None - def __str__(self): return 'Index 
Reader: ' + str(self.index_reader) @@ -115,10 +119,6 @@ class StaticHandler(BaseHandler): except IOError: raise NotFoundException('Static File Not Found: ' + wbrequest.wb_url_str) - @staticmethod - def get_wburl_type(): - return None - def __str__(self): return 'Static files from ' + self.static_path @@ -130,6 +130,7 @@ class DebugEchoEnvHandler(BaseHandler): def __call__(self, wbrequest): return WbResponse.text_response(str(wbrequest.env)) + #================================================================= class DebugEchoHandler(BaseHandler): def __call__(self, wbrequest): @@ -150,5 +151,3 @@ class PerfTimer: self.end = time.clock() if self.perfdict is not None: self.perfdict[self.name] = str(self.end - self.start) - - diff --git a/pywb/indexreader.py b/pywb/indexreader.py index b55de029..cea27a8f 100644 --- a/pywb/indexreader.py +++ b/pywb/indexreader.py @@ -37,7 +37,7 @@ class IndexReader(object): def load_cdx(self, **params): return self.cdx_server.load_cdx(**params) - def get_query_params(self, wburl, limit = 150000, collapse_time = None, replay_closest = 10): + def get_query_params(self, wburl, limit = 150000, collapse_time = None, replay_closest = 100): if wburl.type == wburl.URL_QUERY: raise NotImplementedError('Url Query Not Yet Supported') diff --git a/pywb/proxy.py b/pywb/proxy.py index 107f9d96..fc14d1e5 100644 --- a/pywb/proxy.py +++ b/pywb/proxy.py @@ -45,14 +45,14 @@ class ProxyRouter: return None wbrequest = WbRequest(env, - request_uri = url, - wb_url_str = url, - wb_prefix = '', - coll = '', - host_prefix = self.hostpaths[0], - wburl_class = self.handler.get_wburl_type(), - url_rewriter_class = ProxyHttpsUrlRewriter, - is_proxy = True) + request_uri=url, + wb_url_str=url, + #rel_prefix=url, + #host_prefix=self.hostpaths[0], + wburl_class=self.handler.get_wburl_type(), + urlrewriter_class=ProxyHttpsUrlRewriter, + use_abs_prefix=False, + is_proxy=True) return self.handler(wbrequest) diff --git a/pywb/replay_views.py b/pywb/replay_views.py index 
f5f9c504..4c6907eb 100644 --- a/pywb/replay_views.py +++ b/pywb/replay_views.py @@ -7,7 +7,6 @@ from wbrequestresponse import WbResponse from wbexceptions import CaptureException, InternalRedirect from pywb.warc.recordloader import ArchiveLoadFailed - #================================================================= class ReplayView: def __init__(self, content_loader, content_rewriter, head_insert_view = None, @@ -49,6 +48,9 @@ class ReplayView: # check if redir is needed self._redirect_if_needed(wbrequest, cdx) + # one more check for referrer-based self-redirect + self._reject_referrer_self_redirect(wbrequest, status_headers) + response = None if self.content_rewriter and wbrequest.wb_url.mod != 'id_': @@ -148,6 +150,7 @@ class ReplayView: def _reject_self_redirect(self, wbrequest, cdx, status_headers): + # self-redirect via location if status_headers.statusline.startswith('3'): request_url = wbrequest.wb_url.url.lower() location_url = status_headers.get_header('Location').lower() @@ -156,3 +159,16 @@ class ReplayView: if (UrlRewriter.strip_protocol(request_url) == UrlRewriter.strip_protocol(location_url)): raise CaptureException('Self Redirect: ' + str(cdx)) + def _reject_referrer_self_redirect(self, wbrequest, status_headers): + # at correct timestamp now, but must check for referrer redirect + # indirect self-redirect, via meta-refresh, if referrer is same as current url + if status_headers.statusline.startswith('2'): + # build full url even if using relative-rewriting + request_url = wbrequest.host_prefix + wbrequest.rel_prefix + str(wbrequest.wb_url) + referrer_url = wbrequest.referrer + if (referrer_url and UrlRewriter.strip_protocol(request_url) == UrlRewriter.strip_protocol(referrer_url)): + raise CaptureException('Self Redirect via Referrer: ' + str(wbrequest.wb_url)) + + + + diff --git a/pywb/rewrite/rewrite_content.py b/pywb/rewrite/rewrite_content.py index 9f904764..81cd23c9 100644 --- a/pywb/rewrite/rewrite_content.py +++ 
b/pywb/rewrite/rewrite_content.py @@ -6,7 +6,7 @@ from regex_rewriters import RegexRewriter, JSRewriter, CSSRewriter, XMLRewriter from header_rewriter import HeaderRewriter, RewrittenStatusAndHeaders from pywb.utils.statusandheaders import StatusAndHeaders -from pywb.utils.bufferedreaders import BufferedReader, ChunkedDataReader +from pywb.utils.bufferedreaders import DecompressingBufferedReader, ChunkedDataReader class RewriteContent: @@ -54,7 +54,7 @@ class RewriteContent: # ========================================================================= # special case -- need to ungzip the body if (rewritten_headers.contains_removed_header('content-encoding', 'gzip')): - stream = BufferedReader(stream, decomp_type='gzip') + stream = DecompressingBufferedReader(stream, decomp_type='gzip') if rewritten_headers.charset: encoding = rewritten_headers.charset diff --git a/pywb/rewrite/test/test_rewrite_live.py b/pywb/rewrite/test/test_rewrite_live.py index 691bec6d..6d66ce60 100644 --- a/pywb/rewrite/test/test_rewrite_live.py +++ b/pywb/rewrite/test/test_rewrite_live.py @@ -24,9 +24,9 @@ def test_example_2(): -def test_example_3(): - status_headers, buff = get_rewritten('http://archive.org/', urlrewriter) +#def test_example_3(): +# status_headers, buff = get_rewritten('http://archive.org/', urlrewriter) - assert '/pywb/20131226101010/http://example.com/about/terms.php' in buff, buff +# assert '/pywb/20131226101010/http://example.com/about/terms.php' in buff, buff diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index c4cc4054..6889fc92 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -103,10 +103,12 @@ class UrlRewriter: return self.prefix + self.wburl.to_str(timestamp=timestamp, url=url) - def set_base_url(self, newUrl): self.wburl.url = newUrl + def __repr__(self): + return "UrlRewriter('{0}', '{1}')".format(self.wburl, self.prefix) + @staticmethod def strip_protocol(url): for protocol in UrlRewriter.PROTOCOLS: diff 
--git a/pywb/rewrite/wburl.py b/pywb/rewrite/wburl.py index 77bd437d..6be56b6c 100644 --- a/pywb/rewrite/wburl.py +++ b/pywb/rewrite/wburl.py @@ -1,9 +1,5 @@ #!/usr/bin/python -import re -import rfc3987 - -# WbUrl : wb archival url representation for WB """ WbUrl represents the standard wayback archival url format. A regular url is a subset of the WbUrl (latest replay). @@ -34,9 +30,38 @@ replay form: latest_replay: (no timestamp) http://example.com + +Additionally, the BaseWbUrl provides the base components +(url, timestamp, end_timestamp, modifier, type) which +can be used to provide a custom representation of the +wayback url format. + """ -class WbUrl: +import re +import rfc3987 + + +#================================================================= +class BaseWbUrl(object): + QUERY = 'query' + URL_QUERY = 'url_query' + REPLAY = 'replay' + LATEST_REPLAY = 'latest_replay' + + + def __init__(self, url='', mod='', + timestamp='', end_timestamp='', type=None): + + self.url = url + self.timestamp = timestamp + self.end_timestamp = end_timestamp + self.mod = mod + self.type = type + + +#================================================================= +class WbUrl(BaseWbUrl): """ # Replay Urls # ====================== @@ -107,22 +132,14 @@ class WbUrl: QUERY_REGEX = re.compile('^(?:([\w\-:]+)/)?(\d*)(?:-(\d+))?\*/?(.*)$') REPLAY_REGEX = re.compile('^(\d*)([a-z]+_)?/{0,3}(.*)$') - QUERY = 'query' - URL_QUERY = 'url_query' - REPLAY = 'replay' - LATEST_REPLAY = 'latest_replay' - DEFAULT_SCHEME = 'http://' # ====================== def __init__(self, url): + super(WbUrl, self).__init__() + self.original_url = url - self.type = None - self.url = '' - self.timestamp = '' - self.end_timestamp = '' - self.mod = '' if not any (f(url) for f in [self._init_query, self._init_replay]): raise Exception('Invalid WbUrl: ', url) diff --git a/tests/test_archivalrouter.py b/pywb/test/test_archivalrouter.py similarity index 73% rename from tests/test_archivalrouter.py rename to 
pywb/test/test_archivalrouter.py index 415626e6..4379fbfd 100644 --- a/tests/test_archivalrouter.py +++ b/pywb/test/test_archivalrouter.py @@ -1,13 +1,19 @@ """ -Test Route -# route with relative path ->>> Route('web', BaseHandler())({'REL_REQUEST_URI': '/web/test.example.com', 'SCRIPT_NAME': ''}, False) -{'wb_url': ('latest_replay', '', '', 'http://test.example.com', 'http://test.example.com'), 'coll': 'web', 'wb_prefix': '/web/', 'request_uri': '/web/test.example.com'} +# Test WbRequest parsed via a Route +# route with relative path, print resulting wbrequest +>>> print_req(Route('web', WbUrlHandler())({'REL_REQUEST_URI': '/web/test.example.com', 'SCRIPT_NAME': ''}, False)) +{'coll': 'web', + 'request_uri': '/web/test.example.com', + 'wb_prefix': '/web/', + 'wb_url': ('latest_replay', '', '', 'http://test.example.com', 'http://test.example.com')} -# route with absolute path, running at script /my_pywb ->>> Route('web', BaseHandler())({'REL_REQUEST_URI': '/web/2013im_/test.example.com', 'SCRIPT_NAME': '/my_pywb', 'HTTP_HOST': 'localhost:8081', 'wsgi.url_scheme': 'https'}, True) -{'wb_url': ('replay', '2013', 'im_', 'http://test.example.com', '2013im_/http://test.example.com'), 'coll': 'web', 'wb_prefix': 'https://localhost:8081/my_pywb/web/', 'request_uri': '/web/2013im_/test.example.com'} +# route with absolute path, running at script /my_pywb, print resultingwbrequest +>>> print_req(Route('web', WbUrlHandler())({'REL_REQUEST_URI': '/web/2013im_/test.example.com', 'SCRIPT_NAME': '/my_pywb', 'HTTP_HOST': 'localhost:8081', 'wsgi.url_scheme': 'https'}, True)) +{'coll': 'web', + 'request_uri': '/web/2013im_/test.example.com', + 'wb_prefix': 'https://localhost:8081/my_pywb/web/', + 'wb_url': ('replay', '2013', 'im_', 'http://test.example.com', '2013im_/http://test.example.com')} # not matching route -- skipped >>> Route('web', BaseHandler())({'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''}, False) @@ -65,7 +71,12 @@ False """ from pywb.archivalrouter 
import Route, ReferRedirect -from pywb.handlers import BaseHandler +from pywb.handlers import BaseHandler, WbUrlHandler +import pprint + +def print_req(req): + varlist = vars(req) + pprint.pprint({k: varlist[k] for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')}) def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'coll', http_host = None): @@ -74,7 +85,7 @@ def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'col if http_host: env['HTTP_HOST'] = http_host - routes = [Route(coll, BaseHandler())] + routes = [Route(coll, WbUrlHandler())] redir = ReferRedirect(match_host) #req = WbRequest.from_uri(request_uri, env) @@ -85,4 +96,6 @@ def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'col return rep.status_headers.get_header('Location') - +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/pywb/test/test_wbrequestresponse.py b/pywb/test/test_wbrequestresponse.py new file mode 100644 index 00000000..600ec926 --- /dev/null +++ b/pywb/test/test_wbrequestresponse.py @@ -0,0 +1,87 @@ +""" +# WbRequest Tests +# ================= +>>> print_req_from_uri('/save/_embed/example.com/?a=b') +{'wb_url': ('latest_replay', '', '', 'http://_embed/example.com/?a=b', 'http://_embed/example.com/?a=b'), 'coll': 'save', 'wb_prefix': '/save/', 'request_uri': '/save/_embed/example.com/?a=b'} + +>>> print_req_from_uri('/2345/20101024101112im_/example.com/?b=c') +{'wb_url': ('replay', '20101024101112', 'im_', 'http://example.com/?b=c', '20101024101112im_/http://example.com/?b=c'), 'coll': '2345', 'wb_prefix': '/2345/', 'request_uri': '/2345/20101024101112im_/example.com/?b=c'} + +>>> print_req_from_uri('/2010/example.com') +{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'} + +>>> print_req_from_uri('../example.com') +{'wb_url': ('latest_replay', '', '', 'http://example.com', 
'http://example.com'), 'coll': '', 'wb_prefix': '/', 'request_uri': '../example.com'} + +# Abs path +>>> print_req_from_uri('/2010/example.com', {'wsgi.url_scheme': 'https', 'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True) +{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': 'https://localhost:8080/2010/', 'request_uri': '/2010/example.com'} + +# No Scheme, so stick to relative +>>> print_req_from_uri('/2010/example.com', {'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True) +{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'} + + + +# WbResponse Tests +# ================= +>>> WbResponse.text_response('Test') +{'body': ['Test'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [('Content-Type', 'text/plain')])} + +>>> WbResponse.text_stream(['Test', 'Another'], '404') +{'body': ['Test', 'Another'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '404', headers = [('Content-Type', 'text/plain')])} + +>>> WbResponse.redir_response('http://example.com/otherfile') +{'body': [], 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [('Location', 'http://example.com/otherfile')])} + +""" + + +from pywb.rewrite.wburl import WbUrl +from pywb.rewrite.url_rewriter import UrlRewriter +from pywb.utils.statusandheaders import StatusAndHeaders + +from pywb.wbrequestresponse import WbRequest, WbResponse + + +def print_req_from_uri(request_uri, env={}, use_abs_prefix=False): + response = req_from_uri(request_uri, env, use_abs_prefix) + varlist = vars(response) + print str({k: varlist[k] for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')}) + + +def req_from_uri(request_uri, env={}, use_abs_prefix=False): + if not request_uri: + request_uri = env.get('REL_REQUEST_URI') + + parts = request_uri.split('/', 2) + + # Has coll 
prefix + if len(parts) == 3: + rel_prefix = '/' + parts[1] + '/' + wb_url_str = parts[2] + coll = parts[1] + # No Coll Prefix + elif len(parts) == 2: + rel_prefix = '/' + wb_url_str = parts[1] + coll = '' + else: + rel_prefix = '/' + wb_url_str = parts[0] + coll = '' + + return WbRequest(env, + request_uri=request_uri, + rel_prefix=rel_prefix, + wb_url_str=wb_url_str, + coll=coll, + wburl_class=WbUrl, + urlrewriter_class=UrlRewriter, + use_abs_prefix=use_abs_prefix) + + +if __name__ == "__main__": + import doctest + doctest.testmod() + diff --git a/pywb/utils/binsearch.py b/pywb/utils/binsearch.py index 96b2e9de..7d939c18 100644 --- a/pywb/utils/binsearch.py +++ b/pywb/utils/binsearch.py @@ -35,6 +35,58 @@ def binsearch_offset(reader, key, compare_func=cmp, block_size=8192): return min_ * block_size +#================================================================= +def binsearch(reader, key, compare_func=cmp, block_size=8192): + """ + Perform a binary search for a specified key to within a 'block_size' + (default 8192) granularity, and return first full line found. + """ + + min_ = binsearch_offset(reader, key, compare_func, block_size) + + reader.seek(min_) + + if min_ > 0: + reader.readline() # skip partial line + + def gen_iter(line): + while line: + yield line.rstrip() + line = reader.readline() + + return gen_iter(reader.readline()) + + +#================================================================= +def linearsearch(iter_, key, prev_size=0, compare_func=cmp): + """ + Perform a linear search over iterator until + current_line >= key + + optionally also tracking upto N previous lines, which are + returned before the first matched line. 
+ + if end of stream is reached before a match is found, + nothing is returned (prev lines discarded also) + """ + + prev_deque = deque(maxlen=prev_size + 1) + + matched = False + + for line in iter_: + prev_deque.append(line) + if compare_func(line, key) >= 0: + matched = True + break + + # no matches, so return empty iterator + if not matched: + return [] + + return itertools.chain(prev_deque, iter_) + + #================================================================= def search(reader, key, prev_size=0, compare_func=cmp, block_size=8192): """ @@ -45,46 +97,27 @@ def search(reader, key, prev_size=0, compare_func=cmp, block_size=8192): When performin_g linear search, keep track of up to N previous lines before first matching line. """ - min_ = binsearch_offset(reader, key, compare_func, block_size) + iter_ = binsearch(reader, key, compare_func, block_size) + iter_ = linearsearch(iter_, + key, prev_size=prev_size, + compare_func=compare_func) + return iter_ - reader.seek(min_) - if min_ > 0: - reader.readline() # skip partial line +#================================================================= +def iter_range(reader, start, end, prev_size=0): + """ + Creates an iterator which iterates over lines where + start <= line < end (end exclusive) + """ - if prev_size > 1: - prev_deque = deque(max_len=prev_size) + iter_ = search(reader, start, prev_size=prev_size) - line = None + end_iter = itertools.takewhile( + lambda line: line < end, + iter_) - while True: - line = reader.readline() - if not line: - break - if compare_func(line, key) >= 0: - break - - if prev_size == 1: - prev = line - elif prev_size > 1: - prev_deque.append(line) - - def gen_iter(line): - """ - Create iterator over any previous lines to - current matched line - """ - if prev_size == 1: - yield prev.rstrip() - elif prev_size > 1: - for i in prev_deque: - yield i.rstrip() - - while line: - yield line.rstrip() - line = reader.readline() - - return gen_iter(line) + return end_iter 
#================================================================= diff --git a/pywb/utils/bufferedreaders.py b/pywb/utils/bufferedreaders.py index 27a3ed33..6be38b85 100644 --- a/pywb/utils/bufferedreaders.py +++ b/pywb/utils/bufferedreaders.py @@ -11,7 +11,7 @@ def gzip_decompressor(): #================================================================= -class BufferedReader(object): +class DecompressingBufferedReader(object): """ A wrapping line reader which wraps an existing reader. Read operations operate on underlying buffer, which is filled to @@ -29,7 +29,7 @@ class BufferedReader(object): DECOMPRESSORS = {'gzip': gzip_decompressor} - def __init__(self, stream, max_len=0, block_size=1024, decomp_type=None): + def __init__(self, stream, block_size=1024, decomp_type=None): self.stream = stream self.block_size = block_size @@ -44,24 +44,19 @@ class BufferedReader(object): self.buff = None self.num_read = 0 - self.max_len = max_len def _fillbuff(self, block_size=None): if not block_size: block_size = self.block_size if not self.buff or self.buff.pos >= self.buff.len: - if self.max_len > 0: - to_read = min(self.max_len - self.num_read, self.block_size) - else: - to_read = self.block_size - - data = self.stream.read(to_read) + data = self.stream.read(block_size) self._process_read(data) def _process_read(self, data): data = self._decompress(data) - self.num_read += len(data) + self.buff_size = len(data) + self.num_read += self.buff_size self.buff = StringIO.StringIO(data) def _decompress(self, data): @@ -78,12 +73,40 @@ class BufferedReader(object): return data def read(self, length=None): + """ + Fill bytes and read some number of bytes + (up to length if specified) + < length bytes may be read if reached the end of input + or at a buffer boundary. If at a boundary, the subsequent + call will fill buffer anew. 
+ """ self._fillbuff() return self.buff.read(length) def readline(self, length=None): + """ + Fill buffer and read a full line from the buffer + (up to specified length, if provided) + If no newline found at end, try filling buffer again in case + at buffer boundary. + """ self._fillbuff() - return self.buff.readline(length) + linebuff = self.buff.readline(length) + # we may be at a boundary + while not linebuff.endswith('\n'): + if length: + length -= len(linebuff) + if length <= 0: + break + + self._fillbuff() + + if self.buff_size == 0: + break + + linebuff += self.buff.readline(length) + + return linebuff def close(self): if self.stream: @@ -97,7 +120,7 @@ class ChunkedDataException(Exception): #================================================================= -class ChunkedDataReader(BufferedReader): +class ChunkedDataReader(DecompressingBufferedReader): r""" A ChunkedDataReader is a BufferedReader which also supports de-chunking of the data if it happens to be http 'chunk-encoded'. @@ -133,7 +156,7 @@ class ChunkedDataReader(BufferedReader): def _fillbuff(self, block_size=None): if self.not_chunked: - return BufferedReader._fillbuff(self, block_size) + return super(ChunkedDataReader, self)._fillbuff(block_size) if self.all_chunks_read: return diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index 4d458738..a117f539 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -9,18 +9,50 @@ import urllib2 import time +def is_http(filename): + return any(filename.startswith(x) for x in ['http://', 'https://']) + + #================================================================= -# load a reader from http -#================================================================= -class HttpLoader(object): +class BlockLoader(object): """ - Load a file-like reader over http using range requests - and an optional cookie created via a cookie_maker + a loader which can stream blocks of content + given a uri, offset and optional length. 
+ Currently supports: http/https and file/local file system """ def __init__(self, cookie_maker=None): self.cookie_maker = cookie_maker def load(self, url, offset, length): + """ + Determine loading method based on uri + """ + if is_http(url): + return self.load_http(url, offset, length) + else: + return self.load_file(url, offset, length) + + def load_file(self, url, offset, length): + """ + Load a file-like reader from the local file system + """ + + if url.startswith('file://'): + url = url[len('file://'):] + + afile = open(url, 'rb') + afile.seek(offset) + + if length > 0: + return LimitReader(afile, length) + else: + return afile + + def load_http(self, url, offset, length): + """ + Load a file-like reader over http using range requests + and an optional cookie created via a cookie_maker + """ if length > 0: range_header = 'bytes={0}-{1}'.format(offset, offset + length - 1) else: @@ -71,25 +103,6 @@ class HMACCookieMaker(object): return cookie -#================================================================= -# load a reader from local filesystem -#================================================================= -class FileLoader(object): - """ - Load a file-like reader from the local file system - """ - - def load(self, url, offset, length): - if url.startswith('file://'): - url = url[len('file://'):] - - afile = open(url, 'rb') - afile.seek(offset) - - if length > 0: - return LimitReader(afile, length) - - #================================================================= # Limit Reader #================================================================= diff --git a/pywb/utils/statusandheaders.py b/pywb/utils/statusandheaders.py index 01bb6614..92e897fc 100644 --- a/pywb/utils/statusandheaders.py +++ b/pywb/utils/statusandheaders.py @@ -65,23 +65,36 @@ class StatusAndHeadersParser(object): """ parse stream for status line and headers return a StatusAndHeaders object + + support continuation headers starting with space or tab """ statusline = 
stream.readline().rstrip() protocol_status = self.split_prefix(statusline, self.statuslist) if not protocol_status: - msg = 'Expected Status Line - Found: ' + statusline + msg = 'Expected Status Line starting with {0} - Found: {1}' + msg = msg.format(self.statuslist, statusline) raise StatusAndHeadersParserException(msg, statusline) headers = [] line = stream.readline().rstrip() - while line and line != '\r\n': + while line: name, value = line.split(':', 1) - header = (name, value.strip()) + name = name.rstrip(' \t') + value = value.lstrip() + + next_line = stream.readline().rstrip() + + # append continuation lines, if any + while next_line and next_line.startswith((' ', '\t')): + value += next_line + next_line = stream.readline().rstrip() + + header = (name, value) headers.append(header) - line = stream.readline().rstrip() + line = next_line return StatusAndHeaders(statusline=protocol_status[1].strip(), headers=headers, @@ -107,4 +120,3 @@ class StatusAndHeadersParserException(Exception): def __init__(self, msg, statusline): super(StatusAndHeadersParserException, self).__init__(msg) self.statusline = statusline - diff --git a/pywb/utils/test/binsearch_test.py b/pywb/utils/test/binsearch_test.py index d35551ec..40ea1f58 100644 --- a/pywb/utils/test/binsearch_test.py +++ b/pywb/utils/test/binsearch_test.py @@ -9,6 +9,7 @@ org,iana)/domains/root/db 20140126200927 http://www.iana.org/domains/root/db/ te org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db text/html 200 DHXA725IW5VJJFRTWBQT6BEZKRE7H57S - - 18365 672225 iana.warc.gz org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/servers text/html 200 AFW34N3S4NK2RJ6QWMVPB5E2AIUETAHU - - 3137 733840 iana.warc.gz +# Exact Search >>> print_binsearch_results('org,iana)/domains/root', iter_exact) org,iana)/domains/root 20140126200912 http://www.iana.org/domains/root text/html 200 YWA2R6UVWCYNHBZJKBTPYPZ5CJWKGGUX - - 2691 657746 iana.warc.gz @@ -19,18 +20,45 @@ 
org,iana)/ 20140126200624 http://www.iana.org/ text/html 200 OSSAPWJ23L56IYVRW3G org,iana)/domains/root/db 20140126200927 http://www.iana.org/domains/root/db/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 446 671278 iana.warc.gz org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db text/html 200 DHXA725IW5VJJFRTWBQT6BEZKRE7H57S - - 18365 672225 iana.warc.gz -# Exact Search +>>> print_binsearch_results('org,iana)/time-zones', iter_exact) +org,iana)/time-zones 20140126200737 http://www.iana.org/time-zones text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz + +# Exact search -- no matches >>> print_binsearch_results('org,iaana)/', iter_exact) >>> print_binsearch_results('org,ibna)/', iter_exact) ->>> print_binsearch_results('org,iana)/time-zones', iter_exact) -org,iana)/time-zones 20140126200737 http://www.iana.org/time-zones text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz + +# Range Search (end exclusive) +>>> print_binsearch_results_range('org,iana)/about', 'org,iana)/domains', iter_range) +org,iana)/about 20140126200706 http://www.iana.org/about text/html 200 6G77LZKFAVKH4PCWWKMW6TRJPSHWUBI3 - - 2962 483588 iana.warc.gz +org,iana)/about/performance/ietf-draft-status 20140126200815 http://www.iana.org/about/performance/ietf-draft-status text/html 302 Y7CTA2QZUSCDTJCSECZNSPIBLJDO7PJJ - - 584 596566 iana.warc.gz +org,iana)/about/performance/ietf-statistics 20140126200804 http://www.iana.org/about/performance/ietf-statistics text/html 302 HNYDN7XRX46RQTT2OFIWXKEYMZQAJWHD - - 582 581890 iana.warc.gz +org,iana)/dnssec 20140126201306 http://www.iana.org/dnssec text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 442 772827 iana.warc.gz +org,iana)/dnssec 20140126201307 https://www.iana.org/dnssec text/html 200 PHLRSX73EV3WSZRFXMWDO6BRKTVUSASI - - 2278 773766 iana.warc.gz + + +# Range Search -- exact +>>> print_binsearch_results_range('org,iana)/about', 'org,iana)/about!', iter_range) 
+org,iana)/about 20140126200706 http://www.iana.org/about text/html 200 6G77LZKFAVKH4PCWWKMW6TRJPSHWUBI3 - - 2962 483588 iana.warc.gz + +# Range Search -- exact + 1 prev +>>> print_binsearch_results_range('org,iana)/about', 'org,iana)/about!', iter_range, prev_size=1) +org,iana)/_js/2013.1/jquery.js 20140126201307 https://www.iana.org/_js/2013.1/jquery.js warc/revisit - AAW2RS7JB7HTF666XNZDQYJFA6PDQBPO - - 543 778507 iana.warc.gz +org,iana)/about 20140126200706 http://www.iana.org/about text/html 200 6G77LZKFAVKH4PCWWKMW6TRJPSHWUBI3 - - 2962 483588 iana.warc.gz + +# Range Search -- exact + 2 prev +>>> print_binsearch_results_range('org,iana)/about', 'org,iana)/about!', iter_range, prev_size=2) +org,iana)/_js/2013.1/jquery.js 20140126201248 http://www.iana.org/_js/2013.1/jquery.js warc/revisit - AAW2RS7JB7HTF666XNZDQYJFA6PDQBPO - - 544 765491 iana.warc.gz +org,iana)/_js/2013.1/jquery.js 20140126201307 https://www.iana.org/_js/2013.1/jquery.js warc/revisit - AAW2RS7JB7HTF666XNZDQYJFA6PDQBPO - - 543 778507 iana.warc.gz +org,iana)/about 20140126200706 http://www.iana.org/about text/html 200 6G77LZKFAVKH4PCWWKMW6TRJPSHWUBI3 - - 2962 483588 iana.warc.gz + + """ #================================================================= import os -from pywb.utils.binsearch import iter_prefix, iter_exact +from pywb.utils.binsearch import iter_prefix, iter_exact, iter_range from pywb.utils.loaders import SeekableTextFileReader from pywb import get_test_dir @@ -45,6 +73,13 @@ def print_binsearch_results(key, iter_func): print line +def print_binsearch_results_range(key, end_key, iter_func, prev_size=0): + cdx = SeekableTextFileReader(test_cdx_dir + 'iana.cdx') + + for line in iter_func(cdx, key, end_key, prev_size=prev_size): + print line + + if __name__ == "__main__": import doctest doctest.testmod() diff --git a/pywb/utils/test/loaders_test.py b/pywb/utils/test/loaders_test.py index 73d4b3dd..7dc42d83 100644 --- a/pywb/utils/test/loaders_test.py +++ 
b/pywb/utils/test/loaders_test.py @@ -10,9 +10,9 @@ >>> read_multiple(LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10), [2, 2, 20]) 'efghji' -# FileLoader Tests (includes LimitReader) +# BlockLoader Tests (includes LimitReader) # Ensure attempt to read more than 100 bytes, reads exactly 100 bytes ->>> len(FileLoader().load(test_cdx_dir + 'iana.cdx', 0, 100).read('400')) +>>> len(BlockLoader().load(test_cdx_dir + 'iana.cdx', 0, 100).read('400')) 100 # SeekableTextFileReader Test @@ -23,25 +23,39 @@ >>> seek_read_full(sr, 100) 'org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200826 http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf application/octet-stream 200 LNMEDYOENSOEI5VPADCKL3CB6N3GWXPR - - 34054 620049 iana.warc.gz\\n' -#BufferedReader readline() ->>> BufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline() +# Buffered Reader Tests +#================================================================= + +#DecompressingBufferedReader readline() +>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline() ' CDX N b a m s k r M S V g\\n' -#BufferedReader readline() with decompression ->>> BufferedReader(open(test_cdx_dir + 'iana.cdx.gz', 'rb'), decomp_type = 'gzip').readline() +#DecompressingBufferedReader readline() with decompression +>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx.gz', 'rb'), decomp_type = 'gzip').readline() ' CDX N b a m s k r M S V g\\n' ->>> HttpLoader(HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read() +>>> BlockLoader(HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read() 'Example Domain' + +# test very small block size +>>> dbr = DecompressingBufferedReader(StringIO.StringIO('ABCDEFG\\nHIJKLMN\\nOPQR\\nXYZ'), block_size = 3) +>>> dbr.readline(); dbr.readline(4); dbr.readline(); dbr.readline(); dbr.readline(2); dbr.readline(); dbr.readline() +'ABCDEFG\\n' +'HIJK' +'LMN\\n' +'OPQR\\n' +'XY' +'Z' +'' """ 
#================================================================= import os import StringIO -from pywb.utils.loaders import FileLoader, HttpLoader, HMACCookieMaker +from pywb.utils.loaders import BlockLoader, HMACCookieMaker from pywb.utils.loaders import LimitReader, SeekableTextFileReader -from pywb.utils.bufferedreaders import BufferedReader +from pywb.utils.bufferedreaders import DecompressingBufferedReader from pywb import get_test_dir #test_cdx_dir = os.path.dirname(os.path.realpath(__file__)) + '/../sample-data/' diff --git a/pywb/utils/test/statusandheaders_test.py b/pywb/utils/test/statusandheaders_test.py new file mode 100644 index 00000000..3473e71e --- /dev/null +++ b/pywb/utils/test/statusandheaders_test.py @@ -0,0 +1,29 @@ +""" +>>> StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO.StringIO(status_headers_1)) +StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [ ('Content-Type', 'ABC'), + ('Some', 'Value'), + ('Multi-Line', 'Value1 Also This')]) + +>>> StatusAndHeadersParser(['Other']).parse(StringIO.StringIO(status_headers_1)) +Traceback (most recent call last): +StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK +""" + + +from pywb.utils.statusandheaders import StatusAndHeadersParser +import StringIO + + +status_headers_1 = "\ +HTTP/1.0 200 OK\r\n\ +Content-Type: ABC\r\n\ +Some: Value\r\n\ +Multi-Line: Value1\r\n\ + Also This\r\n\ +\r\n\ +Body" + + +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/pywb/utils/timeutils.py b/pywb/utils/timeutils.py index 62929d50..7af3401f 100644 --- a/pywb/utils/timeutils.py +++ b/pywb/utils/timeutils.py @@ -17,7 +17,8 @@ DATE_TIMESPLIT = re.compile(r'[^\d]') TIMESTAMP_14 = '%Y%m%d%H%M%S' -PAD_STAMP_END = '29991231235959' +#PAD_STAMP_END = '29991231235959' +PAD_6 = '299912' def iso_date_to_datetime(string): @@ -58,41 +59,145 @@ def iso_date_to_timestamp(string): return 
datetime_to_timestamp(iso_date_to_datetime(string)) -# default pad is end of range for compatibility -def pad_timestamp(string, pad_str=PAD_STAMP_END): +# pad to certain length (default 6) +def _pad_timestamp(string, pad_str=PAD_6): """ - >>> pad_timestamp('20') - '20991231235959' + >>> _pad_timestamp('20') + '209912' - >>> pad_timestamp('2014') - '20141231235959' + >>> _pad_timestamp('2014') + '201412' - >>> pad_timestamp('20141011') - '20141011235959' + >>> _pad_timestamp('20141011') + '20141011' - >>> pad_timestamp('201410110010') - '20141011001059' + >>> _pad_timestamp('201410110010') + '201410110010' """ str_len = len(string) pad_len = len(pad_str) - return string if str_len >= pad_len else string + pad_str[str_len:] + if str_len < pad_len: + string = string + pad_str[str_len:] + + return string def timestamp_to_datetime(string): """ - >>> timestamp_to_datetime('20131226095010') - time.struct_time(tm_year=2013, tm_mon=12, tm_mday=26, \ -tm_hour=9, tm_min=50, tm_sec=10, tm_wday=3, tm_yday=360, tm_isdst=-1) + # >14-digit -- rest ignored + >>> timestamp_to_datetime('2014122609501011') + datetime.datetime(2014, 12, 26, 9, 50, 10) + # 14-digit + >>> timestamp_to_datetime('20141226095010') + datetime.datetime(2014, 12, 26, 9, 50, 10) + + # 13-digit padding + >>> timestamp_to_datetime('2014122609501') + datetime.datetime(2014, 12, 26, 9, 50, 59) + + # 12-digit padding + >>> timestamp_to_datetime('201412260950') + datetime.datetime(2014, 12, 26, 9, 50, 59) + + # 11-digit padding + >>> timestamp_to_datetime('20141226095') + datetime.datetime(2014, 12, 26, 9, 59, 59) + + # 10-digit padding + >>> timestamp_to_datetime('2014122609') + datetime.datetime(2014, 12, 26, 9, 59, 59) + + # 9-digit padding + >>> timestamp_to_datetime('201412260') + datetime.datetime(2014, 12, 26, 23, 59, 59) + + # 8-digit padding + >>> timestamp_to_datetime('20141226') + datetime.datetime(2014, 12, 26, 23, 59, 59) + + # 7-digit padding + >>> timestamp_to_datetime('2014122') + 
datetime.datetime(2014, 12, 31, 23, 59, 59) + + # 6-digit padding + >>> timestamp_to_datetime('201410') + datetime.datetime(2014, 10, 31, 23, 59, 59) + + # 5-digit padding + >>> timestamp_to_datetime('20141') + datetime.datetime(2014, 12, 31, 23, 59, 59) + + # 4-digit padding >>> timestamp_to_datetime('2014') - time.struct_time(tm_year=2014, tm_mon=12, tm_mday=31, \ -tm_hour=23, tm_min=59, tm_sec=59, tm_wday=2, tm_yday=365, tm_isdst=-1) + datetime.datetime(2014, 12, 31, 23, 59, 59) + + # 3-digit padding + >>> timestamp_to_datetime('201') + datetime.datetime(2019, 12, 31, 23, 59, 59) + + # 2-digit padding + >>> timestamp_to_datetime('20') + datetime.datetime(2099, 12, 31, 23, 59, 59) + + # 1-digit padding + >>> timestamp_to_datetime('2') + datetime.datetime(2999, 12, 31, 23, 59, 59) + + # 1-digit out-of-range padding + >>> timestamp_to_datetime('3') + datetime.datetime(2999, 12, 31, 23, 59, 59) + + # 0-digit padding + >>> timestamp_to_datetime('') + datetime.datetime(2999, 12, 31, 23, 59, 59) + + # bad month + >>> timestamp_to_datetime('20131709005601') + datetime.datetime(2013, 12, 9, 0, 56, 1) + + # all out of range except minutes + >>> timestamp_to_datetime('40001965252477') + datetime.datetime(2999, 12, 31, 23, 24, 59) + """ - # Default pad to end of range for comptability - return time.strptime(pad_timestamp(string), TIMESTAMP_14) + # pad to 6 digits + string = _pad_timestamp(string, PAD_6) + + + def clamp(val, min_, max_): + try: + val = int(val) + val = max(min_, min(val, max_)) + return val + except: + return max_ + + def extract(string, start, end, min_, max_): + if len(string) >= end: + return clamp(string[start:end], min_, max_) + else: + return max_ + + # now parse, clamp to boundary + year = extract(string, 0, 4, 1900, 2999) + month = extract(string, 4, 6, 1, 12) + day = extract(string, 6, 8, 1, calendar.monthrange(year, month)[1]) + hour = extract(string, 8, 10, 0, 23) + minute = extract(string, 10, 12, 0, 59) + second = extract(string, 12, 14, 0, 59) 
+ + return datetime.datetime(year=year, + month=month, + day=day, + hour=hour, + minute=minute, + second=second) + + #return time.strptime(pad_timestamp(string), TIMESTAMP_14) def timestamp_to_sec(string): @@ -104,7 +209,7 @@ def timestamp_to_sec(string): 1420070399 """ - return calendar.timegm(timestamp_to_datetime(string)) + return calendar.timegm(timestamp_to_datetime(string).utctimetuple()) if __name__ == "__main__": diff --git a/pywb/views.py b/pywb/views.py index f693d1e6..67f928d6 100644 --- a/pywb/views.py +++ b/pywb/views.py @@ -56,9 +56,9 @@ class J2TemplateView: # Filters @staticmethod - def format_ts(value, format='%a, %b %d %Y %H:%M:%S'): + def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'): value = timeutils.timestamp_to_datetime(value) - return time.strftime(format, value) + return value.strftime(format_) @staticmethod def get_host(url): diff --git a/pywb/warc/recordloader.py b/pywb/warc/recordloader.py index 05973f6b..446e0da3 100644 --- a/pywb/warc/recordloader.py +++ b/pywb/warc/recordloader.py @@ -6,8 +6,8 @@ from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.statusandheaders import StatusAndHeadersParser from pywb.utils.statusandheaders import StatusAndHeadersParserException -from pywb.utils.loaders import FileLoader, HttpLoader -from pywb.utils.bufferedreaders import BufferedReader +from pywb.utils.loaders import BlockLoader +from pywb.utils.bufferedreaders import DecompressingBufferedReader #================================================================= ArcWarcRecord = collections.namedtuple('ArchiveRecord', @@ -32,24 +32,12 @@ class ArcWarcRecordLoader: ARC_HEADERS = ["uri", "ip-address", "creation-date", "content-type", "length"] - @staticmethod - def create_default_loaders(cookie_maker=None): - http = HttpLoader(cookie_maker) - file = FileLoader() - return { - 'http': http, - 'https': http, - 'file': file, - '': file - } + def __init__(self, loader=None, cookie_maker=None, block_size=8192): + if not loader: + 
loader = BlockLoader(cookie_maker) - def __init__(self, loaders={}, cookie_maker=None, chunk_size=8192): - self.loaders = loaders - - if not self.loaders: - self.loaders = self.create_default_loaders(cookie_maker) - - self.chunk_size = chunk_size + self.loader = loader + self.block_size = block_size self.arc_parser = ARCHeadersParser(self.ARC_HEADERS) @@ -60,22 +48,25 @@ class ArcWarcRecordLoader: def load(self, url, offset, length): url_parts = urlparse.urlsplit(url) - loader = self.loaders.get(url_parts.scheme) - if not loader: - raise ArchiveLoadFailed('Unknown Protocol', url) + #loader = self.loaders.get(url_parts.scheme) + #if not loader: + # raise ArchiveLoadFailed('Unknown Protocol', url) try: length = int(length) except: length = -1 - raw = loader.load(url, long(offset), length) + raw = self.loader.load(url, long(offset), length) decomp_type = 'gzip' - stream = BufferedReader(raw, length, self.chunk_size, decomp_type) + # Create decompressing stream + stream = DecompressingBufferedReader(stream = raw, + decomp_type = decomp_type, + block_size = self.block_size) - (the_format, rec_headers) = self._load_headers(stream) + (the_format, rec_headers) = self._detect_type_load_headers(stream) if the_format == 'arc': rec_type = 'response' @@ -111,7 +102,7 @@ class ArcWarcRecordLoader: return ArcWarcRecord((the_format, rec_type), rec_headers, stream, status_headers) - def _load_headers(self, stream): + def _detect_type_load_headers(self, stream): """ Try parsing record as WARC, then try parsing as ARC. if neither one succeeds, we're out of luck. 
diff --git a/pywb/warc/test/test_loading.py b/pywb/warc/test/test_loading.py index 47176e3e..02ab54cb 100644 --- a/pywb/warc/test/test_loading.py +++ b/pywb/warc/test/test_loading.py @@ -213,3 +213,6 @@ def load_from_cdx_test(cdx): except Exception as e: print 'Exception: ' + e.__class__.__name__ +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/pywb/wbrequestresponse.py b/pywb/wbrequestresponse.py index e2715177..4a459c4b 100644 --- a/pywb/wbrequestresponse.py +++ b/pywb/wbrequestresponse.py @@ -1,99 +1,75 @@ -from pywb.rewrite.wburl import WbUrl -from pywb.rewrite.url_rewriter import UrlRewriter from pywb.utils.statusandheaders import StatusAndHeaders - import pprint -#WB Request and Response + +#================================================================= class WbRequest: """ - >>> WbRequest.from_uri('/save/_embed/example.com/?a=b') - {'wb_url': ('latest_replay', '', '', 'http://_embed/example.com/?a=b', 'http://_embed/example.com/?a=b'), 'coll': 'save', 'wb_prefix': '/save/', 'request_uri': '/save/_embed/example.com/?a=b'} + Represents the main pywb request object. - >>> WbRequest.from_uri('/2345/20101024101112im_/example.com/?b=c') - {'wb_url': ('replay', '20101024101112', 'im_', 'http://example.com/?b=c', '20101024101112im_/http://example.com/?b=c'), 'coll': '2345', 'wb_prefix': '/2345/', 'request_uri': '/2345/20101024101112im_/example.com/?b=c'} + Contains various info from wsgi env, add additional info + about the request, such as coll, relative prefix, + host prefix, absolute prefix. 
- >>> WbRequest.from_uri('/2010/example.com') - {'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'} - - >>> WbRequest.from_uri('../example.com') - {'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '', 'wb_prefix': '/', 'request_uri': '../example.com'} - - # Abs path - >>> WbRequest.from_uri('/2010/example.com', {'wsgi.url_scheme': 'https', 'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True) - {'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': 'https://localhost:8080/2010/', 'request_uri': '/2010/example.com'} - - # No Scheme, so stick to relative - >>> WbRequest.from_uri('/2010/example.com', {'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True) - {'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'} + If a wburl and url rewriter classes are specified, the class + also contains the url rewriter. 
""" - - @staticmethod - def from_uri(request_uri, env = {}, use_abs_prefix = False): - if not request_uri: - request_uri = env.get('REL_REQUEST_URI') - - parts = request_uri.split('/', 2) - - # Has coll prefix - if len(parts) == 3: - wb_prefix = '/' + parts[1] + '/' - wb_url_str = parts[2] - coll = parts[1] - # No Coll Prefix - elif len(parts) == 2: - wb_prefix = '/' - wb_url_str = parts[1] - coll = '' - else: - wb_prefix = '/' - wb_url_str = parts[0] - coll = '' - - host_prefix = WbRequest.make_host_prefix(env) if use_abs_prefix else '' - - return WbRequest(env, request_uri, wb_prefix, wb_url_str, coll, host_prefix = host_prefix) - - @staticmethod def make_host_prefix(env): try: - return env['wsgi.url_scheme'] + '://' + env['HTTP_HOST'] + host = env.get('HTTP_HOST') + if not host: + host = env['SERVER_NAME'] + ':' + env['SERVER_PORT'] + + return env['wsgi.url_scheme'] + '://' + host except KeyError: return '' - def __init__(self, env, request_uri, wb_prefix, wb_url_str, coll, - host_prefix = '', - wburl_class = WbUrl, - url_rewriter_class = UrlRewriter, - is_proxy = False): + def __init__(self, env, + request_uri=None, + rel_prefix='', + wb_url_str='/', + coll='', + host_prefix='', + use_abs_prefix=False, + wburl_class=None, + urlrewriter_class=None, + is_proxy=False): self.env = env self.request_uri = request_uri if request_uri else env.get('REL_REQUEST_URI') - self.host_prefix = host_prefix + self.coll = coll + + if not host_prefix: + host_prefix = self.make_host_prefix(env) + + self.host_prefix = host_prefix + self.rel_prefix = rel_prefix + + if use_abs_prefix: + self.wb_prefix = host_prefix + rel_prefix + else: + self.wb_prefix = rel_prefix - self.wb_prefix = host_prefix + wb_prefix if not wb_url_str: wb_url_str = '/' + self.wb_url_str = wb_url_str + # wb_url present and not root page if wb_url_str != '/' and wburl_class: - self.wb_url_str = wb_url_str self.wb_url = wburl_class(wb_url_str) - self.urlrewriter = url_rewriter_class(self.wb_url, self.wb_prefix) + 
self.urlrewriter = urlrewriter_class(self.wb_url, self.wb_prefix) else: # no wb_url, just store blank wb_url - self.wb_url_str = wb_url_str self.wb_url = None self.urlrewriter = None - self.coll = coll - self.referrer = env.get('HTTP_REFERER') self.is_ajax = self._is_ajax() @@ -122,24 +98,19 @@ class WbRequest: def __repr__(self): - #return "WbRequest(env, '" + (self.wb_url) + "', '" + (self.coll) + "')" - #return str(vars(self)) varlist = vars(self) - return str({k: varlist[k] for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')}) + varstr = pprint.pformat(varlist) + return varstr +#================================================================= class WbResponse: """ - >>> WbResponse.text_response('Test') - {'body': ['Test'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [('Content-Type', 'text/plain')])} + Represents a pywb wsgi response object. - >>> WbResponse.text_stream(['Test', 'Another'], '404') - {'body': ['Test', 'Another'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '404', headers = [('Content-Type', 'text/plain')])} - - >>> WbResponse.redir_response('http://example.com/otherfile') - {'body': [], 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [('Location', 'http://example.com/otherfile')])} + Holds a status_headers object and a response iter, to be + returned to wsgi container. 
""" - def __init__(self, status_headers, value = []): self.status_headers = status_headers self.body = value @@ -180,8 +151,3 @@ class WbResponse: def __repr__(self): return str(vars(self)) - -if __name__ == "__main__": - import doctest - doctest.testmod() - diff --git a/tests/test_integration.py b/tests/test_integration.py index ec7fd6bd..1a7a943c 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -75,6 +75,11 @@ class TestWb: assert 'wb.js' in resp.body assert '/pywb/20140127171238/http://www.iana.org/time-zones' in resp.body + def test_replay_content_length_1(self): + # test larger file, rewritten file (svg!) + resp = self.testapp.get('/pywb/20140126200654/http://www.iana.org/_img/2013.1/rir-map.svg') + assert resp.headers['Content-Length'] == str(len(resp.body)) + def test_redirect_1(self): resp = self.testapp.get('/pywb/20140127171237/http://www.iana.org/') @@ -119,6 +124,20 @@ class TestWb: assert resp.content_type == 'text/css' + def test_referrer_self_redirect(self): + uri = '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css' + host = 'somehost:8082' + referrer = 'http://' + host + uri + + # capture is normally a 200 + resp = self.testapp.get(uri) + assert resp.status_int == 200 + + # redirect causes skip of this capture, redirect to next + resp = self.testapp.get(uri, headers = [('Referer', referrer), ('Host', host)], status = 302) + assert resp.status_int == 302 + + def test_excluded_content(self): resp = self.testapp.get('/pywb/http://www.iana.org/_img/bookmark_icon.ico', status = 403) assert resp.status_int == 403