diff --git a/pywb/archivalrouter.py b/pywb/archivalrouter.py
index 4d28b57e..5d3dc9f4 100644
--- a/pywb/archivalrouter.py
+++ b/pywb/archivalrouter.py
@@ -50,7 +50,10 @@ class Route:
 
     def __init__(self, regex, handler, coll_group = 0, config = {}, lookahead = SLASH_QUERY_LOOKAHEAD):
         self.path = regex
-        self.regex = re.compile(regex + lookahead)
+        if regex:
+            self.regex = re.compile(regex + lookahead)
+        else:
+            self.regex = re.compile('')
         self.handler = handler
         # collection id from regex group (default 0)
         self.coll_group = coll_group
@@ -70,7 +73,6 @@ class Route:
             return None
 
         matched_str = matcher.group(0)
-
         if matched_str:
             rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/'
             wb_url_str = request_uri[len(matched_str) + 2:] # remove the '/' + rel_prefix part of uri
diff --git a/pywb/cdx/cdxsource.py b/pywb/cdx/cdxsource.py
index 783cf36b..ba5f8b3b 100644
--- a/pywb/cdx/cdxsource.py
+++ b/pywb/cdx/cdxsource.py
@@ -1,6 +1,8 @@
 from pywb.utils.binsearch import iter_range
 from pywb.utils.loaders import SeekableTextFileReader
 
+from cdxobject import AccessException
+
 import urllib
 import urllib2
 import itertools
@@ -93,7 +95,7 @@ class RedisCDXSource(CDXSource):
         self.key_prefix = self.DEFAULT_KEY_PREFIX
         if config:
             self.key_prefix = config.get('redis_key_prefix', self.key_prefix)
-        
+
 
     def load_cdx(self, params):
         """
diff --git a/pywb/cdx/perms.py b/pywb/cdx/perms.py
index a7b90eb4..ad6ea00d 100644
--- a/pywb/cdx/perms.py
+++ b/pywb/cdx/perms.py
@@ -1,7 +1,7 @@
 
 
 #=================================================================
-class AllowAllPerms:
+class AllowAllPerms(object):
     """
     Sample Perm Checker which allows all
     """
diff --git a/pywb/cdx/test/cdxserver_test.py b/pywb/cdx/test/cdxserver_test.py
index 44483ca4..e5fac6b3 100644
--- a/pywb/cdx/test/cdxserver_test.py
+++ b/pywb/cdx/test/cdxserver_test.py
@@ -141,7 +141,7 @@ org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db tex
  ('offset', '334'),
  ('filename', 'dupes.warc.gz')]
 
-# NOTE: external dependency -- need self-contained test
+# TODO: external dependency (live web.archive.org) -- replace with a self-contained test
 >>> x = CDXServer('http://web.archive.org/cdx/search/cdx').load_cdx(url = 'example.com', output = 'raw', limit = '2')
 >>> pprint.pprint(x.next().items())
 [('urlkey', 'com,example)/'),
@@ -152,6 +152,10 @@ org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db tex
  ('digest', 'HT2DYGA5UKZCPBSFVCV3JOBXGW2G5UUA'),
  ('length', '1792')]
 
+
+>>> x = CDXServer('http://web.archive.org/cdx/search/cdx').load_cdx(url = 'facebook.com', output = 'raw', limit = '2')
+Traceback (most recent call last):
+AccessException: Blocked By Robots
 """
 
 #=================================================================
diff --git a/pywb/replay_views.py b/pywb/replay_views.py
index 9113ad5f..31e7af9a 100644
--- a/pywb/replay_views.py
+++ b/pywb/replay_views.py
@@ -7,6 +7,7 @@ from wbrequestresponse import WbResponse
 from wbexceptions import CaptureException, InternalRedirect
 from pywb.warc.recordloader import ArchiveLoadFailed
 
+from pywb.utils.loaders import LimitReader
 
 #=================================================================
 class ReplayView:
@@ -54,10 +55,21 @@ class ReplayView:
 
                 response = None
 
+                # If the payload declares a Content-Length, cap the stream so we never read past it.
+                content_len = status_headers.get_header('content-length')
+                try:
+                    content_len = int(content_len)
+                    if content_len > 0:
+                        stream = LimitReader(stream, content_len)
+                except (ValueError, TypeError):
+                    pass  # non-numeric value, or None if the header is absent (int(None) raises TypeError): leave uncapped
+
                 if self.content_rewriter and wbrequest.wb_url.mod != 'id_':
                     response = self.rewrite_content(wbrequest, cdx, status_headers, stream)
                 else:
                     (status_headers, stream) = self.sanitize_content(status_headers, stream)
+                    #status_headers.remove_header('content-length')
+
                     response_iter = self.stream_to_iter(stream)
                     response = WbResponse(status_headers, response_iter)
 
diff --git a/pywb/test/test_archivalrouter.py b/pywb/test/test_archivalrouter.py
index 4379fbfd..229fafb6 100644
--- a/pywb/test/test_archivalrouter.py
+++ b/pywb/test/test_archivalrouter.py
@@ -15,6 +15,13 @@
  'wb_prefix': 'https://localhost:8081/my_pywb/web/',
  'wb_url': ('replay', '2013', 'im_', 'http://test.example.com', '2013im_/http://test.example.com')}
 
+# route with no collection
+>>> print_req(Route('', BaseHandler())({'REL_REQUEST_URI': 'http://example.com', 'SCRIPT_NAME': '/pywb'}, False))
+{'coll': '',
+ 'request_uri': 'http://example.com',
+ 'wb_prefix': '/pywb/',
+ 'wb_url': None}
+
 # not matching route -- skipped
 >>> Route('web', BaseHandler())({'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''}, False)
 
@@ -67,6 +74,13 @@ False
 >>> _test_redir('http://localhost:8080/', '/../other.html', 'http://localhost:8080/extra/coll/20131010/http://example.com/path/page.html', '/extr')
 False
 
+# With no collection
+>>> _test_redir('http://localhost:8080/', '/other.html', 'http://localhost:8080/2013/http://example.com/path/page.html', coll='')
+'http://localhost:8080/2013/http://example.com/other.html'
+
+# With SCRIPT_NAME but no collection
+>>> _test_redir('http://localhost:8080/', '/other.html', 'http://localhost:8080/pywb-access/http://example.com/path/page.html', '/pywb-access', coll='')
+'http://localhost:8080/pywb-access/http://example.com/other.html'
 
 """
 
diff --git a/pywb/utils/canonicalize.py b/pywb/utils/canonicalize.py
index bd21e4ca..73555ca6 100644
--- a/pywb/utils/canonicalize.py
+++ b/pywb/utils/canonicalize.py
@@ -118,10 +118,15 @@ def calc_search_range(url, match_type, surt_ordered=True, url_canon=None):
     >>> calc_search_range('http://example.com/path/file.html', 'host', False)
     ('example.com/', 'example.com0')
 
-    # domain range not supported
+    # errors: domain range not supported
     >>> calc_search_range('http://example.com/path/file.html', 'domain', False)
     Traceback (most recent call last):
-    Exception: matchType=domain unsupported for non-surt
+    UrlCanonicalizeException: matchType=domain unsupported for non-surt
+
+    >>> calc_search_range('http://example.com/path/file.html', 'blah', False)
+    Traceback (most recent call last):
+    UrlCanonicalizeException: Invalid match_type: blah
+
     """
     def inc_last_char(x):
         return x[0:-1] + chr(ord(x[-1]) + 1)
@@ -159,7 +164,7 @@ def calc_search_range(url, match_type, surt_ordered=True, url_canon=None):
 
     elif match_type == 'domain':
         if not surt_ordered:
-            raise Exception('matchType=domain unsupported for non-surt')
+            raise UrlCanonicalizeException('matchType=domain unsupported for non-surt')
 
         host = start_key.split(')/')[0]
 
@@ -172,7 +177,7 @@ def calc_search_range(url, match_type, surt_ordered=True, url_canon=None):
 
         end_key = host + '-'
     else:
-        raise Exception('Invalid match_type: ' + match_type)
+        raise UrlCanonicalizeException('Invalid match_type: ' + match_type)
 
     return (start_key, end_key)
 
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 1a7a943c..5a165041 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -2,6 +2,7 @@ import webtest
 from pywb.pywb_init import pywb_config
 from pywb.wbapp import create_wb_app
 from pywb.cdx.cdxobject import CDXObject
+from pywb.cdx.perms import AllowAllPerms
 
 class TestWb:
     TEST_CONFIG = 'test_config.yaml'
@@ -73,7 +74,19 @@ class TestWb:
 
         assert 'Mon, Jan 27 2014 17:12:38' in resp.body
         assert 'wb.js' in resp.body
-        assert '/pywb/20140127171238/http://www.iana.org/time-zones' in resp.body
+        assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body
+
+    def test_replay_identity_1(self):
+        resp = self.testapp.get('/pywb/20140127171251id_/http://example.com')
+        #resp = self.testapp.get('/pywb/20140126200654id_/http://www.iana.org/_img/2013.1/rir-map.svg')
+        #resp = self.testapp.get('/pywb/20140127171239id_/http://www.iana.org/_css/2013.1/screen.css')
+        #self._assert_basic_html(resp)
+
+        # identity ('id_' modifier) replay: no wb header insertion
+        assert 'wb.js' not in resp.body
+
+        # original unrewritten url present
+        assert '"http://www.iana.org/domains/example"' in resp.body
 
     def test_replay_content_length_1(self):
         # test larger file, rewritten file (svg!)
@@ -198,38 +211,21 @@ class TestWb:
 # Reporter callback for replay view
 class PrintReporter:
     def __call__(self, wbrequest, cdx, response):
-        print wbrequest
-        print cdx
+        #print wbrequest
+        #print cdx
         pass
 
 #=================================================================
-class TestExclusionPerms:
+class TestExclusionPerms(AllowAllPerms):
     """
-    Sample Perm Checker which allows all
+    Sample Perm Checker with hard-coded exclusion
     """
     def allow_url_lookup(self, urlkey, url):
         """
         Return true/false if url or urlkey (canonicalized url)
         should be allowed
         """
-        print urlkey
         if urlkey == 'org,iana)/_img/bookmark_icon.ico':
             return False
 
-        return True
-
-    def allow_capture(self, cdx):
-        """
-        Return true/false is specified capture (cdx) should be
-        allowed
-        """
-        return True
-
-    def filter_fields(self, cdx):
-        """
-        Filter out any forbidden cdx fields from cdx dictionary
-        """
-        return cdx
-
-
-
+        return super(TestExclusionPerms, self).allow_url_lookup(urlkey, url)