diff --git a/config.yaml b/config.yaml
index 07a2c303..8891f756 100644
--- a/config.yaml
+++ b/config.yaml
@@ -92,4 +92,8 @@ static_routes:
 enable_http_proxy: true
 
 # enable cdx server api for querying cdx directly (experimental)
-#enable_cdx_api: false
+enable_cdx_api: true
+
+# custom rules for domain specific matching
+# set to false to disable
+#domain_specific_rules: rules.yaml
diff --git a/pywb/cdx/canonicalize.py b/pywb/cdx/canonicalize.py
new file mode 100644
index 00000000..e0adb5c1
--- /dev/null
+++ b/pywb/cdx/canonicalize.py
@@ -0,0 +1,74 @@
+""" Standard url-canonicalization, surt and non-surt
+"""
+
+import surt
+from cdxobject import CDXException
+
+
+#=================================================================
+class UrlCanonicalizer(object):
+    def __init__(self, surt_ordered=True):
+        self.surt_ordered = surt_ordered
+
+    def __call__(self, url):
+        return canonicalize(url, self.surt_ordered)
+
+
+#=================================================================
+def canonicalize(url, surt_ordered=True):
+    """
+    Canonicalize url and convert to surt
+    If not in surt ordered mode, convert back to url form
+    as surt conversion is currently part of canonicalization
+
+    >>> canonicalize('http://example.com/path/file.html', surt_ordered=True)
+    'com,example)/path/file.html'
+
+    >>> canonicalize('http://example.com/path/file.html', surt_ordered=False)
+    'example.com/path/file.html'
+    """
+    try:
+        key = surt.surt(url)
+    except Exception as e:
+        raise CDXException('Invalid Url: ' + url)
+
+    # if not surt, unsurt the surt to get canonicalized non-surt url
+    if not surt_ordered:
+        key = unsurt(key)
+
+    return key
+
+
+#=================================================================
+def unsurt(surt):
+    """
+    # Simple surt
+    >>> unsurt('com,example)/')
+    'example.com/'
+
+    # Broken surt
+    >>> unsurt('com,example)')
+    'com,example)'
+
+    # Long surt
+    >>> unsurt('suffix,domain,sub,subsub,another,subdomain)/path/file/\
+index.html?a=b?c=)/')
+    'subdomain.another.subsub.sub.domain.suffix/path/file/index.html?a=b?c=)/'
+    """
+
+    try:
+        index = surt.index(')/')
+        parts = surt[0:index].split(',')
+        parts.reverse()
+        host = '.'.join(parts)
+        host += surt[index + 1:]
+        return host
+
+    except ValueError:
+        # May not be a valid surt
+        return surt
+
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
diff --git a/pywb/cdx/cdxdomainspecific.py b/pywb/cdx/cdxdomainspecific.py
new file mode 100644
index 00000000..2c733c8d
--- /dev/null
+++ b/pywb/cdx/cdxdomainspecific.py
@@ -0,0 +1,125 @@
+import yaml
+import re
+import logging
+import pkgutil
+
+from canonicalize import unsurt, UrlCanonicalizer
+
+
+#=================================================================
+def load_domain_specific_cdx_rules(filename, surt_ordered):
+    fh = pkgutil.get_data(__package__, filename)
+    config = yaml.load(fh)
+
+    # Load Canonicalizer Rules
+    rules = StartsWithRule.load_rules(config.get('canon_rules'),
+                                      surt_ordered)
+
+    if rules:
+        canon = CustomUrlCanonicalizer(rules, surt_ordered)
+    else:
+        canon = None
+
+    # Load Fuzzy Lookup Rules
+    rules = StartsWithRule.load_rules(config.get('fuzzy_lookup_rules'),
+                                      surt_ordered)
+
+    if rules:
+        fuzzy = FuzzyQuery(rules)
+    else:
+        fuzzy = None
+
+    logging.debug('CANON: ' + str(canon))
+    logging.debug('FUZZY: ' + str(fuzzy))
+    return (canon, fuzzy)
+
+
+#=================================================================
+class CustomUrlCanonicalizer(UrlCanonicalizer):
+    def __init__(self, rules, surt_ordered=True):
+        super(CustomUrlCanonicalizer, self).__init__(surt_ordered)
+        self.rules = rules
+
+    def __call__(self, url):
+        urlkey = super(CustomUrlCanonicalizer, self).__call__(url)
+
+        for rule in self.rules:
+            if not any(urlkey.startswith(x) for x in rule.starts):
+                continue
+
+            m = rule.regex.match(urlkey)
+            if not m:
+                continue
+
+            if rule.replace:
+                return m.expand(rule.replace)
+
+        return urlkey
+
+
+#=================================================================
+class FuzzyQuery:
+    def __init__(self, rules):
+        self.rules = rules
+
+    def __call__(self, params):
+        matched_rule = None
+
+        urlkey = params['key']
+        url = params['url']
+
+        for rule in self.rules:
+            if not any(urlkey.startswith(x) for x in rule.starts):
+                continue
+
+            m = rule.regex.search(urlkey)
+            if not m:
+                continue
+
+            matched_rule = rule
+
+            if len(m.groups()) == 1:
+                params['filter'] = '=urlkey:' + m.group(1)
+
+            break
+
+        if not matched_rule:
+            return None
+
+        inx = url.find('?')
+        if inx > 0:
+            params['url'] = url[:inx + 1]
+
+        params['matchType'] = 'prefix'
+        params['key'] = None
+        return params
+
+
+#=================================================================
+class StartsWithRule:
+    def __init__(self, config, surt_ordered=True):
+        self.starts = config.get('startswith')
+        if not isinstance(self.starts, list):
+            self.starts = [self.starts]
+
+        self.regex = re.compile(config.get('matches'))
+        self.replace = config.get('replace')
+
+    def unsurt(self):
+        # must convert to non-surt form
+        self.starts = map(unsurt, self.starts)
+        self.regex = unsurt(self.regex)
+        self.replace = unsurt(self.replace)
+
+    @staticmethod
+    def load_rules(rules_config, surt_ordered=True):
+        if not rules_config:
+            return []
+
+        rules = map(StartsWithRule, rules_config)
+
+        if not surt_ordered:
+            for rule in rules:
+                rule.unsurt()
+
+        return rules
diff --git a/pywb/cdx/cdxobject.py b/pywb/cdx/cdxobject.py
index ac3975b2..203cb7ef 100644
--- a/pywb/cdx/cdxobject.py
+++ b/pywb/cdx/cdxobject.py
@@ -2,6 +2,24 @@ from collections import OrderedDict
 import itertools
 
 
+#=================================================================
+class CDXException(Exception):
+    def status(self):
+        return '400 Bad Request'
+
+
+#=================================================================
+class CaptureNotFoundException(CDXException):
+    def status(self):
+        return '404 Not Found'
+
+
+#=================================================================
+class AccessException(CDXException):
+    def status(self):
+        return '403 Access Denied'
+
+
 #=================================================================
 class CDXObject(OrderedDict):
     CDX_FORMATS = [
diff --git a/pywb/cdx/cdxserver.py b/pywb/cdx/cdxserver.py
index 2beef250..17d16314 100644
--- a/pywb/cdx/cdxserver.py
+++ b/pywb/cdx/cdxserver.py
@@ -1,82 +1,103 @@
-import surt
-from cdxops import cdx_load
+from canonicalize import UrlCanonicalizer
 
-import itertools
+from cdxops import cdx_load
+from cdxsource import CDXSource, CDXFile, RemoteCDXSource
+from cdxobject import CDXObject, CaptureNotFoundException, CDXException
+from cdxdomainspecific import load_domain_specific_cdx_rules
+
+from itertools import chain
 import logging
 import os
 import urlparse
 
-from cdxsource import CDXSource, CDXFile, RemoteCDXSource
-from cdxobject import CDXObject
+
+#=================================================================
+class BaseCDXServer(object):
+    def __init__(self, url_canon=None, fuzzy_query=None):
+        self.url_canon = url_canon if url_canon else UrlCanonicalizer()
+        self.fuzzy_query = fuzzy_query
+
+    def _check_cdx_iter(self, cdx_iter, params):
+        """ Check cdx iter semantics
+        If iter is empty (no matches), check if fuzzy matching
+        is allowed, and retry the query with it -- otherwise,
+        raise CaptureNotFoundException
+        """
+
+        cdx_iter = self.peek_iter(cdx_iter)
+
+        if cdx_iter:
+            return cdx_iter
+
+        url = params['url']
+
+        if self.fuzzy_query and params.get('allow_fuzzy'):
+            if not 'key' in params:
+                params['key'] = self.url_canon(url)
+
+            params = self.fuzzy_query(params)
+            if params:
+                params['allow_fuzzy'] = False
+                return self.load_cdx(**params)
+
+        msg = 'No Captures found for: ' + url
+        raise CaptureNotFoundException(msg)
+
+    def load_cdx(self, **params):
+        raise NotImplementedError('Implement in subclass')
+
+    @staticmethod
+    def peek_iter(iterable):
+        try:
+            first = next(iterable)
+        except StopIteration:
+            return None
+
+        return chain([first], iterable)
 
 
 #=================================================================
-class CDXException(Exception):
-    def status(self):
-        return '400 Bad Request'
-
-
-#=================================================================
-class AccessException(CDXException):
-    def status(self):
-        return '403 Bad Request'
-
-
-#=================================================================
-class CDXServer(object):
+class CDXServer(BaseCDXServer):
     """
     Top-level cdx server object which maintains a list of cdx sources,
     responds to queries and dispatches to the cdx ops for processing
     """
 
-    def __init__(self, paths, surt_ordered=True):
+    def __init__(self, paths, url_canon=None, fuzzy_query=None):
+        super(CDXServer, self).__init__(url_canon, fuzzy_query)
         self.sources = create_cdx_sources(paths)
-        self.surt_ordered = surt_ordered
 
     def load_cdx(self, **params):
         # if key not set, assume 'url' is set and needs canonicalization
         if not params.get('key'):
-            params['key'] = self._canonicalize(params)
+            try:
+                url = params['url']
+            except KeyError:
+                msg = 'A url= param must be specified to query the cdx server'
+                raise CDXException(msg)
+
+            params['key'] = self.url_canon(url)
 
         convert_old_style_params(params)
 
-        return cdx_load(self.sources, params)
+        cdx_iter = cdx_load(self.sources, params)
 
-    def _canonicalize(self, params):
-        """
-        Canonicalize url and convert to surt
-        If no surt-mode, convert back to url form
-        as surt conversion is currently part of canonicalization
-        """
-        try:
-            url = params['url']
-        except KeyError:
-            msg = 'A url= param must be specified to query the cdx server'
-            raise CDXException(msg)
-
-        try:
-            key = surt.surt(url)
-        except Exception as e:
-            raise CDXException('Invalid Url: ' + url)
-
-        # if not surt, unsurt the surt to get canonicalized non-surt url
-        if not self.surt_ordered:
-            key = unsurt(key)
-
-        return key
+        return self._check_cdx_iter(cdx_iter, params)
 
     def __str__(self):
         return 'CDX server serving from ' + str(self.sources)
 
 
 #=================================================================
-class RemoteCDXServer(object):
+class RemoteCDXServer(BaseCDXServer):
     """
     A special cdx server that uses a single RemoteCDXSource
     It simply proxies the query params to the remote source
     and performs no local processing/filtering
     """
-    def __init__(self, source):
+    def __init__(self, source, url_canon=None, fuzzy_query=None):
+        super(RemoteCDXServer, self).__init__(url_canon, fuzzy_query)
+
         if isinstance(source, RemoteCDXSource):
             self.source = source
         elif (isinstance(source, str) and
@@ -87,18 +108,19 @@ class RemoteCDXServer(object):
 
     def load_cdx(self, **params):
         remote_iter = self.source.load_cdx(params)
+
         # if need raw, convert to raw format here
         if params.get('output') == 'raw':
-            return (CDXObject(cdx) for cdx in remote_iter)
-        else:
-            return remote_iter
+            remote_iter = (CDXObject(cdx) for cdx in remote_iter)
+
+        return self._check_cdx_iter(remote_iter, params)
 
     def __str__(self):
         return 'Remote CDX server serving from ' + str(self.sources[0])
 
 
 #=================================================================
-def create_cdx_server(config):
+def create_cdx_server(config, ds_rules_file=None):
     if hasattr(config, 'get'):
         paths = config.get('index_paths')
         surt_ordered = config.get('surt_ordered', True)
@@ -108,11 +130,22 @@ def create_cdx_server(config):
 
     logging.debug('CDX Surt-Ordered? ' + str(surt_ordered))
 
+    if ds_rules_file:
+        canon, fuzzy = load_domain_specific_cdx_rules(ds_rules_file,
+                                                      surt_ordered)
+    else:
+        canon, fuzzy = None, None
+
+    if not canon:
+        canon = UrlCanonicalizer(surt_ordered)
+
     if (isinstance(paths, str) and
         any(paths.startswith(x) for x in ['http://', 'https://'])):
-        return RemoteCDXServer(paths)
+        server_cls = RemoteCDXServer
     else:
-        return CDXServer(paths)
+        server_cls = CDXServer
+
+    return server_cls(paths, url_canon=canon, fuzzy_query=fuzzy)
 
 
 #=================================================================
@@ -170,13 +203,17 @@ def convert_old_style_params(params):
     """
     Convert old-style CDX Server param semantics
     """
-    collapse_time = params.get('collapseTime')
-    if collapse_time:
-        params['collapse_time'] = collapse_time
+    param = params.get('collapseTime')
+    if param:
+        params['collapse_time'] = param
 
-    resolve_revisits = params.get('resolveRevisits')
-    if resolve_revisits:
-        params['resolve_revisits'] = resolve_revisits
+    param = params.get('matchType')
+    if param:
+        params['match_type'] = param
+
+    param = params.get('resolveRevisits')
+    if param:
+        params['resolve_revisits'] = param
 
     if params.get('sort') == 'reverse':
         params['reverse'] = True
@@ -204,38 +241,3 @@ def extract_params_from_wsgi_env(env):
             params[name] = val[0]
 
     return params
-
-
-#=================================================================
-def unsurt(surt):
-    """
-    # Simple surt
-    >>> unsurt('com,example)/')
-    'example.com)/'
-
-    # Broken surt
-    >>> unsurt('com,example)')
-    'com,example)'
-
-    # Long surt
-    >>> unsurt('suffix,domain,sub,subsub,another,subdomain)/path/file/\
-index.html?a=b?c=)/')
-    'subdomain.another.subsub.sub.domain.suffix)/path/file/index.html?a=b?c=)/'
-    """
-
-    try:
-        index = surt.index(')/')
-        parts = surt[0:index].split(',')
-        parts.reverse()
-        host = '.'.join(parts)
-        host += surt[index:]
-        return host
-
-    except ValueError:
-        # May not be a valid surt
-        return surt
-
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
diff --git a/pywb/cdx/rules.yaml b/pywb/cdx/rules.yaml
new file mode 100644
index 00000000..1da70582
--- /dev/null
+++ b/pywb/cdx/rules.yaml
@@ -0,0 +1,24 @@
+
+fuzzy_lookup_rules:
+    - startswith: 'com,twitter)/i/profiles/show/'
+      matches: '/profiles/show/.*with_replies\?.*(max_id=[^&]+)'
+
+    - startswith: 'com,facebook)/ajax/pagelet/generic.php/profiletimelinesectionpagelet'
+      matches: 'com,facebook\)/.*[?&]data=(.*(?:[&]|query_type[^,]+))'
+
+    - startswith: ['com,yimg,l)/g/combo', 'com,yahooapis,yui)/combo']
+      matches: '([^/]+(?:\.css|\.js))'
+
+    # matches all urls
+    - startswith: ''
+      matches: '[&?](?:_|uncache)=[\d]+[&]?'
+
+canon_rules:
+    - startswith: 'com,facebook)/ajax/pagelet/generic.php/profiletimelinesectionpagelet'
+      matches: 'com,facebook\)/.*[?&]data=([^&]+).*'
+      replace: 'com,facebook)/ajax/pagelet/generic.php/profiletimelinesectionpagelet?data=\1'
+
+
+
+
+
diff --git a/pywb/cdx/test/cdxserver_test.py b/pywb/cdx/test/cdxserver_test.py
index c1434228..fc96acb2 100644
--- a/pywb/cdx/test/cdxserver_test.py
+++ b/pywb/cdx/test/cdxserver_test.py
@@ -25,6 +25,8 @@ org,iana)/_js/2013.1/jquery.js 20140126201307 https://www.iana.org/_js/2013.1/jq
 
 # No matching results
 >>> cdx_ops_test('http://iana.org/dont_have_this', reverse = True, resolve_revisits = True, limit = 2)
+Traceback (most recent call last):
+CaptureNotFoundException: No Captures found for: http://iana.org/dont_have_this
 
 
 # Filter cdx (default: regex)
diff --git a/pywb/indexreader.py b/pywb/indexreader.py
index aaf60705..493c1bbd 100644
--- a/pywb/indexreader.py
+++ b/pywb/indexreader.py
@@ -1,13 +1,7 @@
 import urllib
 import urllib2
 
-from wbexceptions import NotFoundException
-
-from itertools import chain
-from pprint import pprint
-
-from pywb.cdx.cdxserver import create_cdx_server, CDXException
-from pywb.cdx.cdxobject import CDXObject
+from pywb.cdx.cdxserver import create_cdx_server
 
 #=================================================================
 class IndexReader(object):
@@ -18,8 +12,8 @@ class IndexReader(object):
     Creates an appropriate query based on wbrequest type info
     """
 
-    def __init__(self, config):
-        self.cdx_server = create_cdx_server(config)
+    def __init__(self, config, ds_rules_file=None):
+        self.cdx_server = create_cdx_server(config, ds_rules_file)
 
     def load_for_request(self, wbrequest):
         wburl = wbrequest.wb_url
@@ -29,19 +23,14 @@ class IndexReader(object):
 
         # add any custom filter from the request
         if wbrequest.query_filter:
-            params['filter'] = wbrequest.query_filter
+            params['filter'].extend(wbrequest.query_filter)
 
         if wbrequest.custom_params:
             params.update(wbrequest.custom_params)
 
-        params['url'] = wburl.url
+        params['allow_fuzzy'] = True
 
-        cdxlines = self.load_cdx(output='raw', **params)
-
-        cdxlines = self.peek_iter(cdxlines)
-
-        if cdxlines is None:
-            raise NotFoundException('No Captures found for: ' + wburl.url)
+        cdxlines = self.load_cdx(url=wburl.url, output='raw', **params)
 
         return cdxlines
 
@@ -54,7 +43,7 @@ class IndexReader(object):
 
         return {
             wburl.QUERY:
-                {'collapseTime': collapse_time, 'filter': '!statuscode:(500|502|504)', 'limit': limit},
+                {'collapseTime': collapse_time, 'filter': ['!statuscode:(500|502|504)'], 'limit': limit},
 
             wburl.URL_QUERY:
                 {'collapse': 'urlkey', 'matchType': 'prefix', 'showGroupCount': True, 'showUniqCount': True, 'lastSkipTimestamp': True, 'limit': limit,
@@ -62,21 +51,12 @@ class IndexReader(object):
                 },
 
             wburl.REPLAY:
-                {'sort': 'closest', 'filter': '!statuscode:(500|502|504)', 'limit': replay_closest, 'closest': wburl.timestamp, 'resolveRevisits': True},
+                {'sort': 'closest', 'filter': ['!statuscode:(500|502|504)'], 'limit': replay_closest, 'closest': wburl.timestamp, 'resolveRevisits': True},
 
             # BUG: resolveRevisits currently doesn't work for this type of query
             # This is not an issue in archival mode, as there is a redirect to the actual timestamp query
             # but may be an issue in proxy mode
             wburl.LATEST_REPLAY:
-                {'sort': 'reverse', 'filter': 'statuscode:[23]..', 'limit': '1', 'resolveRevisits': True}
+                {'sort': 'reverse', 'filter': ['statuscode:[23]..'], 'limit': '1', 'resolveRevisits': True}
 
         }[wburl.type]
-
-    @staticmethod
-    def peek_iter(iterable):
-        try:
-            first = next(iterable)
-        except StopIteration:
-            return None
-
-        return chain([first], iterable)
diff --git a/pywb/pywb_init.py b/pywb/pywb_init.py
index a6d0500b..c4b40ee2 100644
--- a/pywb/pywb_init.py
+++ b/pywb/pywb_init.py
@@ -21,6 +21,8 @@ DEFAULTS = {
     'error_html': 'ui/error.html',
 
     'static_routes': {'static/default': 'static/'},
+
+    'domain_specific_rules': 'rules.yaml',
 }
 
 class DictChain:
@@ -30,7 +32,7 @@ class DictChain:
     def get(self, key, default_val=None):
         for d in self.dicts:
             val = d.get(key)
-            if val:
+            if val is not None:
                 return val
         return default_val
 
@@ -52,11 +54,13 @@ def pywb_config_manual(passed_config = {}):
     for name, value in collections.iteritems():
         if isinstance(value, str):
             route_config = config
-            cdx_server = IndexReader(value)
+            cdx_config = value
         else:
             route_config = DictChain(value, config)
-            cdx_server = IndexReader(route_config)
+            cdx_config = route_config
 
+        ds_rules = route_config.get('domain_specific_rules', None)
+        cdx_server = IndexReader(cdx_config, ds_rules)
 
         wb_handler = config_utils.create_wb_handler(
             cdx_server = cdx_server,
@@ -118,7 +122,8 @@ def pywb_config(config_file = None):
     if not config_file:
         config_file = os.environ.get('PYWB_CONFIG', DEFAULT_CONFIG_FILE)
 
-    config = yaml.load(open(config_file))
+    with open(config_file) as fh:
+        config = yaml.load(fh)
 
     return pywb_config_manual(config)
 
diff --git a/pywb/rewrite/rewrite_content.py b/pywb/rewrite/rewrite_content.py
index ae1383ff..9f904764 100644
--- a/pywb/rewrite/rewrite_content.py
+++ b/pywb/rewrite/rewrite_content.py
@@ -54,8 +54,7 @@ class RewriteContent:
         # =========================================================================
         # special case -- need to ungzip the body
         if (rewritten_headers.contains_removed_header('content-encoding', 'gzip')):
-            stream = BufferedReader(stream, 'gzip')
-
+            stream = BufferedReader(stream, decomp_type='gzip')
 
         if rewritten_headers.charset:
             encoding = rewritten_headers.charset
diff --git a/pywb/wbexceptions.py b/pywb/wbexceptions.py
index 8796db76..afacc325 100644
--- a/pywb/wbexceptions.py
+++ b/pywb/wbexceptions.py
@@ -1,14 +1,15 @@
 
+
 class WbException(Exception):
     pass
 
 class NotFoundException(WbException):
-    def status(_):
+    def status(self):
         return '404 Not Found'
 
 # Exceptions that effect a specific capture and result in a retry
 class CaptureException(WbException):
-    def status(_):
+    def status(self):
         return '500 Internal Server Error'
 
 class InternalRedirect(WbException):
diff --git a/test_config.yaml b/test_config.yaml
index 38a15f37..04dfee37 100644
--- a/test_config.yaml
+++ b/test_config.yaml
@@ -93,3 +93,6 @@ enable_cdx_api: true
 # optional reporter callback func
 # if set, called with request and cdx object
 reporter_func: pywb.run-tests.print_reporter
+
+# custom rules for domain specific matching
+#domain_specific_rules: rules.yaml
diff --git a/tests/test_integration.py b/tests/test_integration.py
index e639163b..59b4fc36 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -50,6 +50,13 @@ class TestWb:
         # 1 Capture (filtered) + header
         assert len(resp.html.find_all('tr')) == 2
 
+    def test_calendar_query_fuzzy_match(self):
+        # fuzzy match removing _= according to standard rules.yaml
+        resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css?_=3141592653')
+        self._assert_basic_html(resp)
+        # 17 Captures + header
+        assert len(resp.html.find_all('tr')) == 18
+
     def test_cdx_query(self):
         resp = self.testapp.get('/pywb/cdx_/*/http://www.iana.org/')
         self._assert_basic_text(resp)