mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

separate iter_sources from list_sources api

all errors are now returned as a JSON block with the error msg
tests for not-found and invalid errors
Ilya Kreymer 2016-02-29 12:34:06 -08:00
parent 68090d00c1
commit 008e5284b1
12 changed files with 304 additions and 145 deletions
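
For orientation, a minimal sketch of the reworked API as the new tests below exercise it. The '/many' endpoint name and the localhost address are assumptions taken from the test app, not fixed parts of the library:

    # hypothetical client session against a local test deployment
    import requests

    # 'list_sources' is now its own mode, answered by get_source_list()
    r = requests.get('http://localhost:8080/many', params={'mode': 'list_sources'})
    print(r.json())  # e.g. {'sources': {'local': 'file_dir', 'ia': 'memento', ...}}

    # errors now arrive as a JSON block carrying the error msg
    r = requests.get('http://localhost:8080/many')  # missing the required 'url' param
    print(r.status_code, r.json())  # 400 {'message': 'The "url" param is required'}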

View File

@@ -63,7 +63,6 @@ class BaseAggregator(object):
         try:
             _src_params = all_params['_all_src_params'].get(name)
             all_params['_src_params'] = _src_params
-
             cdx_iter = source.load_index(all_params)
         except NotFoundException as nf:
             print('Not found in ' + name)
@@ -89,15 +88,21 @@ class BaseAggregator(object):
         return cdx_iter

-    def _on_source_error(self, name):
+    def _on_source_error(self, name): #pragma: no cover
         pass

     def _load_all(self, params): #pragma: no cover
         raise NotImplemented()

-    def get_sources(self, params): #pragma: no cover
+    def _iter_sources(self, params): #pragma: no cover
         raise NotImplemented()

+    def get_source_list(self, params):
+        srcs = self._iter_sources(params)
+        result = [(name, str(value)) for name, value in srcs]
+        result = {'sources': dict(result)}
+        return result
+

 #=============================================================================
 class BaseSourceListAggregator(BaseAggregator):
@@ -107,7 +112,7 @@ class BaseSourceListAggregator(BaseAggregator):
     def get_all_sources(self, params):
         return self.sources

-    def get_sources(self, params):
+    def _iter_sources(self, params):
         sources = self.get_all_sources(params)
         srcs_list = params.get('sources')
         if not srcs_list:
@@ -125,7 +130,7 @@ class SeqAggMixin(object):

     def _load_all(self, params):
-        sources = list(self.get_sources(params))
+        sources = list(self._iter_sources(params))
         return list([self.load_child_source(name, source, params)
                      for name, source in sources])
@@ -160,8 +165,8 @@ class TimeoutMixin(object):
         return False

-    def get_sources(self, params):
-        sources = super(TimeoutMixin, self).get_sources(params)
+    def _iter_sources(self, params):
+        sources = super(TimeoutMixin, self)._iter_sources(params)
         for name, source in sources:
             if not self.is_timed_out(name):
                 yield name, source
@@ -185,7 +190,7 @@ class GeventMixin(object):
     def _load_all(self, params):
         params['_timeout'] = self.timeout

-        sources = list(self.get_sources(params))
+        sources = list(self._iter_sources(params))

         def do_spawn(name, source):
             return self.pool.spawn(self.load_child_source, name, source, params)
@@ -223,7 +228,7 @@ class ConcurrentMixin(object):
     def _load_all(self, params):
         params['_timeout'] = self.timeout

-        sources = list(self.get_sources(params))
+        sources = list(self._iter_sources(params))

         with self.pool_class(max_workers=self.size) as executor:
             def do_spawn(name, source):
@@ -257,7 +262,8 @@ class BaseDirectoryIndexAggregator(BaseAggregator):
         self.base_prefix = base_prefix
         self.base_dir = base_dir

-    def get_sources(self, params):
+    def _iter_sources(self, params):
+        self._set_src_params(params)
         # see if specific params (when part of another agg)
         src_params = params.get('_src_params')
         if not src_params:
@@ -270,7 +276,6 @@ class BaseDirectoryIndexAggregator(BaseAggregator):
             the_dir = self.base_dir

         the_dir = os.path.join(self.base_prefix, the_dir)
-
         try:
             sources = list(self._load_files(the_dir))
         except Exception:
@@ -290,6 +295,10 @@ class BaseDirectoryIndexAggregator(BaseAggregator):
                 rel_path = ''
             yield rel_path, FileIndexSource(filename)

+    def __str__(self):
+        return 'file_dir'
+

 class DirectoryIndexAggregator(SeqAggMixin, BaseDirectoryIndexAggregator):
     pass
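
Subclasses now implement only the _iter_sources() generator; get_source_list() on the base class turns that iterator into the JSON-friendly {'sources': {...}} mapping. A standalone sketch of the split, with hypothetical stub classes that are not part of the diff:

    class StubSource(object):
        def __str__(self):
            return 'stub'

    class StubAggregator(object):
        def _iter_sources(self, params):
            # subclasses yield (name, source) pairs
            yield 'a', StubSource()
            yield 'b', StubSource()

        def get_source_list(self, params):
            srcs = self._iter_sources(params)
            return {'sources': dict((name, str(value)) for name, value in srcs)}

    print(StubAggregator().get_source_list({}))  # {'sources': {'a': 'stub', 'b': 'stub'}}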

View File

@@ -1,31 +1,50 @@
-from rezag.inputrequest import WSGIInputRequest, POSTInputRequest
-from bottle import route, request, response, default_app
+from rezag.inputrequest import DirectWSGIInputRequest, POSTInputRequest
+from bottle import route, request, response, default_app, abort
+
+from pywb.utils.wbexception import WbException
+
+import traceback
+import json
+
+
+def err_handler(exc):
+    response.status = exc.status_code
+    response.content_type = 'application/json'
+    return json.dumps({'message': exc.body})
+
+
+def wrap_error(func):
+    def do_d(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except WbException as exc:
+            if application.debug:
+                traceback.print_exc()
+            abort(exc.status(), exc.msg)
+        except Exception as e:
+            if application.debug:
+                traceback.print_exc()
+            abort(500, 'Internal Error: ' + str(e))
+    return do_d
+

 def add_route(path, handler):
-    def debug(func):
-        def do_d():
-            try:
-                return func()
-            except Exception:
-                import traceback
-                traceback.print_exc()
-        return do_d
-
-    def direct_input_request():
+    @wrap_error
+    def direct_input_request(mode=''):
         params = dict(request.query)
-        params['_input_req'] = WSGIInputRequest(request.environ)
+        params['_input_req'] = DirectWSGIInputRequest(request.environ)
         return handler(params)

-    def post_fullrequest():
+    @wrap_error
+    def post_fullrequest(mode=''):
         params = dict(request.query)
         params['_input_req'] = POSTInputRequest(request.environ)
         return handler(params)

-    route(path + '/postreq', method=['POST'], callback=debug(post_fullrequest))
-    route(path, method=['ANY'], callback=debug(direct_input_request))
+    route(path + '/postreq', method=['POST'], callback=post_fullrequest)
+    route(path, method=['ANY'], callback=direct_input_request)


 application = default_app()
+application.default_error_handler = err_handler
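
The effect of the two pieces above: wrap_error converts a WbException into abort() with the exception's own status, and anything else into a 500, while the registered err_handler serializes whatever abort() raised as JSON. A self-contained sketch of that flow in plain Bottle; the route name and message here are illustrative only:

    import json
    from bottle import Bottle, abort, response

    app = Bottle()

    def err_handler(exc):
        # mirror of the handler in this commit: serialize the abort() body
        response.status = exc.status_code
        response.content_type = 'application/json'
        return json.dumps({'message': exc.body})

    # same hook the commit uses on the default app
    app.default_error_handler = err_handler

    @app.route('/thing')
    def thing():
        abort(400, 'The "url" param is required')

    # GET /thing -> HTTP 400 with body {"message": "The \"url\" param is required"}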

View File

@@ -1,12 +1,13 @@
-from rezag.responseloader import WARCPathHandler, LiveWebHandler
+from rezag.responseloader import WARCPathLoader, LiveWebLoader
 from rezag.utils import MementoUtils
-from pywb.warc.recordloader import ArchiveLoadFailed
+from pywb.utils.wbexception import BadRequestException, WbException
 from pywb.utils.wbexception import NotFoundException

 from bottle import response

 #=============================================================================
 def to_cdxj(cdx_iter, fields):
-    response.headers['Content-Type'] = 'text/x-cdxj'
+    response.headers['Content-Type'] = 'application/x-cdxj'
     return [cdx.to_cdxj(fields) for cdx in cdx_iter]

 def to_json(cdx_iter, fields):
@@ -37,26 +38,36 @@ class IndexHandler(object):
         self.index_source = index_source
         self.opts = opts or {}

-    def __call__(self, params):
-        if params.get('mode') == 'sources':
-            srcs = self.index_source.get_sources(params)
-            result = [(name, str(value)) for name, value in srcs]
-            result = {'sources': dict(result)}
-            return result
+    def get_supported_modes(self):
+        return dict(modes=['list_modes', 'list_sources', 'index'])
+
+    def _load_index_source(self, params):
+        url = params.get('url')
+        if not url:
+            raise BadRequestException('The "url" param is required')

         input_req = params.get('_input_req')
         if input_req:
-            params['alt_url'] = input_req.include_post_query(params.get('url'))
+            params['alt_url'] = input_req.include_post_query(url)

-        cdx_iter = self.index_source(params)
+        return self.index_source(params)
+
+    def __call__(self, params):
+        mode = params.get('mode', 'index')
+        if mode == 'list_sources':
+            return self.index_source.get_source_list(params)
+
+        if mode == 'list_modes' or mode != 'index':
+            return self.get_supported_modes()

         output = params.get('output', self.DEF_OUTPUT)
         fields = params.get('fields')

         handler = self.OUTPUTS.get(output)
         if not handler:
-            handler = self.OUTPUTS[self.DEF_OUTPUT]
+            raise BadRequestException('output={0} not supported'.format(output))

+        cdx_iter = self._load_index_source(params)
         res = handler(cdx_iter, fields)
         return res
@@ -67,57 +78,59 @@ class ResourceHandler(IndexHandler):
         super(ResourceHandler, self).__init__(index_source)
         self.resource_loaders = resource_loaders

+    def get_supported_modes(self):
+        res = super(ResourceHandler, self).get_supported_modes()
+        res['modes'].append('resource')
+        return res
+
     def __call__(self, params):
         if params.get('mode', 'resource') != 'resource':
             return super(ResourceHandler, self).__call__(params)

-        input_req = params.get('_input_req')
-        if input_req:
-            params['alt_url'] = input_req.include_post_query(params.get('url'))
-
-        cdx_iter = self.index_source(params)
-
-        any_found = False
+        cdx_iter = self._load_index_source(params)
+        last_exc = None

         for cdx in cdx_iter:
-            any_found = True
             for loader in self.resource_loaders:
                 try:
                     resp = loader(cdx, params)
-                    if resp:
+                    if resp is not None:
                         return resp
-                except ArchiveLoadFailed as e:
-                    print(e)
-                    pass
+                except WbException as e:
+                    last_exc = e

-        if any_found:
-            raise ArchiveLoadFailed('Resource Found, could not be Loaded')
+        if last_exc:
+            raise last_exc
+            #raise ArchiveLoadFailed('Resource Found, could not be Loaded')
         else:
-            raise ArchiveLoadFailed('No Resource Found')
+            raise NotFoundException('No Resource Found')

 #=============================================================================
 class DefaultResourceHandler(ResourceHandler):
     def __init__(self, index_source, warc_paths=''):
-        loaders = [WARCPathHandler(warc_paths, index_source),
-                   LiveWebHandler()
+        loaders = [WARCPathLoader(warc_paths, index_source),
+                   LiveWebLoader()
                   ]
         super(DefaultResourceHandler, self).__init__(index_source, loaders)

 #=============================================================================
 class HandlerSeq(object):
-    def __init__(self, loaders):
-        self.loaders = loaders
+    def __init__(self, handlers):
+        self.handlers = handlers

     def __call__(self, params):
-        for loader in self.loaders:
+        last_exc = None
+        for handler in self.handlers:
             try:
-                res = loader(params)
-                if res:
+                res = handler(params)
+                if res is not None:
                     return res
-            except ArchiveLoadFailed:
-                pass
+            except WbException as e:
+                last_exc = e

-        raise ArchiveLoadFailed('No Resource Found')
+        if last_exc:
+            raise last_exc
+        else:
+            raise NotFoundException('No Resource Found')
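
The reworked ResourceHandler and HandlerSeq share one fallback pattern: try each candidate, remember the last WbException, and re-raise it only if nothing succeeded; otherwise report NotFoundException('No Resource Found'). A tiny standalone illustration, with pywb's exception classes stubbed in:

    class WbException(Exception): pass
    class NotFoundException(WbException): pass

    def call_seq(handlers, params):
        last_exc = None
        for handler in handlers:
            try:
                res = handler(params)
                if res is not None:
                    return res
            except WbException as e:
                last_exc = e
        if last_exc:
            raise last_exc  # most specific failure wins
        # empty seq, or every handler returned None without raising
        raise NotFoundException('No Resource Found')

    def failing(params): raise NotFoundException('not in archive A')
    def succeeding(params): return 'resource from B'

    print(call_seq([failing, succeeding], {}))  # 'resource from B'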

View File

@@ -14,6 +14,9 @@ from rezag.liverec import patched_requests as requests
 from rezag.utils import MementoUtils

+WAYBACK_ORIG_SUFFIX = '{timestamp}id_/{url}'
+
+
 #=============================================================================
 class BaseIndexSource(object):
     def load_index(self, params): #pragma: no cover
@@ -22,10 +25,10 @@ class BaseIndexSource(object):
     @staticmethod
     def res_template(template, params):
         src_params = params.get('_src_params')
-        if src_params:
-            res = template.format(**src_params)
+        if not src_params:
+            res = template.format(url=params['url'])
         else:
-            res = template
+            res = template.format(url=params['url'], **src_params)

         return res
@@ -59,7 +62,7 @@ class RemoteIndexSource(BaseIndexSource):
     def load_index(self, params):
         api_url = self.res_template(self.api_url_template, params)
-        api_url += '?url=' + params['url']
+        print('API URL', api_url)
         r = requests.get(api_url, timeout=params.get('_timeout'))
         if r.status_code >= 400:
             raise NotFoundException(api_url)
@@ -169,7 +172,6 @@ class MementoIndexSource(BaseIndexSource):
     def get_timegate_links(self, params, closest):
         url = self.res_template(self.timegate_url, params)
-        url += params['url']
         accept_dt = timestamp_to_http_date(closest)
         res = requests.head(url, headers={'Accept-Datetime': accept_dt})
         if res.status_code >= 400:
@@ -179,7 +181,6 @@ class MementoIndexSource(BaseIndexSource):
     def get_timemap_links(self, params):
         url = self.res_template(self.timemap_url, params)
-        url += params['url']
         res = requests.get(url, timeout=params.get('_timeout'))
         if res.status_code >= 400:
             raise NotFoundException(url)
@@ -200,9 +201,9 @@ class MementoIndexSource(BaseIndexSource):
     @staticmethod
     def from_timegate_url(timegate_url, path='link'):
-        return MementoIndexSource(timegate_url,
-                                  timegate_url + 'timemap/' + path + '/',
-                                  timegate_url + '{timestamp}id_/{url}')
+        return MementoIndexSource(timegate_url + '{url}',
+                                  timegate_url + 'timemap/' + path + '/{url}',
+                                  timegate_url + WAYBACK_ORIG_SUFFIX)

     def __str__(self):
         return 'memento'
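
Since res_template now substitutes url (and any per-source params) into the template rather than appending it, index API templates carry explicit placeholders. A quick worked example of the same str.format expansion, with values borrowed from the Archive-It test later in this commit:

    template = ('http://wayback.archive-it.org/cdx/search/cdx'
                '?url={url}&filter=filename:ARCHIVEIT-({colls})-.*')
    src_params = {'colls': '5610|933'}

    print(template.format(url='http://iana.org/', **src_params))
    # http://wayback.archive-it.org/cdx/search/cdx?url=http://iana.org/&filter=filename:ARCHIVEIT-(5610|933)-.*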

View File

@@ -1,4 +1,3 @@
-from pywb.utils.loaders import extract_client_cookie
 from pywb.utils.loaders import extract_post_query, append_post_query
 from pywb.utils.loaders import LimitReader
 from pywb.utils.statusandheaders import StatusAndHeadersParser
@@ -9,7 +8,7 @@ from io import BytesIO

 #=============================================================================
-class WSGIInputRequest(object):
+class DirectWSGIInputRequest(object):
     def __init__(self, env):
         self.env = env
@@ -20,26 +19,10 @@ class WSGIInputRequest(object):
         headers = {}

         for name, value in iteritems(self.env):
+            # will be set by requests to match actual host
             if name == 'HTTP_HOST':
-                #name = 'Host'
-                #value = splits.netloc
-                # will be set automatically
                 continue

-            #elif name == 'HTTP_ORIGIN':
-            #    name = 'Origin'
-            #    value = (splits.scheme + '://' + splits.netloc)
-
-            elif name == 'HTTP_X_CSRFTOKEN':
-                name = 'X-CSRFToken'
-                cookie_val = extract_client_cookie(env, 'csrftoken')
-                if cookie_val:
-                    value = cookie_val
-
-            #elif name == 'HTTP_X_FORWARDED_PROTO':
-            #    name = 'X-Forwarded-Proto'
-            #    value = splits.scheme
-
             elif name.startswith('HTTP_'):
                 name = name[5:].title().replace('_', '-')
@@ -55,10 +38,7 @@ class WSGIInputRequest(object):
         return headers

     def get_req_body(self):
-        input_ = self.env.get('wsgi.input')
-        if not input_:
-            return None
+        input_ = self.env['wsgi.input']

         len_ = self._get_content_length()
         enc = self._get_header('Transfer-Encoding')
@@ -70,9 +50,6 @@ class WSGIInputRequest(object):
             data = None

         return data
-        #buf = data.read().decode('utf-8')
-        #print(buf)
-        #return StringIO(buf)

     def _get_content_type(self):
         return self.env.get('CONTENT_TYPE')
@@ -105,7 +82,7 @@ class WSGIInputRequest(object):

 #=============================================================================
-class POSTInputRequest(WSGIInputRequest):
+class POSTInputRequest(DirectWSGIInputRequest):
     def __init__(self, env):
         self.env = env

View File

@@ -2,6 +2,7 @@ from rezag.liverec import BaseRecorder
 from rezag.liverec import request as remote_request

 from pywb.utils.timeutils import timestamp_to_datetime, datetime_to_http_date
+from pywb.utils.wbexception import LiveResourceException

 from pywb.warc.resolvingloader import ResolvingLoader

 from io import BytesIO
@@ -29,7 +30,7 @@ def incr_reader(stream, header=None, size=8192):

 #=============================================================================
-class WARCPathHandler(object):
+class WARCPathLoader(object):
     def __init__(self, paths, cdx_source):
         self.paths = paths
         if isinstance(paths, str):
@@ -108,7 +109,7 @@ class HeaderRecorder(BaseRecorder):

 #=============================================================================
-class LiveWebHandler(object):
+class LiveWebLoader(object):
     SKIP_HEADERS = (b'link',
                     b'memento-datetime',
                     b'content-location',
@@ -140,14 +141,17 @@ class LiveWebHandler(object):
         method = input_req.get_req_method()
         data = input_req.get_req_body()

-        upstream_res = remote_request(url=load_url,
-                                      method=method,
-                                      recorder=recorder,
-                                      stream=True,
-                                      allow_redirects=False,
-                                      headers=req_headers,
-                                      data=data,
-                                      timeout=params.get('_timeout'))
+        try:
+            upstream_res = remote_request(url=load_url,
+                                          method=method,
+                                          recorder=recorder,
+                                          stream=True,
+                                          allow_redirects=False,
+                                          headers=req_headers,
+                                          data=data,
+                                          timeout=params.get('_timeout'))
+        except Exception:
+            raise LiveResourceException(load_url)

         resp_headers = recorder.get_header()
@@ -175,7 +179,7 @@ class LiveWebHandler(object):
         return dt.strftime('%Y-%m-%dT%H:%M:%SZ')

     @staticmethod
-    def _make_warc_id(id_=None):
+    def _make_warc_id(id_=None): #pragma: no cover
         if not id_:
             id_ = uuid.uuid1()
         return '<urn:uuid:{0}>'.format(id_)

View File

@@ -77,6 +77,7 @@ class MementoUtils(object):
             from_date = timestamp_to_http_date(first_cdx['timestamp'])
         except StopIteration:
             first_cdx = None
+            return

         # first memento link
         yield MementoUtils.make_timemap_memento_link(first_cdx, datetime=from_date)
@@ -91,4 +92,4 @@ class MementoUtils(object):

         # last memento link, if any
         if prev_cdx:
-            yield MementoUtils.make_timemap_memento_link(prev_cdx, end='')
+            yield MementoUtils.make_timemap_memento_link(prev_cdx, end='\n')

View File

@@ -32,8 +32,11 @@ setup(
         'rezag',
     ],
     install_requires=[
-        'pywb',
+        'pywb==1.0b',
     ],
+    dependency_links=[
+        'git+https://github.com/ikreymer/pywb.git@py3#egg=pywb-1.0b-py3',
+    ],
     zip_safe=True,
     entry_points="""
         [console_scripts]

View File

@@ -33,6 +33,9 @@ def setup_module():
     shutil.copy(to_path('testdata/iana.cdxj'), coll_B)
     shutil.copy(to_path('testdata/dupes.cdxj'), coll_C)

+    with open(to_path(root_dir) + 'somefile', 'w') as fh:
+        fh.write('foo')
+
     global dir_loader
     dir_loader = DirectoryIndexAggregator(dir_prefix, dir_path)
@@ -121,7 +124,7 @@ def test_agg_dir_and_memento():
                'local': dir_loader}
     agg_source = SimpleAggregator(sources)

-    res = agg_source({'url': 'example.com/', 'param.coll': '*', 'closest': '20100512', 'limit': 6})
+    res = agg_source({'url': 'example.com/', 'param.local.coll': '*', 'closest': '20100512', 'limit': 6})

     exp = [
         {'source': 'ia', 'timestamp': '20100513052358', 'load_url': 'http://web.archive.org/web/20100513052358id_/http://example.com/'},
@@ -144,7 +147,7 @@ def test_agg_no_dir_1():

 def test_agg_no_dir_2():
-    loader = DirectoryIndexAggregator(root_dir, 'no_such')
+    loader = DirectoryIndexAggregator(root_dir, '')
     res = loader({'url': 'example.com/', 'param.coll': 'X'})

     exp = []
@@ -152,4 +155,31 @@ def test_agg_no_dir_2():
     assert(to_json_list(res) == exp)

+
+def test_agg_dir_sources_1():
+    res = dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'})
+    exp = {'sources': {'colls/A/indexes': 'file',
+                       'colls/B/indexes': 'file',
+                       'colls/C/indexes': 'file'}
+          }
+
+    assert(res == exp)
+
+
+def test_agg_dir_sources_2():
+    res = dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '[A,C]'})
+    exp = {'sources': {'colls/A/indexes': 'file',
+                       'colls/C/indexes': 'file'}
+          }
+
+    assert(res == exp)
+
+
+def test_agg_dir_sources_single_dir():
+    loader = DirectoryIndexAggregator('testdata/', '')
+    res = loader.get_source_list({'url': 'example.com/'})
+    exp = {'sources': {}}
+
+    assert(res == exp)

View File

@@ -42,13 +42,17 @@ def setup_module(self):
     source3 = SimpleAggregator({'example': FileIndexSource(to_path('testdata/example.cdxj'))})
     handler3 = DefaultResourceHandler(source3, to_path('testdata/'))

     add_route('/fallback', HandlerSeq([handler3,
                                        handler2,
                                        live_handler]))

+    add_route('/seq', HandlerSeq([handler3,
+                                  handler2]))
+
+    add_route('/empty', HandlerSeq([]))
+    add_route('/invalid', HandlerSeq(['foo']))

-    bottle.debug = True
+    application.debug = True

     global testapp
     testapp = webtest.TestApp(application)
@@ -61,8 +65,23 @@ class TestResAgg(object):
     def setup(self):
         self.testapp = testapp

+    def test_list_handlers(self):
+        resp = self.testapp.get('/many?mode=list_modes')
+        assert resp.json == {'modes': ['list_modes', 'list_sources', 'index', 'resource']}
+
+        resp = self.testapp.get('/many?mode=other')
+        assert resp.json == {'modes': ['list_modes', 'list_sources', 'index', 'resource']}
+
+        # defaults to resource, must specify url
+        resp = self.testapp.get('/many', status=400)
+        assert resp.json == {'message': 'The "url" param is required'}
+
+    def test_list_sources(self):
+        resp = self.testapp.get('/many?mode=list_sources')
+        assert resp.json == {'sources': {'local': 'file_dir', 'ia': 'memento', 'rhiz': 'memento', 'live': 'live'}}
+
     def test_live_index(self):
-        resp = self.testapp.get('/live?url=http://httpbin.org/get&mode=index&output=json')
+        resp = self.testapp.get('/live?mode=index&url=http://httpbin.org/get&output=json')
         resp.charset = 'utf-8'
         res = to_json_list(resp.text)
@@ -71,7 +90,8 @@ class TestResAgg(object):
                         'load_url': 'http://httpbin.org/get', 'source': 'live', 'timestamp': '2016'}])

     def test_live_resource(self):
-        resp = self.testapp.get('/live?url=http://httpbin.org/get?foo=bar&mode=resource')
+        headers = {'foo': 'bar'}
+        resp = self.testapp.get('/live?url=http://httpbin.org/get?foo=bar', headers=headers)

         assert resp.headers['WARC-Coll'] == 'live'
         assert resp.headers['WARC-Target-URI'] == 'http://httpbin.org/get?foo=bar'
@@ -82,7 +102,7 @@ class TestResAgg(object):

     def test_live_post_resource(self):
-        resp = self.testapp.post('/live?url=http://httpbin.org/post&mode=resource',
+        resp = self.testapp.post('/live?url=http://httpbin.org/post',
                                  OrderedDict([('foo', 'bar')]))

         assert resp.headers['WARC-Coll'] == 'live'
@@ -204,6 +224,11 @@ foo=bar&test=abc"""
         assert resp.headers['WARC-Target-URI'] == 'http://example.com/'
         assert b'HTTP/1.1 200 OK' in resp.body

+    def test_error_fallback_live_not_found(self):
+        resp = self.testapp.get('/fallback?url=http://invalid.url-not-found', status=400)
+        assert resp.json == {'message': 'http://invalid.url-not-found'}
+
     def test_agg_local_revisit(self):
         resp = self.testapp.get('/many?url=http://www.example.com/&closest=20140127171251&sources=local')
@@ -214,3 +239,24 @@ foo=bar&test=abc"""
         assert resp.headers['WARC-Refers-To-Date'] == '2014-01-27T17:12:00Z'
         assert b'HTTP/1.1 200 OK' in resp.body
         assert b'<!doctype html>' in resp.body
+
+    def test_error_invalid_index_output(self):
+        resp = self.testapp.get('/live?mode=index&url=http://httpbin.org/get&output=foobar', status=400)
+        assert resp.json == {'message': 'output=foobar not supported'}
+
+    def test_error_local_not_found(self):
+        resp = self.testapp.get('/many?url=http://not-found.error/&sources=local', status=404)
+        assert resp.json == {'message': 'No Resource Found'}
+
+    def test_error_empty(self):
+        resp = self.testapp.get('/empty?url=http://example.com/', status=404)
+        assert resp.json == {'message': 'No Resource Found'}
+
+    def test_error_invalid(self):
+        resp = self.testapp.get('/invalid?url=http://example.com/', status=500)
+        assert resp.json['message'].startswith('Internal Error')

View File

@@ -32,16 +32,20 @@ local_sources = [

 remote_sources = [
-    RemoteIndexSource('http://webenact.rhizome.org/all-cdx',
+    RemoteIndexSource('http://webenact.rhizome.org/all-cdx?url={url}',
                       'http://webenact.rhizome.org/all/{timestamp}id_/{url}'),

-    MementoIndexSource('http://webenact.rhizome.org/all/',
-                       'http://webenact.rhizome.org/all/timemap/*/',
+    MementoIndexSource('http://webenact.rhizome.org/all/{url}',
+                       'http://webenact.rhizome.org/all/timemap/*/{url}',
                        'http://webenact.rhizome.org/all/{timestamp}id_/{url}')
 ]

+ait_source = RemoteIndexSource('http://wayback.archive-it.org/cdx?url={url}',
+                               'http://wayback.archive-it.org/all/{timestamp}id_/{url}')
+

 def query_single_source(source, params):
+    string = str(source)
     return SimpleAggregator({'source': source})(params)
@@ -182,4 +186,22 @@ def test_file_not_found():

+
+def test_ait_filters():
+    ait_source = RemoteIndexSource('http://wayback.archive-it.org/cdx/search/cdx?url={url}&filter=filename:ARCHIVEIT-({colls})-.*',
+                                   'http://wayback.archive-it.org/all/{timestamp}id_/{url}')
+
+    cdxlist = query_single_source(ait_source, {'url': 'http://iana.org/', 'param.source.colls': '5610|933'})
+    filenames = [cdx['filename'] for cdx in cdxlist]
+
+    prefix = ('ARCHIVEIT-5610-', 'ARCHIVEIT-933-')
+
+    assert(all([x.startswith(prefix) for x in filenames]))
+
+    cdxlist = query_single_source(ait_source, {'url': 'http://iana.org/', 'param.source.colls': '1883|366|905'})
+    filenames = [cdx['filename'] for cdx in cdxlist]
+
+    prefix = ('ARCHIVEIT-1883-', 'ARCHIVEIT-366-', 'ARCHIVEIT-905-')
+
+    assert(all([x.startswith(prefix) for x in filenames]))

View File

@@ -27,10 +27,11 @@ aggs = {'simple': SimpleAggregator(sources),
         'processes': ThreadedTimeoutAggregator(sources, timeout=5.0, use_processes=True),
        }

-#@pytest.mark.parametrize("agg", aggs, ids=["simple", "gevent_timeout"])
-def pytest_generate_tests(metafunc):
-    metafunc.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
+#def pytest_generate_tests(metafunc):
+#    metafunc.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))

+@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
 def test_mem_agg_index_1(agg):
     url = 'http://iana.org/'
     res = agg(dict(url=url, closest='20140126000000', limit=5))
@@ -46,6 +47,7 @@ def test_mem_agg_index_1(agg):
     assert(json_list(res) == exp)

+@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
 def test_mem_agg_index_2(agg):
     url = 'http://example.com/'
     res = agg(dict(url=url, closest='20100512', limit=6))
@@ -60,6 +62,7 @@ def test_mem_agg_index_2(agg):
     assert(json_list(res) == exp)

+@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
 def test_mem_agg_index_3(agg):
     url = 'http://vvork.com/'
     res = agg(dict(url=url, closest='20141001', limit=5))
@@ -73,6 +76,7 @@ def test_mem_agg_index_3(agg):
     assert(json_list(res) == exp)

+@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
 def test_mem_agg_index_4(agg):
     url = 'http://vvork.com/'
     res = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
@@ -83,10 +87,11 @@ def test_mem_agg_index_4(agg):
     assert(json_list(res) == exp)

-def test_handler_output_cdxj(agg):
-    loader = IndexHandler(agg)
+def test_handler_output_cdxj():
+    agg = GeventTimeoutAggregator(sources, timeout=5.0)
+    handler = IndexHandler(agg)
     url = 'http://vvork.com/'
-    res = loader(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
+    res = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))

     exp = """\
 com,vvork)/ 20141006184357 {"url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
@@ -96,10 +101,11 @@ com,vvork)/ 20131004231540 {"url": "http://vvork.com/", "mem_rel": "last memento
     assert(''.join(res) == exp)

-def test_handler_output_json(agg):
-    loader = IndexHandler(agg)
+def test_handler_output_json():
+    agg = GeventTimeoutAggregator(sources, timeout=5.0)
+    handler = IndexHandler(agg)
     url = 'http://vvork.com/'
-    res = loader(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json'))
+    res = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json'))

     exp = """\
 {"urlkey": "com,vvork)/", "timestamp": "20141006184357", "url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
@@ -109,22 +115,50 @@ def test_handler_output_json(agg):
     assert(''.join(res) == exp)

-def test_handler_output_link(agg):
-    loader = IndexHandler(agg)
+def test_handler_output_link():
+    agg = GeventTimeoutAggregator(sources, timeout=5.0)
+    handler = IndexHandler(agg)
     url = 'http://vvork.com/'
-    res = loader(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link'))
+    res = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link'))

     exp = """\
 <http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT"; src="rhiz",
-<http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/>; rel="memento"; datetime="Fri, 04 Oct 2013 23:15:40 GMT"; src="ait"\
+<http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/>; rel="memento"; datetime="Fri, 04 Oct 2013 23:15:40 GMT"; src="ait"
 """

     assert(''.join(res) == exp)

+def test_handler_output_link_2():
+    agg = GeventTimeoutAggregator(sources, timeout=5.0)
+    handler = IndexHandler(agg)
+    url = 'http://iana.org/'
+    res = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
+
+    exp = """\
+<http://web.archive.org/web/20140126093743id_/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT"; src="ia",
+<filename://iana.warc.gz>; rel="memento"; datetime="Sun, 26 Jan 2014 20:06:24 GMT"; src="local",
+<http://web.archive.org/web/20140123034755id_/http://iana.org/>; rel="memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT"; src="ia",
+<http://web.archive.org/web/20140129175203id_/http://iana.org/>; rel="memento"; datetime="Wed, 29 Jan 2014 17:52:03 GMT"; src="ia",
+<http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/>; rel="memento"; datetime="Tue, 07 Jan 2014 04:05:52 GMT"; src="ait"
+"""
+    assert(''.join(res) == exp)
+
+
+def test_handler_output_link_3():
+    agg = GeventTimeoutAggregator(sources, timeout=5.0)
+    handler = IndexHandler(agg)
+    url = 'http://foo.bar.non-existent'
+    res = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
+    exp = ''
+    assert(''.join(res) == exp)
+

-def test_handler_output_text(agg):
-    loader = IndexHandler(agg)
+def test_handler_output_text():
+    agg = GeventTimeoutAggregator(sources, timeout=5.0)
+    handler = IndexHandler(agg)
     url = 'http://vvork.com/'
-    res = loader(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))
+    res = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))

     exp = """\
 com,vvork)/ 20141006184357 http://www.vvork.com/ memento http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/ http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/ rhiz
@@ -133,9 +167,10 @@ com,vvork)/ 20131004231540 http://vvork.com/ last memento http://wayback.archive
     assert(''.join(res) == exp)

-def test_handler_list_sources(agg):
-    loader = IndexHandler(agg)
-    res = loader(dict(mode='sources'))
+def test_handler_list_sources():
+    agg = GeventTimeoutAggregator(sources, timeout=5.0)
+    handler = IndexHandler(agg)
+    res = handler(dict(mode='list_sources'))

     assert(res == {'sources': {'bl': 'memento',
                                'ait': 'memento',
@@ -143,4 +178,3 @@ def test_handler_list_sources(agg):
                                'rhiz': 'memento',
                                'local': 'file'}})