diff --git a/webagg/aggregator.py b/webagg/aggregator.py index 0f148492..cb7cf10a 100644 --- a/webagg/aggregator.py +++ b/webagg/aggregator.py @@ -221,24 +221,53 @@ class BaseDirectoryIndexSource(BaseAggregator): def _load_files(self, glob_dir): for the_dir in glob.iglob(glob_dir): - for name in os.listdir(the_dir): - filename = os.path.join(the_dir, name) + for result in self._load_files_single_dir(the_dir): + yield result - if filename.endswith(self.CDX_EXT): - print('Adding ' + filename) - rel_path = os.path.relpath(the_dir, self.base_prefix) - if rel_path == '.': - full_name = name - else: - full_name = rel_path + '/' + name + def _load_files_single_dir(self, the_dir): + for name in os.listdir(the_dir): + filename = os.path.join(the_dir, name) - yield full_name, FileIndexSource(filename) + if filename.endswith(self.CDX_EXT): + print('Adding ' + filename) + rel_path = os.path.relpath(the_dir, self.base_prefix) + if rel_path == '.': + full_name = name + else: + full_name = rel_path + '/' + name + + yield full_name, FileIndexSource(filename) def __str__(self): return 'file_dir' +#============================================================================= class DirectoryIndexSource(SeqAggMixin, BaseDirectoryIndexSource): pass +#============================================================================= +class CacheDirectoryIndexSource(DirectoryIndexSource): + def __init__(self, *args, **kwargs): + super(CacheDirectoryIndexSource, self).__init__(*args, **kwargs) + self.cached_file_list = {} + + def _load_files_single_dir(self, the_dir): + try: + stat = os.stat(the_dir) + except Exception as e: + stat = 0 + + result = self.cached_file_list.get(the_dir) + + if result: + last_stat, files = result + if stat and last_stat == stat: + print('Dir {0} unchanged'.format(the_dir)) + return files + + files = super(CacheDirectoryIndexSource, self)._load_files_single_dir(the_dir) + files = list(files) + self.cached_file_list[the_dir] = (stat, files) + return files diff --git a/webagg/responseloader.py b/webagg/responseloader.py index 4e7aeaf3..94a1f153 100644 --- a/webagg/responseloader.py +++ b/webagg/responseloader.py @@ -254,7 +254,7 @@ class LiveWebLoader(BaseLoader): try: fp = upstream_res.raw._fp.fp - if hasattr(fp, 'raw'): + if hasattr(fp, 'raw'): #pragma: no cover fp = fp.raw remote_ip = fp._sock.getpeername()[0] except: #pragma: no cover diff --git a/webagg/test/test_dir_agg.py b/webagg/test/test_dir_agg.py index b55d3755..0da78bf3 100644 --- a/webagg/test/test_dir_agg.py +++ b/webagg/test/test_dir_agg.py @@ -7,7 +7,10 @@ from .testutils import to_path, to_json_list, TempDirTests, BaseTestClass from mock import patch -from webagg.aggregator import DirectoryIndexSource, SimpleAggregator +import time + +from webagg.aggregator import DirectoryIndexSource, CacheDirectoryIndexSource +from webagg.aggregator import SimpleAggregator from webagg.indexsource import MementoIndexSource @@ -44,6 +47,7 @@ class TestDirAgg(TempDirTests, BaseTestClass): fh.write('foo') cls.dir_loader = DirectoryIndexSource(dir_prefix, dir_path) + cls.cache_dir_loader = CacheDirectoryIndexSource(dir_prefix, dir_path) def test_agg_no_coll_set(self): res, errs = self.dir_loader(dict(url='example.com/')) @@ -188,3 +192,25 @@ class TestDirAgg(TempDirTests, BaseTestClass): + def test_cache_dir_sources_1(self): + exp = {'sources': {'colls/A/indexes/example.cdxj': 'file', + 'colls/B/indexes/iana.cdxj': 'file', + 'colls/C/indexes/dupes.cdxj': 'file'} + } + + res = self.cache_dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'}) + assert(res == exp) + + res = self.cache_dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'}) + assert(res == exp) + + new_file = os.path.join(self.root_dir, 'colls/C/indexes/empty.cdxj') + + with open(new_file, 'a') as fh: + os.utime(new_file) + + res = self.cache_dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'}) + + # New File Included + exp['sources']['colls/C/indexes/empty.cdxj'] = 'file' + assert(res == exp) diff --git a/webagg/test/test_upstream.py b/webagg/test/test_upstream.py index 037b62e9..cd107811 100644 --- a/webagg/test/test_upstream.py +++ b/webagg/test/test_upstream.py @@ -36,7 +36,7 @@ class TestUpstream(LiveServerTests, BaseTestClass): def test_live_paths(self): res = requests.get(self.base_url + '/') - assert set(res.json().keys()) == {'/live/postreq', '/live'} + assert set(res.json().keys()) == {'/live/postreq', '/live', '/replay/postreq', '/replay'} def test_upstream_paths(self): res = self.testapp.get('/') diff --git a/webagg/test/testutils.py b/webagg/test/testutils.py index 4c5c42b6..c46e17f1 100644 --- a/webagg/test/testutils.py +++ b/webagg/test/testutils.py @@ -7,7 +7,7 @@ from multiprocessing import Process from wsgiref.simple_server import make_server -from webagg.aggregator import SimpleAggregator +from webagg.aggregator import SimpleAggregator, CacheDirectoryIndexSource from webagg.app import ResAggApp from webagg.handlers import DefaultResourceHandler from webagg.indexsource import LiveIndexSource @@ -66,6 +66,12 @@ class LiveServerTests(object): {'live': LiveIndexSource()}) ) ) + app.add_route('/replay', + DefaultResourceHandler(SimpleAggregator( + {'replay': CacheDirectoryIndexSource('./testdata/')}), + './testdata/' + ) + ) return app.application @classmethod