1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Windows Build Fixes/Appveyor CI (#225)

windows build fixes: all tests should pass, ci with appveyor
- add appveyor.yml
- path fixes for windows, use os.path.join
- templates_dir: use '/' always for jinja2 paths
- auto colls: ensure chdir before deleting dir
- recorder: ensure warc writer is always closed
- recorder: disable locking in warcwriter on windows for now (read access not avail, shared
lock seems to not be working)
- zipnum: ensure block is closed after read!
- cached dir test: wait before adding file
- tests: adjust timeout tests to allow more leeway in timing
This commit is contained in:
Ilya Kreymer 2017-08-05 17:12:16 -07:00 committed by GitHub
parent a6ab167dd3
commit bcb5bef39d
12 changed files with 110 additions and 50 deletions

28
appveyor.yml Normal file
View File

@ -0,0 +1,28 @@
environment:
  global:
    CMD_IN_ENV: "cmd /E:ON /V:ON /C obvci_appveyor_python_build_env.cmd"

  matrix:
    - PYTHON: "C:\\Python27"
    - PYTHON: "C:\\Python27-x64"
    - PYTHON: "C:\\Python35"
    - PYTHON: "C:\\Python35-x64"
    - PYTHON: "C:\\Python36"
    - PYTHON: "C:\\Python36-x64"

install:
  - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
  - "pip install --disable-pip-version-check --user --upgrade pip"
  - "pip install -U setuptools"
  - "pip install coverage pytest-cov coveralls"
  - "pip install cffi"
  - "pip install pyopenssl"
  - "pip install certauth boto youtube-dl"

build_script:
  - "python setup.py install"

test_script:
  - "python setup.py test"

View File

@ -1,6 +1,5 @@
certauth certauth
youtube-dl youtube-dl
boto boto
uwsgi
git+https://github.com/t0m/pyamf.git@python3 git+https://github.com/t0m/pyamf.git@python3
git+https://github.com/esnme/ultrajson.git git+https://github.com/esnme/ultrajson.git

View File

@ -142,9 +142,10 @@ class FrontEndApp(object):
if not coll or not self.warcserver.root_dir: if not coll or not self.warcserver.root_dir:
return return
environ['pywb.templates_dir'] = os.path.join(self.warcserver.root_dir, # jinja2 template paths always use '/' as separator
coll, environ['pywb.templates_dir'] = '/'.join([self.warcserver.root_dir,
self.templates_dir) coll,
self.templates_dir])
def serve_listing(self, environ): def serve_listing(self, environ):
result = {'fixed': self.warcserver.list_fixed_routes(), result = {'fixed': self.warcserver.list_fixed_routes(),

View File

@ -97,7 +97,8 @@ class MultiFileWARCWriter(BaseWARCWriter):
def _close_file(self, fh): def _close_file(self, fh):
try: try:
portalocker.lock(fh, portalocker.LOCK_UN) if os.name != 'nt':
portalocker.lock(fh, portalocker.LOCK_UN)
fh.close() fh.close()
except Exception as e: except Exception as e:
print(e) print(e)
@ -222,7 +223,8 @@ class MultiFileWARCWriter(BaseWARCWriter):
self.fh_cache.pop(dir_key, None) self.fh_cache.pop(dir_key, None)
elif is_new: elif is_new:
portalocker.lock(out, portalocker.LOCK_EX | portalocker.LOCK_NB) if os.name != 'nt':
portalocker.lock(out, portalocker.LOCK_EX | portalocker.LOCK_NB)
self.fh_cache[dir_key] = (out, filename) self.fh_cache[dir_key] = (out, filename)
def iter_open_files(self): def iter_open_files(self):

View File

@ -66,7 +66,7 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
dedup_index = WritableRedisIndexer(redis_url=redis_url, dedup_index = WritableRedisIndexer(redis_url=redis_url,
file_key_template=file_key_template, file_key_template=file_key_template,
rel_path_template=self.root_dir + '/warcs/', rel_path_template=to_path(self.root_dir + '/warcs/'),
dupe_policy=dupe_policy) dupe_policy=dupe_policy)
return dedup_index return dedup_index
@ -293,11 +293,11 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
assert cdx['urlkey'] == 'org,httpbin)/user-agent' assert cdx['urlkey'] == 'org,httpbin)/user-agent'
assert cdx['mime'] == 'application/json' assert cdx['mime'] == 'application/json'
assert cdx['offset'] == '0' assert cdx['offset'] == '0'
assert cdx['filename'].startswith('USER/COLL/') assert cdx['filename'].startswith(to_path('USER/COLL/'))
assert cdx['filename'].endswith('.warc.gz') assert cdx['filename'].endswith('.warc.gz')
warcs = r.hgetall('USER:COLL:warc') warcs = r.hgetall('USER:COLL:warc')
full_path = self.root_dir + '/warcs/' + cdx['filename'] full_path = to_path(self.root_dir + '/warcs/' + cdx['filename'])
assert warcs == {cdx['filename'].encode('utf-8'): full_path.encode('utf-8')} assert warcs == {cdx['filename'].encode('utf-8'): full_path.encode('utf-8')}
def test_record_param_user_coll_same_dir(self): def test_record_param_user_coll_same_dir(self):
@ -353,7 +353,7 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
assert cdx['urlkey'] == 'org,httpbin)/user-agent' assert cdx['urlkey'] == 'org,httpbin)/user-agent'
assert cdx['mime'] == 'warc/revisit' assert cdx['mime'] == 'warc/revisit'
assert cdx['offset'] == '0' assert cdx['offset'] == '0'
assert cdx['filename'].startswith('USER/COLL/') assert cdx['filename'].startswith(to_path('USER/COLL/'))
assert cdx['filename'].endswith('.warc.gz') assert cdx['filename'].endswith('.warc.gz')
fullwarc = os.path.join(self.root_dir, 'warcs', cdx['filename']) fullwarc = os.path.join(self.root_dir, 'warcs', cdx['filename'])
@ -436,10 +436,13 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
assert os.path.isfile(path) assert os.path.isfile(path)
assert len(writer.fh_cache) == 1 assert len(writer.fh_cache) == 1
writer.close()
assert len(writer.fh_cache) == 0
def test_record_multiple_writes_keep_open(self): def test_record_multiple_writes_keep_open(self):
warc_path = to_path(self.root_dir + '/warcs/FOO/ABC-{hostname}-{timestamp}.warc.gz') warc_path = to_path(self.root_dir + '/warcs/FOO/ABC-{hostname}-{timestamp}.warc.gz')
rel_path = self.root_dir + '/warcs/' rel_path = to_path(self.root_dir + '/warcs/')
dedup_index = self._get_dedup_index(user=False) dedup_index = self._get_dedup_index(user=False)
@ -487,7 +490,7 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
assert len(writer.fh_cache) == 1 assert len(writer.fh_cache) == 1
writer.close_key(self.root_dir + '/warcs/FOO/') writer.close_key(to_path(self.root_dir + '/warcs/FOO/'))
assert len(writer.fh_cache) == 0 assert len(writer.fh_cache) == 0
@ -501,10 +504,13 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
warcs = r.hgetall('FOO:warc') warcs = r.hgetall('FOO:warc')
assert len(warcs) == 2 assert len(warcs) == 2
writer.close()
assert len(writer.fh_cache) == 0
def test_record_multiple_writes_rollover_idle(self): def test_record_multiple_writes_rollover_idle(self):
warc_path = to_path(self.root_dir + '/warcs/GOO/ABC-{hostname}-{timestamp}.warc.gz') warc_path = to_path(self.root_dir + '/warcs/GOO/ABC-{hostname}-{timestamp}.warc.gz')
rel_path = self.root_dir + '/warcs/' rel_path = to_path(self.root_dir + '/warcs/')
dedup_index = self._get_dedup_index(user=False) dedup_index = self._get_dedup_index(user=False)
@ -539,13 +545,16 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
self._test_all_warcs('/warcs/GOO/', 2) self._test_all_warcs('/warcs/GOO/', 2)
writer.close()
assert len(writer.fh_cache) == 0
def test_record_custom_record(self): def test_record_custom_record(self):
dedup_index = self._get_dedup_index(user=False) dedup_index = self._get_dedup_index(user=False)
warc_path = to_path(self.root_dir + '/warcs/meta/meta.warc.gz') warc_path = to_path(self.root_dir + '/warcs/meta/meta.warc.gz')
recorder_app = RecorderApp(self.upstream_url, writer = MultiFileWARCWriter(warc_path, dedup_index=dedup_index)
MultiFileWARCWriter(warc_path, dedup_index=dedup_index)) recorder_app = RecorderApp(self.upstream_url, writer)
req_url = '/live/resource/postreq?url=custom://httpbin.org&param.recorder.coll=META&put_record=resource' req_url = '/live/resource/postreq?url=custom://httpbin.org&param.recorder.coll=META&put_record=resource'
@ -568,7 +577,9 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
warcs = r.hgetall('META:warc') warcs = r.hgetall('META:warc')
assert len(warcs) == 1 assert len(warcs) == 1
with open(warcs[b'meta/meta.warc.gz'], 'rb') as fh: warc_key = os.path.join('meta', 'meta.warc.gz').encode('utf-8')
with open(warcs[warc_key], 'rb') as fh:
decomp = DecompressingBufferedReader(fh) decomp = DecompressingBufferedReader(fh)
record = ArcWarcRecordLoader().parse_record_stream(decomp, ensure_http_headers=True) record = ArcWarcRecordLoader().parse_record_stream(decomp, ensure_http_headers=True)
@ -592,6 +603,9 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
assert status_headers.get_header('Content-Type') == 'text/plain' assert status_headers.get_header('Content-Type') == 'text/plain'
assert status_headers.get_header('Content-Length') == str(len(buff)) assert status_headers.get_header('Content-Length') == str(len(buff))
writer.close()
assert len(writer.fh_cache) == 0
def test_record_video_metadata(self): def test_record_video_metadata(self):
warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/') warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/')

View File

@ -119,10 +119,13 @@ class BaseInsertView(object):
template_path = env.get('pywb.templates_dir') template_path = env.get('pywb.templates_dir')
if template_path: if template_path:
template_path = os.path.join(template_path, self.insert_file) # jinja paths are not os paths, always use '/' as separator
# https://github.com/pallets/jinja/issues/411
template_path = template_path + '/' + self.insert_file
try: try:
template = self.jenv.jinja_env.get_template(template_path) template = self.jenv.jinja_env.get_template(template_path)
except TemplateNotFound: except TemplateNotFound as te:
pass pass
if not template: if not template:

View File

@ -274,7 +274,7 @@ class BaseDirectoryIndexSource(BaseAggregator):
if rel_path == '.': if rel_path == '.':
full_name = name full_name = name
else: else:
full_name = rel_path + '/' + name full_name = os.path.join(rel_path, name)
yield full_name, FileIndexSource(filename) yield full_name, FileIndexSource(filename)
@ -294,6 +294,8 @@ class BaseDirectoryIndexSource(BaseAggregator):
@classmethod @classmethod
def init_from_string(cls, value): def init_from_string(cls, value):
if os.path.sep != '/':
value = value.replace('/', os.path.sep)
if '://' not in value and os.path.isdir(value): if '://' not in value and os.path.isdir(value):
return cls(value) return cls(value)

View File

@ -57,7 +57,7 @@ class TestDirAgg(TempDirTests, BaseTestClass):
def test_agg_collA_found(self): def test_agg_collA_found(self):
res, errs = self.dir_loader({'url': 'example.com/', 'param.coll': 'A'}) res, errs = self.dir_loader({'url': 'example.com/', 'param.coll': 'A'})
exp = [{'source': 'colls/A/indexes/example2.cdxj', 'timestamp': '20160225042329', 'filename': 'example2.warc.gz'}] exp = [{'source': to_path('colls/A/indexes/example2.cdxj'), 'timestamp': '20160225042329', 'filename': 'example2.warc.gz'}]
assert(to_json_list(res) == exp) assert(to_json_list(res) == exp)
assert(errs == {}) assert(errs == {})
@ -73,7 +73,7 @@ class TestDirAgg(TempDirTests, BaseTestClass):
def test_agg_collB_found(self): def test_agg_collB_found(self):
res, errs = self.dir_loader({'url': 'iana.org/', 'param.coll': 'B'}) res, errs = self.dir_loader({'url': 'iana.org/', 'param.coll': 'B'})
exp = [{'source': 'colls/B/indexes/iana.cdxj', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}] exp = [{'source': to_path('colls/B/indexes/iana.cdxj'), 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
assert(to_json_list(res) == exp) assert(to_json_list(res) == exp)
assert(errs == {}) assert(errs == {})
@ -83,7 +83,7 @@ class TestDirAgg(TempDirTests, BaseTestClass):
agg_source = SimpleAggregator({'dir': self.dir_loader}) agg_source = SimpleAggregator({'dir': self.dir_loader})
res, errs = agg_source({'url': 'iana.org/', 'param.coll': 'B'}) res, errs = agg_source({'url': 'iana.org/', 'param.coll': 'B'})
exp = [{'source': 'dir:colls/B/indexes/iana.cdxj', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}] exp = [{'source': to_path('dir:colls/B/indexes/iana.cdxj'), 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
assert(to_json_list(res) == exp) assert(to_json_list(res) == exp)
assert(errs == {}) assert(errs == {})
@ -93,9 +93,9 @@ class TestDirAgg(TempDirTests, BaseTestClass):
res, errs = self.dir_loader({'url': 'iana.org/', 'param.coll': '*'}) res, errs = self.dir_loader({'url': 'iana.org/', 'param.coll': '*'})
exp = [ exp = [
{'source': 'colls/B/indexes/iana.cdxj', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}, {'source': to_path('colls/B/indexes/iana.cdxj'), 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'},
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171238', 'filename': 'dupes.warc.gz'}, {'source': to_path('colls/C/indexes/dupes.cdxj'), 'timestamp': '20140127171238', 'filename': 'dupes.warc.gz'},
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171238', 'filename': 'dupes.warc.gz'}, {'source': to_path('colls/C/indexes/dupes.cdxj'), 'timestamp': '20140127171238', 'filename': 'dupes.warc.gz'},
] ]
assert(to_json_list(res) == exp) assert(to_json_list(res) == exp)
@ -106,9 +106,9 @@ class TestDirAgg(TempDirTests, BaseTestClass):
res, errs = self.dir_loader({'url': 'example.com/', 'param.coll': '*'}) res, errs = self.dir_loader({'url': 'example.com/', 'param.coll': '*'})
exp = [ exp = [
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'}, {'source': to_path('colls/C/indexes/dupes.cdxj'), 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
{'source': 'colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171251', 'filename': 'dupes.warc.gz'}, {'source': to_path('colls/C/indexes/dupes.cdxj'), 'timestamp': '20140127171251', 'filename': 'dupes.warc.gz'},
{'source': 'colls/A/indexes/example2.cdxj', 'timestamp': '20160225042329', 'filename': 'example2.warc.gz'} {'source': to_path('colls/A/indexes/example2.cdxj'), 'timestamp': '20160225042329', 'filename': 'example2.warc.gz'}
] ]
assert(to_json_list(res) == exp) assert(to_json_list(res) == exp)
@ -126,9 +126,9 @@ class TestDirAgg(TempDirTests, BaseTestClass):
{'source': 'ia', 'timestamp': '20100514231857', 'load_url': 'http://web.archive.org/web/20100514231857id_/http://example.com/'}, {'source': 'ia', 'timestamp': '20100514231857', 'load_url': 'http://web.archive.org/web/20100514231857id_/http://example.com/'},
{'source': 'ia', 'timestamp': '20100519202418', 'load_url': 'http://web.archive.org/web/20100519202418id_/http://example.com/'}, {'source': 'ia', 'timestamp': '20100519202418', 'load_url': 'http://web.archive.org/web/20100519202418id_/http://example.com/'},
{'source': 'ia', 'timestamp': '20100501123414', 'load_url': 'http://web.archive.org/web/20100501123414id_/http://example.com/'}, {'source': 'ia', 'timestamp': '20100501123414', 'load_url': 'http://web.archive.org/web/20100501123414id_/http://example.com/'},
{'source': 'local:colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'}, {'source': to_path('local:colls/C/indexes/dupes.cdxj'), 'timestamp': '20140127171200', 'filename': 'dupes.warc.gz'},
{'source': 'local:colls/C/indexes/dupes.cdxj', 'timestamp': '20140127171251', 'filename': 'dupes.warc.gz'}, {'source': to_path('local:colls/C/indexes/dupes.cdxj'), 'timestamp': '20140127171251', 'filename': 'dupes.warc.gz'},
{'source': 'local:colls/A/indexes/example2.cdxj', 'timestamp': '20160225042329', 'filename': 'example2.warc.gz'} {'source': to_path('local:colls/A/indexes/example2.cdxj'), 'timestamp': '20160225042329', 'filename': 'example2.warc.gz'}
] ]
assert(to_json_list(res) == exp) assert(to_json_list(res) == exp)
@ -156,9 +156,9 @@ class TestDirAgg(TempDirTests, BaseTestClass):
def test_agg_dir_sources_1(self): def test_agg_dir_sources_1(self):
res = self.dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'}) res = self.dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'})
exp = {'sources': {'colls/A/indexes/example2.cdxj': 'file', exp = {'sources': {to_path('colls/A/indexes/example2.cdxj'): 'file',
'colls/B/indexes/iana.cdxj': 'file', to_path('colls/B/indexes/iana.cdxj'): 'file',
'colls/C/indexes/dupes.cdxj': 'file'} to_path('colls/C/indexes/dupes.cdxj'): 'file'}
} }
assert(res == exp) assert(res == exp)
@ -166,8 +166,8 @@ class TestDirAgg(TempDirTests, BaseTestClass):
def test_agg_dir_sources_2(self): def test_agg_dir_sources_2(self):
res = self.dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '[A,C]'}) res = self.dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '[A,C]'})
exp = {'sources': {'colls/A/indexes/example2.cdxj': 'file', exp = {'sources': {to_path('colls/A/indexes/example2.cdxj'): 'file',
'colls/C/indexes/dupes.cdxj': 'file'} to_path('colls/C/indexes/dupes.cdxj'): 'file'}
} }
assert(res == exp) assert(res == exp)
@ -193,9 +193,9 @@ class TestDirAgg(TempDirTests, BaseTestClass):
def test_cache_dir_sources_1(self): def test_cache_dir_sources_1(self):
exp = {'sources': {'colls/A/indexes/example2.cdxj': 'file', exp = {'sources': {to_path('colls/A/indexes/example2.cdxj'): 'file',
'colls/B/indexes/iana.cdxj': 'file', to_path('colls/B/indexes/iana.cdxj'): 'file',
'colls/C/indexes/dupes.cdxj': 'file'} to_path('colls/C/indexes/dupes.cdxj'): 'file'}
} }
res = self.cache_dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'}) res = self.cache_dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'})
@ -204,7 +204,10 @@ class TestDirAgg(TempDirTests, BaseTestClass):
res = self.cache_dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'}) res = self.cache_dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'})
assert(res == exp) assert(res == exp)
new_file = os.path.join(self.root_dir, 'colls/C/indexes/empty.cdxj') new_file = os.path.join(self.root_dir, to_path('colls/C/indexes/empty.cdxj'))
# ensure new file is created at least a second later
time.sleep(1.0)
with open(new_file, 'a') as fh: with open(new_file, 'a') as fh:
os.utime(new_file, None) os.utime(new_file, None)
@ -212,5 +215,5 @@ class TestDirAgg(TempDirTests, BaseTestClass):
res = self.cache_dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'}) res = self.cache_dir_loader.get_source_list({'url': 'example.com/', 'param.coll': '*'})
# New File Included # New File Included
exp['sources']['colls/C/indexes/empty.cdxj'] = 'file' exp['sources'][to_path('colls/C/indexes/empty.cdxj')] = 'file'
assert(res == exp) assert(res == exp)

View File

@ -36,7 +36,7 @@ aggs_inv = {'simple': SimpleAggregator(sources, invert_sources=True),
agg_tm = {'gevent': GeventTimeoutAggregator(sources, timeout=0.0)} agg_tm = {'gevent': GeventTimeoutAggregator(sources, timeout=0.0)}
nf = {'notfound': FileIndexSource(to_path('testdata/not-found-x'))} nf = {'notfound': FileIndexSource('testdata/not-found-x')}
agg_nf = {'simple': SimpleAggregator(nf), agg_nf = {'simple': SimpleAggregator(nf),
'gevent': GeventTimeoutAggregator(nf, timeout=5.0), 'gevent': GeventTimeoutAggregator(nf, timeout=5.0),
} }

View File

@ -47,7 +47,7 @@ def test_timeout_long_all_pass():
def test_timeout_slower_skipped_1(): def test_timeout_slower_skipped_1():
agg = GeventTimeoutAggregator(sources, timeout=0.49) agg = GeventTimeoutAggregator(sources, timeout=0.40)
res, errs = agg(dict(url='http://example.com/')) res, errs = agg(dict(url='http://example.com/'))
@ -58,8 +58,8 @@ def test_timeout_slower_skipped_1():
assert(errs == {'slower': 'timeout'}) assert(errs == {'slower': 'timeout'})
def test_timeout_slower_skipped_2(): def test_timeout_slower_all_skipped():
agg = GeventTimeoutAggregator(sources, timeout=0.19) agg = GeventTimeoutAggregator(sources, timeout=0.10)
res, errs = agg(dict(url='http://example.com/')) res, errs = agg(dict(url='http://example.com/'))
@ -74,8 +74,8 @@ def test_timeout_skipping():
assert(sources['slow'].calls == 3) assert(sources['slow'].calls == 3)
assert(sources['slower'].calls == 3) assert(sources['slower'].calls == 3)
agg = GeventTimeoutAggregator(sources, timeout=0.49, agg = GeventTimeoutAggregator(sources, timeout=0.40,
t_count=2, t_duration=2.0) t_count=2, t_duration=1.0)
exp = [{'source': 'slow', 'timestamp': '20160225042329'}] exp = [{'source': 'slow', 'timestamp': '20160225042329'}]
@ -107,7 +107,7 @@ def test_timeout_skipping():
assert(errs == {}) assert(errs == {})
time.sleep(2.01) time.sleep(1.5)
res, errs = agg(dict(url='http://example.com/')) res, errs = agg(dict(url='http://example.com/'))
assert(to_json_list(res, fields=['source', 'timestamp']) == exp) assert(to_json_list(res, fields=['source', 'timestamp']) == exp)

View File

@ -351,7 +351,15 @@ class ZipNumIndexSource(BaseIndexSource):
for line in BytesIO(buff): for line in BytesIO(buff):
yield line yield line
iter_ = itertools.chain(*map(decompress_block, ranges)) def iter_blocks(reader):
try:
for r in ranges:
yield decompress_block(r)
finally:
reader.close()
# iterate over all blocks
iter_ = itertools.chain.from_iterable(iter_blocks(reader))
# start bound # start bound
iter_ = linearsearch(iter_, query.key) iter_ = linearsearch(iter_, query.key)

View File

@ -51,8 +51,8 @@ class TestManagedColls(TempDirTests, BaseTestClass):
@classmethod @classmethod
def teardown_class(cls): def teardown_class(cls):
super(TestManagedColls, cls).teardown_class()
os.chdir(cls.orig_cwd) os.chdir(cls.orig_cwd)
super(TestManagedColls, cls).teardown_class()
def _check_dirs(self, base, dirlist): def _check_dirs(self, base, dirlist):
for dir_ in dirlist: for dir_ in dirlist: