1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-22 22:32:19 +01:00
pywb/test/test_dir_agg.py
Ilya Kreymer c88c5f4cca add new package setup!
add tests and testdata, splitting mem and dir agg tests
2016-02-26 18:25:10 -08:00

104 lines
2.4 KiB
Python

import tempfile
import os
import shutil
import json
from rezag.aggindexsource import DirectoryIndexAggregator, SimpleAggregator
#=============================================================================
root_dir = None
orig_cwd = None
dir_agg = None
def setup_module():
global root_dir
root_dir = tempfile.mkdtemp()
coll_A = to_path(root_dir + '/colls/A/indexes')
coll_B = to_path(root_dir + '/colls/B/indexes')
os.makedirs(coll_A)
os.makedirs(coll_B)
dir_prefix = to_path(root_dir)
dir_path ='colls/{coll}/indexes'
shutil.copy(to_path('testdata/example.cdxj'), coll_A)
shutil.copy(to_path('testdata/iana.cdxj'), coll_B)
global dir_agg
dir_agg = DirectoryIndexAggregator(dir_prefix, dir_path)
global orig_cwd
orig_cwd = os.getcwd()
os.chdir(root_dir)
# use actually set dir
root_dir = os.getcwd()
def teardown_module():
global orig_cwd
os.chdir(orig_cwd)
global root_dir
shutil.rmtree(root_dir)
def to_path(path):
if os.path.sep != '/':
path = path.replace('/', os.path.sep)
return path
def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
return list([json.loads(cdx.to_json(fields)) for cdx in cdxlist])
def test_agg_no_coll_set():
res = dir_agg(dict(url='example.com/'))
assert(to_json_list(res) == [])
def test_agg_collA_found():
res = dir_agg({'url': 'example.com/', 'param.coll': 'A'})
exp = [{'source': 'colls/A/indexes', 'timestamp': '20160225042329', 'filename': 'example.warc.gz'}]
assert(to_json_list(res) == exp)
def test_agg_collB():
res = dir_agg({'url': 'example.com/', 'param.coll': 'B'})
exp = []
assert(to_json_list(res) == exp)
def test_agg_collB_found():
res = dir_agg({'url': 'iana.org/', 'param.coll': 'B'})
exp = [{'source': 'colls/B/indexes', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
assert(to_json_list(res) == exp)
def test_agg_all_found():
res = dir_agg({'url': 'iana.org/', 'param.coll': '*'})
exp = [{'source': 'colls/B/indexes', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
assert(to_json_list(res) == exp)
def test_extra_agg_all():
agg_dir_agg = SimpleAggregator({'dir': dir_agg})
res = agg_dir_agg({'url': 'iana.org/', 'param.coll': '*'})
exp = [{'source': 'dir.colls/B/indexes', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
assert(to_json_list(res) == exp)