mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-22 22:32:19 +01:00
104 lines
2.4 KiB
Python
104 lines
2.4 KiB
Python
import tempfile
|
|
import os
|
|
import shutil
|
|
import json
|
|
|
|
from rezag.aggindexsource import DirectoryIndexAggregator, SimpleAggregator
|
|
|
|
|
|
#=============================================================================
|
|
root_dir = None
|
|
orig_cwd = None
|
|
dir_agg = None
|
|
|
|
def setup_module():
|
|
global root_dir
|
|
root_dir = tempfile.mkdtemp()
|
|
|
|
coll_A = to_path(root_dir + '/colls/A/indexes')
|
|
coll_B = to_path(root_dir + '/colls/B/indexes')
|
|
|
|
os.makedirs(coll_A)
|
|
os.makedirs(coll_B)
|
|
|
|
dir_prefix = to_path(root_dir)
|
|
dir_path ='colls/{coll}/indexes'
|
|
|
|
shutil.copy(to_path('testdata/example.cdxj'), coll_A)
|
|
shutil.copy(to_path('testdata/iana.cdxj'), coll_B)
|
|
|
|
global dir_agg
|
|
dir_agg = DirectoryIndexAggregator(dir_prefix, dir_path)
|
|
|
|
global orig_cwd
|
|
orig_cwd = os.getcwd()
|
|
os.chdir(root_dir)
|
|
|
|
# use actually set dir
|
|
root_dir = os.getcwd()
|
|
|
|
def teardown_module():
|
|
global orig_cwd
|
|
os.chdir(orig_cwd)
|
|
|
|
global root_dir
|
|
shutil.rmtree(root_dir)
|
|
|
|
|
|
def to_path(path):
|
|
if os.path.sep != '/':
|
|
path = path.replace('/', os.path.sep)
|
|
|
|
return path
|
|
|
|
|
|
def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
|
|
return list([json.loads(cdx.to_json(fields)) for cdx in cdxlist])
|
|
|
|
|
|
def test_agg_no_coll_set():
|
|
res = dir_agg(dict(url='example.com/'))
|
|
assert(to_json_list(res) == [])
|
|
|
|
|
|
def test_agg_collA_found():
|
|
res = dir_agg({'url': 'example.com/', 'param.coll': 'A'})
|
|
|
|
exp = [{'source': 'colls/A/indexes', 'timestamp': '20160225042329', 'filename': 'example.warc.gz'}]
|
|
|
|
assert(to_json_list(res) == exp)
|
|
|
|
def test_agg_collB():
|
|
res = dir_agg({'url': 'example.com/', 'param.coll': 'B'})
|
|
|
|
exp = []
|
|
|
|
assert(to_json_list(res) == exp)
|
|
|
|
def test_agg_collB_found():
|
|
res = dir_agg({'url': 'iana.org/', 'param.coll': 'B'})
|
|
|
|
exp = [{'source': 'colls/B/indexes', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
|
|
|
|
assert(to_json_list(res) == exp)
|
|
|
|
|
|
def test_agg_all_found():
|
|
res = dir_agg({'url': 'iana.org/', 'param.coll': '*'})
|
|
|
|
exp = [{'source': 'colls/B/indexes', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
|
|
|
|
assert(to_json_list(res) == exp)
|
|
|
|
|
|
def test_extra_agg_all():
|
|
agg_dir_agg = SimpleAggregator({'dir': dir_agg})
|
|
res = agg_dir_agg({'url': 'iana.org/', 'param.coll': '*'})
|
|
|
|
exp = [{'source': 'dir.colls/B/indexes', 'timestamp': '20140126200624', 'filename': 'iana.warc.gz'}]
|
|
|
|
assert(to_json_list(res) == exp)
|
|
|
|
|
|
|