mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
metadata: add support for user-defined per-collection metadata! #78
Metadata is stored in wbrequest.user_metadata and is available to all templates. Collections manager: refactor to use subparsers; add "list collections" and "set metadata" commands. Update tests for the new commands. Index template: use the user metadata title for the collections listing. Search template: display all metadata and the title, if available.
This commit is contained in:
parent
b417b47835
commit
30454abb6b
@ -66,7 +66,8 @@ class ArchivalRouter(object):
|
||||
wburl_class=route.handler.get_wburl_type(),
|
||||
urlrewriter_class=self.urlrewriter_class,
|
||||
cookie_scope=route.cookie_scope,
|
||||
rewrite_opts=route.rewrite_opts)
|
||||
rewrite_opts=route.rewrite_opts,
|
||||
user_metadata=route.user_metadata)
|
||||
|
||||
# Allow for applying of additional filters
|
||||
route.apply_filters(wbrequest, matcher)
|
||||
@ -100,12 +101,15 @@ class Route(object):
|
||||
self.regex = re.compile(regex + lookahead)
|
||||
else:
|
||||
self.regex = re.compile('')
|
||||
|
||||
self.handler = handler
|
||||
self.request_class = request_class
|
||||
|
||||
# collection id from regex group (default 0)
|
||||
self.coll_group = coll_group
|
||||
self.cookie_scope = config.get('cookie_scope')
|
||||
self.rewrite_opts = config.get('rewrite_opts', {})
|
||||
self.user_metadata = config.get('metadata', {})
|
||||
self._custom_init(config)
|
||||
|
||||
def is_handling(self, request_uri):
|
||||
|
@ -40,7 +40,9 @@ class WbRequest(object):
|
||||
urlrewriter_class=None,
|
||||
is_proxy=False,
|
||||
cookie_scope=None,
|
||||
rewrite_opts={}):
|
||||
rewrite_opts={},
|
||||
user_metadata={},
|
||||
):
|
||||
|
||||
self.env = env
|
||||
|
||||
@ -96,6 +98,7 @@ class WbRequest(object):
|
||||
|
||||
self.query_filter = []
|
||||
self.custom_params = {}
|
||||
self.user_metadata = user_metadata
|
||||
|
||||
# PERF
|
||||
env['X_PERF'] = {}
|
||||
|
@ -9,6 +9,7 @@ from pywb.warc.cdxindexer import main as cdxindexer_main
|
||||
|
||||
from argparse import ArgumentParser, RawTextHelpFormatter
|
||||
import heapq
|
||||
import yaml
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@ -19,7 +20,7 @@ simplify the creation and management of web archive collections
|
||||
It may be used via cmdline to setup and maintain the
|
||||
directory structure expected by pywb
|
||||
"""
|
||||
def __init__(self, coll_name, root_dir='collections'):
|
||||
def __init__(self, coll_name, root_dir='collections', must_exist=True):
|
||||
self.root_dir = root_dir
|
||||
self.default_config = load_yaml_config('pywb/default_config.yaml')
|
||||
self.coll_name = coll_name
|
||||
@ -30,6 +31,14 @@ directory structure expected by pywb
|
||||
self.cdx_dir = self._get_dir('index_paths')
|
||||
self.static_dir = self._get_dir('static_path')
|
||||
self.templates_dir = self._get_dir('templates_dir')
|
||||
if must_exist:
|
||||
self._assert_coll_exists()
|
||||
|
||||
def list_colls(self):
    """Print the names of all collections found under ``self.root_dir``.

    Writes a ``Collections:`` header to stdout followed by one ``- <name>``
    line per sub-directory of ``self.root_dir``. Plain files in the root
    directory are ignored.

    Names are printed in sorted order: ``os.listdir()`` returns entries in
    arbitrary, filesystem-dependent order, which made the output (and
    anything parsing it) non-deterministic.
    """
    print('Collections:')
    for name in sorted(os.listdir(self.root_dir)):
        if os.path.isdir(os.path.join(self.root_dir, name)):
            print('- ' + name)
|
||||
|
||||
def _get_dir(self, name):
|
||||
return os.path.join(self.coll_dir,
|
||||
@ -50,18 +59,15 @@ directory structure expected by pywb
|
||||
self._create_dir(self.static_dir)
|
||||
self._create_dir(self.templates_dir)
|
||||
|
||||
def _assert_coll_exists(self):
    """Ensure the collection directory ``self.coll_dir`` is present.

    :raises IOError: if ``self.coll_dir`` is not an existing directory;
        the message names ``self.coll_name``
    """
    if os.path.isdir(self.coll_dir):
        return

    message = 'Collection {0} does not exist'.format(self.coll_name)
    raise IOError(message)
|
||||
|
||||
def add_warcs(self, warcs):
|
||||
if not os.path.isdir(self.warc_dir):
|
||||
if not os.path.isdir(self.coll_dir):
|
||||
raise IOError('Collection {0} does not exist'.
|
||||
format(self.coll_name))
|
||||
else:
|
||||
raise IOError('Directory {0} does not exist'.
|
||||
format(self.warc_dir))
|
||||
|
||||
if not warcs:
|
||||
logging.info('No WARCs specified')
|
||||
return
|
||||
raise IOError('Directory {0} does not exist'.
|
||||
format(self.warc_dir))
|
||||
|
||||
full_paths = []
|
||||
for filename in warcs:
|
||||
@ -99,9 +105,6 @@ directory structure expected by pywb
|
||||
self._index_merge_warcs(filtered_warcs)
|
||||
|
||||
def _index_merge_warcs(self, new_warcs):
|
||||
if not new_warcs:
|
||||
return
|
||||
|
||||
cdx_file = os.path.join(self.cdx_dir, 'index.cdx')
|
||||
|
||||
# no existing file, just reindex all
|
||||
@ -128,50 +131,109 @@ directory structure expected by pywb
|
||||
os.rename(merged_file, cdx_file)
|
||||
os.remove(temp_file)
|
||||
|
||||
def set_metadata(self, namevalue_pairs):
    """Set user-defined metadata entries for this collection.

    Loads ``metadata.yaml`` from ``self.coll_dir`` if it exists, applies
    each ``name=value`` pair on top of the loaded entries (later pairs and
    existing keys are overwritten), and writes the result back to
    ``metadata.yaml``.

    :param namevalue_pairs: iterable of ``"name=value"`` strings. Only the
        first ``=`` separates name from value, so values may contain ``=``.
        ``None`` is treated as "no pairs" (argparse yields ``None`` for an
        optional argument that was not supplied).
    :raises ValueError: if a pair contains no ``=``. The check runs before
        the file is opened for writing, so nothing is written in that case.
    """
    metadata_yaml = os.path.join(self.coll_dir, 'metadata.yaml')

    metadata = None
    if os.path.isfile(metadata_yaml):
        with open(metadata_yaml) as fh:
            metadata = yaml.safe_load(fh)

    # No file yet, or an empty file (safe_load returns None): start fresh
    if not metadata:
        metadata = {}

    msg = 'Metadata params must be in the form "name=value"'
    for pair in namevalue_pairs or []:
        name, sep, value = pair.partition('=')
        if not sep:
            raise ValueError(msg)

        metadata[name] = value

    # safe_dump mirrors the safe_load above, so the file stays readable by
    # it. The dump is text, hence text mode rather than 'w+b'.
    with open(metadata_yaml, 'w') as fh:
        fh.write(yaml.safe_dump(metadata, default_flow_style=False))
|
||||
|
||||
|
||||
#=============================================================================
|
||||
def main(args=None):
|
||||
description = """
|
||||
Create manage file based web archive collections
|
||||
"""
|
||||
|
||||
epilog = """
|
||||
Some examples:
|
||||
|
||||
* Create new collection 'my_coll'
|
||||
{0} create my_coll
|
||||
|
||||
* Add warc mywarc1.warc.gz to my_coll (The warc will be copied to the collecton directory)
|
||||
{0} add my_coll mywarc1.warc.gz
|
||||
|
||||
""".format(os.path.basename(sys.argv[0]))
|
||||
#format(os.path.basename(sys.argv[0]))
|
||||
|
||||
logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
|
||||
level=logging.DEBUG)
|
||||
|
||||
parser = ArgumentParser(description=description,
|
||||
epilog=epilog,
|
||||
#epilog=epilog,
|
||||
formatter_class=RawTextHelpFormatter)
|
||||
|
||||
group = parser.add_mutually_exclusive_group()
|
||||
group.add_argument('--init', action='store_true')
|
||||
group.add_argument('--addwarc', action='store_true')
|
||||
group.add_argument('--reindex', action='store_true')
|
||||
group.add_argument('--index-warcs', action='store_true')
|
||||
subparsers = parser.add_subparsers(dest='type')
|
||||
|
||||
parser.add_argument('name')
|
||||
parser.add_argument('files', nargs='*')
|
||||
# Init Coll
|
||||
def do_init(r):
|
||||
m = CollectionsManager(r.coll_name, must_exist=False)
|
||||
m.add_collection()
|
||||
|
||||
init_help = 'Init new collection, create all collection directories'
|
||||
init = subparsers.add_parser('init', help=init_help)
|
||||
init.add_argument('coll_name')
|
||||
init.set_defaults(func=do_init)
|
||||
|
||||
# List Colls
|
||||
def do_list(r):
|
||||
m = CollectionsManager('', must_exist=False)
|
||||
m.list_colls()
|
||||
|
||||
list_help = 'List Collections'
|
||||
listcmd = subparsers.add_parser('list', help=list_help)
|
||||
listcmd.set_defaults(func=do_list)
|
||||
|
||||
# Add Warcs
|
||||
def do_add(r):
|
||||
m = CollectionsManager(r.coll_name)
|
||||
m.add_warcs(r.files)
|
||||
|
||||
addwarc_help = 'Copy ARCS/WARCS to collection directory and reindex'
|
||||
addwarc = subparsers.add_parser('add', help=addwarc_help)
|
||||
addwarc.add_argument('coll_name')
|
||||
addwarc.add_argument('files', nargs='+')
|
||||
addwarc.set_defaults(func=do_add)
|
||||
|
||||
|
||||
# Reindex All
|
||||
def do_reindex(r):
|
||||
m = CollectionsManager(r.coll_name)
|
||||
m.reindex()
|
||||
|
||||
reindex_help = 'Re-Index entire collection'
|
||||
reindex = subparsers.add_parser('reindex', help=reindex_help)
|
||||
reindex.add_argument('coll_name')
|
||||
reindex.set_defaults(func=do_reindex)
|
||||
|
||||
# Index warcs
|
||||
def do_index(r):
|
||||
m = CollectionsManager(r.coll_name)
|
||||
m.index_merge(r.files)
|
||||
|
||||
indexwarcs_help = 'Index specified ARC/WARC files in the collection'
|
||||
indexwarcs = subparsers.add_parser('index', help=indexwarcs_help)
|
||||
indexwarcs.add_argument('coll_name')
|
||||
indexwarcs.add_argument('files', nargs='+')
|
||||
indexwarcs.set_defaults(func=do_index)
|
||||
|
||||
# Set metadata
|
||||
def do_metadata(r):
|
||||
m = CollectionsManager(r.coll_name)
|
||||
m.set_metadata(r.set)
|
||||
|
||||
metadata_help = 'Set Metadata'
|
||||
metadata = subparsers.add_parser('metadata', help=metadata_help)
|
||||
metadata.add_argument('coll_name')
|
||||
metadata.add_argument('--set', nargs='+')
|
||||
metadata.set_defaults(func=do_metadata)
|
||||
|
||||
r = parser.parse_args(args=args)
|
||||
|
||||
m = CollectionsManager(r.name)
|
||||
if r.init:
|
||||
m.add_collection()
|
||||
elif r.addwarc:
|
||||
m.add_warcs(r.files)
|
||||
elif r.index_warcs:
|
||||
m.index_merge(r.files)
|
||||
elif r.reindex:
|
||||
m.reindex()
|
||||
r.func(r)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -1,22 +1,17 @@
|
||||
<h2>pywb Sample Home Page</h2>
|
||||
<h2>pywb Wayback Machine</h2>
|
||||
|
||||
The following archive collections are available:
|
||||
This archive contains the following collections:
|
||||
|
||||
<ul>
|
||||
{% for route in routes %}
|
||||
{% if route | is_wb_handler %}
|
||||
<li><a href="{{ '/' + route.path }}">{{ '/' + route.path }}</a>: {{ route | string }}</li>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</ul>
|
||||
|
||||
Other endpoints in this deployment:
|
||||
|
||||
<ul>
|
||||
{% for route in routes %}
|
||||
{% if not route | is_wb_handler %}
|
||||
<li><b>{{ '/' + route.path }}</b> - {{ route | string }}</li>
|
||||
{% endif %}
|
||||
<li>
|
||||
<a href="{{ '/' + route.path }}">{{ '/' + route.path }}</a>
|
||||
{% if route.user_metadata.title is defined %}
|
||||
({{ route.user_metadata.title }})
|
||||
{% endif %}
|
||||
</li>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</ul>
|
||||
|
||||
|
@ -1,6 +1,17 @@
|
||||
<h2>pywb Search Page</h2>
|
||||
Search Archived Content:
|
||||
<h2>{{ wbrequest.user_metadata.title if wbrequest.user_metadata.title else wbrequest.coll }} Search Page</h2>
|
||||
|
||||
<div>
|
||||
<table style="text-align: left">
|
||||
{% for key, val in wbrequest.user_metadata.iteritems() %}
|
||||
<tr><th>{{ key }}:</th><td>{{ val }}</td>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<p>
|
||||
Search this collection by url:
|
||||
<form onsubmit="url = document.getElementById('search').value; if (url != '') { document.location.href = '{{ wbrequest.wb_prefix }}' + '*/' + url; } return false;">
|
||||
<input id="search" name="search" placeholder="Enter url to search"/>
|
||||
<button type="submit">Search</button>
|
||||
</form>
|
||||
</p>
|
||||
|
@ -143,13 +143,13 @@ class DirectoryCollsLoader(object):
|
||||
if not os.path.isdir(full):
|
||||
continue
|
||||
|
||||
coll = self.load_dir(full, name)
|
||||
if coll:
|
||||
colls[name] = coll
|
||||
coll_config = self.load_coll_dir(full, name)
|
||||
if coll_config:
|
||||
colls[name] = coll_config
|
||||
|
||||
return colls
|
||||
|
||||
def _add_if_exists(self, coll, root_dir, dir_key, required=False):
|
||||
def _add_dir_if_exists(self, coll, root_dir, dir_key, required=False):
|
||||
if dir_key in coll:
|
||||
# already set
|
||||
return False
|
||||
@ -168,18 +168,26 @@ class DirectoryCollsLoader(object):
|
||||
else:
|
||||
return False
|
||||
|
||||
def load_dir(self, root_dir, name):
|
||||
config_file = os.path.join(root_dir, 'config.yaml')
|
||||
if os.path.isfile(config_file):
|
||||
coll = load_yaml_config(config_file)
|
||||
def load_yaml_file(self, root_dir, filename):
|
||||
filename = os.path.join(root_dir, filename)
|
||||
if os.path.isfile(filename):
|
||||
return load_yaml_config(filename)
|
||||
else:
|
||||
coll = {}
|
||||
return {}
|
||||
|
||||
self._add_if_exists(coll, root_dir, 'index_paths', True)
|
||||
self._add_if_exists(coll, root_dir, 'archive_paths', True)
|
||||
def load_coll_dir(self, root_dir, name):
|
||||
# Load config.yaml
|
||||
coll_config = self.load_yaml_file(root_dir, 'config.yaml')
|
||||
|
||||
if self._add_if_exists(coll, root_dir, 'static_path', False):
|
||||
self.static_routes['static/' + name] = coll['static_path']
|
||||
# Load metadata.yaml
|
||||
metadata = self.load_yaml_file(root_dir, 'metadata.yaml')
|
||||
coll_config['metadata'] = metadata
|
||||
|
||||
self._add_dir_if_exists(coll_config, root_dir, 'index_paths', True)
|
||||
self._add_dir_if_exists(coll_config, root_dir, 'archive_paths', True)
|
||||
|
||||
if self._add_dir_if_exists(coll_config, root_dir, 'static_path', False):
|
||||
self.static_routes['static/' + name] = coll_config['static_path']
|
||||
|
||||
# Add templates
|
||||
templates_dir = self.config.get('paths').get('templates_dir')
|
||||
@ -187,15 +195,15 @@ class DirectoryCollsLoader(object):
|
||||
template_dir = os.path.join(root_dir, templates_dir)
|
||||
if template_dir:
|
||||
for tname, tfile in self.config.get('paths')['template_files'].iteritems():
|
||||
if tname in coll:
|
||||
if tname in coll_config:
|
||||
# Already set
|
||||
continue
|
||||
|
||||
full = os.path.join(template_dir, tfile)
|
||||
if os.path.isfile(full):
|
||||
coll[tname] = full
|
||||
coll_config[tname] = full
|
||||
|
||||
return coll
|
||||
return coll_config
|
||||
|
||||
|
||||
#=================================================================
|
||||
|
@ -5,6 +5,7 @@ from pywb.framework.memento import make_timemap, LINK_FORMAT
|
||||
import urlparse
|
||||
import urllib
|
||||
import logging
|
||||
import json
|
||||
|
||||
from os import path
|
||||
from itertools import imap
|
||||
@ -59,6 +60,11 @@ def is_wb_handler(obj):
|
||||
return obj.handler.__class__.__name__ == "WBHandler"
|
||||
|
||||
|
||||
@template_filter()
def jsonify(obj):
    """Template filter that serializes ``obj`` to a JSON string."""
    encoded = json.dumps(obj)
    return encoded
|
||||
|
||||
|
||||
#=================================================================
|
||||
class J2TemplateView(object):
|
||||
env_globals = {'static_path': 'static/default',
|
||||
|
3
setup.py
3
setup.py
@ -42,7 +42,7 @@ setup(
|
||||
long_description=long_description,
|
||||
license='GPL',
|
||||
packages=find_packages(),
|
||||
#include_package_data=True,
|
||||
zip_safe=True,
|
||||
provides=[
|
||||
'pywb',
|
||||
'pywb.utils',
|
||||
@ -92,7 +92,6 @@ setup(
|
||||
proxy-cert-auth = pywb.framework.certauth:main
|
||||
wb-manager = pywb.manager.manager:main
|
||||
""",
|
||||
zip_safe=True,
|
||||
classifiers=[
|
||||
'Development Status :: 4 - Beta',
|
||||
'Environment :: Web Environment',
|
||||
|
@ -1,9 +1,12 @@
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
import webtest
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
from pywb.webapp.pywb_init import create_wb_router
|
||||
from pywb.manager.manager import main
|
||||
|
||||
@ -53,7 +56,7 @@ class TestManagedColls(object):
|
||||
def test_create_first_coll(self):
|
||||
""" Test first collection creation, with all required dirs
|
||||
"""
|
||||
main(['--init', 'test'])
|
||||
main(['init', 'test'])
|
||||
|
||||
colls = os.path.join(self.root_dir, 'collections')
|
||||
assert os.path.isdir(colls)
|
||||
@ -68,7 +71,7 @@ class TestManagedColls(object):
|
||||
"""
|
||||
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||
|
||||
main(['--addwarc', 'test', warc1])
|
||||
main(['add', 'test', warc1])
|
||||
|
||||
self._create_app()
|
||||
resp = self.testapp.get('/test/20140103030321/http://example.com?example=1')
|
||||
@ -79,9 +82,9 @@ class TestManagedColls(object):
|
||||
"""
|
||||
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||
|
||||
main(['--init', 'foo'])
|
||||
main(['init', 'foo'])
|
||||
|
||||
main(['--addwarc', 'foo', warc1])
|
||||
main(['add', 'foo', warc1])
|
||||
|
||||
self._create_app()
|
||||
resp = self.testapp.get('/foo/20140103030321/http://example.com?example=1')
|
||||
@ -93,17 +96,14 @@ class TestManagedColls(object):
|
||||
warc1 = os.path.join(get_test_dir(), 'warcs', 'iana.warc.gz')
|
||||
warc2 = os.path.join(get_test_dir(), 'warcs', 'example-extra.warc')
|
||||
|
||||
main(['--addwarc', 'test', warc1, warc2])
|
||||
main(['add', 'test', warc1, warc2])
|
||||
|
||||
# Spurious file in collections
|
||||
with open(os.path.join(self.root_dir, 'collections', 'blah'), 'w+b') as fh:
|
||||
fh.write('foo\n')
|
||||
|
||||
with raises(IOError):
|
||||
main(['--addwarc', 'test', 'non-existent-file.warc.gz'])
|
||||
|
||||
# check adding no warc -- no op
|
||||
main(['--addwarc', 'test'])
|
||||
main(['add', 'test', 'non-existent-file.warc.gz'])
|
||||
|
||||
# check new cdx
|
||||
self._create_app()
|
||||
@ -116,7 +116,7 @@ class TestManagedColls(object):
|
||||
Ensure CDX is relative to root archive dir, test replay
|
||||
"""
|
||||
|
||||
main(['--init', 'nested'])
|
||||
main(['init', 'nested'])
|
||||
|
||||
nested_root = os.path.join(self.root_dir, 'collections', 'nested', 'warcs')
|
||||
nested_a = os.path.join(nested_root, 'A')
|
||||
@ -131,7 +131,7 @@ class TestManagedColls(object):
|
||||
shutil.copy2(warc1, nested_a)
|
||||
shutil.copy2(warc2, nested_b)
|
||||
|
||||
main(['--index-warcs',
|
||||
main(['index',
|
||||
'nested',
|
||||
os.path.join(nested_a, 'iana.warc.gz'),
|
||||
os.path.join(nested_b, 'example.warc.gz')
|
||||
@ -162,7 +162,7 @@ class TestManagedColls(object):
|
||||
|
||||
shutil.copy(orig, bak)
|
||||
|
||||
main(['--reindex', 'test'])
|
||||
main(['reindex', 'test'])
|
||||
|
||||
with open(orig) as orig_fh:
|
||||
merged_cdx = orig_fh.read()
|
||||
@ -187,6 +187,39 @@ class TestManagedColls(object):
|
||||
assert resp.content_type == 'application/javascript'
|
||||
assert '/* Some JS File */' in resp.body
|
||||
|
||||
def test_add_title_metadata_index_page(self):
    """ Set a title on collection 'foo' via the 'metadata' command and
    check that the default index page shows it in parentheses
    """
    cmd = ['metadata', 'foo', '--set', 'title=Collection Title']
    main(cmd)

    self._create_app()
    index_page = self.testapp.get('/')

    assert index_page.status_int == 200
    assert index_page.content_type == 'text/html'
    assert '(Collection Title)' in index_page.body
|
||||
|
||||
def test_other_metadata_search_page(self):
    """ Set additional metadata on collection 'foo', check that a
    malformed pair is rejected, and check that the search page lists
    every metadata key and value
    """
    valid_cmd = ['metadata', 'foo', '--set',
                 'desc=Some Description Text',
                 'other=custom value']
    main(valid_cmd)

    # a pair without '=' must be rejected
    with raises(ValueError):
        main(['metadata', 'foo', '--set', 'name_only'])

    self._create_app()
    search_page = self.testapp.get('/foo/')

    assert search_page.status_int == 200
    assert search_page.content_type == 'text/html'

    # NOTE(review): 'Collection Title' is not set in this test; presumably
    # it comes from an earlier test run against the same collection
    assert 'Collection Title' in search_page.body

    assert 'desc' in search_page.body
    assert 'Some Description Text' in search_page.body

    assert 'other' in search_page.body
    assert 'custom value' in search_page.body
|
||||
|
||||
def test_custom_template_search(self):
|
||||
""" Test manually added custom search template search.html
|
||||
"""
|
||||
@ -219,7 +252,6 @@ class TestManagedColls(object):
|
||||
assert resp.content_type == 'text/html'
|
||||
assert 'config.yaml overriden search page' in resp.body
|
||||
|
||||
|
||||
def test_no_templates(self):
|
||||
""" Test removing templates dir, using default template again
|
||||
"""
|
||||
@ -232,28 +264,45 @@ class TestManagedColls(object):
|
||||
assert resp.content_type == 'text/html'
|
||||
assert 'pywb custom search page' not in resp.body
|
||||
|
||||
def test_list_colls(self):
    """ Test collection listing, printed to stdout
    """
    orig_stdout = sys.stdout
    buff = BytesIO()
    sys.stdout = buff
    try:
        main(['list'])
    finally:
        # Always restore stdout, otherwise a failure in main() would
        # leave it redirected into the buffer for later tests
        sys.stdout = orig_stdout

    output = buff.getvalue().splitlines()
    assert len(output) == 4
    assert 'Collections' in output[0]

    # Directory listing order is filesystem-dependent, so compare the
    # collection lines in sorted order rather than as printed
    colls = sorted(output[1:])
    assert 'foo' in colls[0]
    assert 'nested' in colls[1]
    assert 'test' in colls[2]
|
||||
|
||||
def test_err_no_such_coll(self):
|
||||
""" Test error adding warc to non-existent collection
|
||||
"""
|
||||
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||
|
||||
with raises(IOError):
|
||||
main(['--addwarc', 'bar', warc1])
|
||||
main(['add', 'bar', warc1])
|
||||
|
||||
def test_err_wrong_warcs(self):
|
||||
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||
invalid_warc = os.path.join(self.root_dir, 'collections', 'test', 'warcs', 'invalid.warc.gz')
|
||||
|
||||
# Empty
|
||||
main(['--index-warcs', 'test'])
|
||||
# Empty warc list, argparse calls exit
|
||||
with raises(SystemExit):
|
||||
main(['index', 'test'])
|
||||
|
||||
# Wrong paths not in collection
|
||||
with raises(IOError):
|
||||
main(['--index-warcs', 'test', warc1])
|
||||
main(['index', 'test', warc1])
|
||||
|
||||
# Non-existent
|
||||
with raises(IOError):
|
||||
main(['--index-warcs', 'test', invalid_warc])
|
||||
main(['index', 'test', invalid_warc])
|
||||
|
||||
def test_err_missing_dirs(self):
|
||||
""" Test various errors with missing warcs dir,
|
||||
@ -266,7 +315,7 @@ class TestManagedColls(object):
|
||||
shutil.rmtree(warcs_path)
|
||||
|
||||
with raises(IOError):
|
||||
main(['--addwarc', 'foo', 'somewarc'])
|
||||
main(['add', 'foo', 'somewarc'])
|
||||
|
||||
# No CDX
|
||||
cdx_path = os.path.join(colls, 'foo', 'cdx')
|
||||
|
Loading…
x
Reference in New Issue
Block a user