mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
metadata: add support for user-defined per-collection metadata! #78
Metadata is stored in wbrequest.user_metadata and is available to all templates. Collections manager: refactor to use subparsers; add list-collections and set-metadata commands. Update tests for the new commands. Index template: use the user metadata title for the collections listing. Search template: display all metadata and the title, if available.
This commit is contained in:
parent
b417b47835
commit
30454abb6b
@ -66,7 +66,8 @@ class ArchivalRouter(object):
|
|||||||
wburl_class=route.handler.get_wburl_type(),
|
wburl_class=route.handler.get_wburl_type(),
|
||||||
urlrewriter_class=self.urlrewriter_class,
|
urlrewriter_class=self.urlrewriter_class,
|
||||||
cookie_scope=route.cookie_scope,
|
cookie_scope=route.cookie_scope,
|
||||||
rewrite_opts=route.rewrite_opts)
|
rewrite_opts=route.rewrite_opts,
|
||||||
|
user_metadata=route.user_metadata)
|
||||||
|
|
||||||
# Allow for applying of additional filters
|
# Allow for applying of additional filters
|
||||||
route.apply_filters(wbrequest, matcher)
|
route.apply_filters(wbrequest, matcher)
|
||||||
@ -100,12 +101,15 @@ class Route(object):
|
|||||||
self.regex = re.compile(regex + lookahead)
|
self.regex = re.compile(regex + lookahead)
|
||||||
else:
|
else:
|
||||||
self.regex = re.compile('')
|
self.regex = re.compile('')
|
||||||
|
|
||||||
self.handler = handler
|
self.handler = handler
|
||||||
self.request_class = request_class
|
self.request_class = request_class
|
||||||
|
|
||||||
# collection id from regex group (default 0)
|
# collection id from regex group (default 0)
|
||||||
self.coll_group = coll_group
|
self.coll_group = coll_group
|
||||||
self.cookie_scope = config.get('cookie_scope')
|
self.cookie_scope = config.get('cookie_scope')
|
||||||
self.rewrite_opts = config.get('rewrite_opts', {})
|
self.rewrite_opts = config.get('rewrite_opts', {})
|
||||||
|
self.user_metadata = config.get('metadata', {})
|
||||||
self._custom_init(config)
|
self._custom_init(config)
|
||||||
|
|
||||||
def is_handling(self, request_uri):
|
def is_handling(self, request_uri):
|
||||||
|
@ -40,7 +40,9 @@ class WbRequest(object):
|
|||||||
urlrewriter_class=None,
|
urlrewriter_class=None,
|
||||||
is_proxy=False,
|
is_proxy=False,
|
||||||
cookie_scope=None,
|
cookie_scope=None,
|
||||||
rewrite_opts={}):
|
rewrite_opts={},
|
||||||
|
user_metadata={},
|
||||||
|
):
|
||||||
|
|
||||||
self.env = env
|
self.env = env
|
||||||
|
|
||||||
@ -96,6 +98,7 @@ class WbRequest(object):
|
|||||||
|
|
||||||
self.query_filter = []
|
self.query_filter = []
|
||||||
self.custom_params = {}
|
self.custom_params = {}
|
||||||
|
self.user_metadata = user_metadata
|
||||||
|
|
||||||
# PERF
|
# PERF
|
||||||
env['X_PERF'] = {}
|
env['X_PERF'] = {}
|
||||||
|
@ -9,6 +9,7 @@ from pywb.warc.cdxindexer import main as cdxindexer_main
|
|||||||
|
|
||||||
from argparse import ArgumentParser, RawTextHelpFormatter
|
from argparse import ArgumentParser, RawTextHelpFormatter
|
||||||
import heapq
|
import heapq
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
@ -19,7 +20,7 @@ simplify the creation and management of web archive collections
|
|||||||
It may be used via cmdline to setup and maintain the
|
It may be used via cmdline to setup and maintain the
|
||||||
directory structure expected by pywb
|
directory structure expected by pywb
|
||||||
"""
|
"""
|
||||||
def __init__(self, coll_name, root_dir='collections'):
|
def __init__(self, coll_name, root_dir='collections', must_exist=True):
|
||||||
self.root_dir = root_dir
|
self.root_dir = root_dir
|
||||||
self.default_config = load_yaml_config('pywb/default_config.yaml')
|
self.default_config = load_yaml_config('pywb/default_config.yaml')
|
||||||
self.coll_name = coll_name
|
self.coll_name = coll_name
|
||||||
@ -30,6 +31,14 @@ directory structure expected by pywb
|
|||||||
self.cdx_dir = self._get_dir('index_paths')
|
self.cdx_dir = self._get_dir('index_paths')
|
||||||
self.static_dir = self._get_dir('static_path')
|
self.static_dir = self._get_dir('static_path')
|
||||||
self.templates_dir = self._get_dir('templates_dir')
|
self.templates_dir = self._get_dir('templates_dir')
|
||||||
|
if must_exist:
|
||||||
|
self._assert_coll_exists()
|
||||||
|
|
||||||
|
def list_colls(self):
    """ Print the name of every collection found under ``self.root_dir``

    A header line ``Collections:`` is printed first, followed by one
    ``- <name>`` line per sub-directory of the root directory. Plain files
    in the root directory are ignored.
    """
    print('Collections:')
    # os.listdir() returns entries in arbitrary order; sort them so the
    # listing is deterministic across platforms and filesystems
    for name in sorted(os.listdir(self.root_dir)):
        if os.path.isdir(os.path.join(self.root_dir, name)):
            print('- ' + name)
|
||||||
|
|
||||||
def _get_dir(self, name):
|
def _get_dir(self, name):
|
||||||
return os.path.join(self.coll_dir,
|
return os.path.join(self.coll_dir,
|
||||||
@ -50,18 +59,15 @@ directory structure expected by pywb
|
|||||||
self._create_dir(self.static_dir)
|
self._create_dir(self.static_dir)
|
||||||
self._create_dir(self.templates_dir)
|
self._create_dir(self.templates_dir)
|
||||||
|
|
||||||
|
def _assert_coll_exists(self):
|
||||||
|
if not os.path.isdir(self.coll_dir):
|
||||||
|
raise IOError('Collection {0} does not exist'.
|
||||||
|
format(self.coll_name))
|
||||||
|
|
||||||
def add_warcs(self, warcs):
|
def add_warcs(self, warcs):
|
||||||
if not os.path.isdir(self.warc_dir):
|
if not os.path.isdir(self.warc_dir):
|
||||||
if not os.path.isdir(self.coll_dir):
|
raise IOError('Directory {0} does not exist'.
|
||||||
raise IOError('Collection {0} does not exist'.
|
format(self.warc_dir))
|
||||||
format(self.coll_name))
|
|
||||||
else:
|
|
||||||
raise IOError('Directory {0} does not exist'.
|
|
||||||
format(self.warc_dir))
|
|
||||||
|
|
||||||
if not warcs:
|
|
||||||
logging.info('No WARCs specified')
|
|
||||||
return
|
|
||||||
|
|
||||||
full_paths = []
|
full_paths = []
|
||||||
for filename in warcs:
|
for filename in warcs:
|
||||||
@ -99,9 +105,6 @@ directory structure expected by pywb
|
|||||||
self._index_merge_warcs(filtered_warcs)
|
self._index_merge_warcs(filtered_warcs)
|
||||||
|
|
||||||
def _index_merge_warcs(self, new_warcs):
|
def _index_merge_warcs(self, new_warcs):
|
||||||
if not new_warcs:
|
|
||||||
return
|
|
||||||
|
|
||||||
cdx_file = os.path.join(self.cdx_dir, 'index.cdx')
|
cdx_file = os.path.join(self.cdx_dir, 'index.cdx')
|
||||||
|
|
||||||
# no existing file, just reindex all
|
# no existing file, just reindex all
|
||||||
@ -128,50 +131,109 @@ directory structure expected by pywb
|
|||||||
os.rename(merged_file, cdx_file)
|
os.rename(merged_file, cdx_file)
|
||||||
os.remove(temp_file)
|
os.remove(temp_file)
|
||||||
|
|
||||||
|
def set_metadata(self, namevalue_pairs):
    """ Merge ``name=value`` pairs into the collection's metadata.yaml

    Entries already stored in ``<coll_dir>/metadata.yaml`` are kept; a pair
    whose name is already present overwrites the stored value. The file is
    created if it does not exist yet. Nothing is written if any pair is
    malformed.

    :param namevalue_pairs: iterable of ``"name=value"`` strings. Only the
        first ``=`` separates name from value. ``None`` is treated as an
        empty list.
    :raises ValueError: if a pair does not contain ``=``
    """
    metadata_yaml = os.path.join(self.coll_dir, 'metadata.yaml')

    metadata = None
    if os.path.isfile(metadata_yaml):
        with open(metadata_yaml) as fh:
            metadata = yaml.safe_load(fh)

    # an empty yaml document loads as None
    if not metadata:
        metadata = {}

    msg = 'Metadata params must be in the form "name=value"'
    # tolerate None so a caller that has no pairs to set does not crash
    for pair in namevalue_pairs or ():
        name, sep, value = pair.partition('=')
        if not sep:
            raise ValueError(msg)

        metadata[name] = value

    # safe_dump is the counterpart of safe_load above: it never emits
    # python-specific tags that safe_load would refuse on the next read
    with open(metadata_yaml, 'w') as fh:
        yaml.safe_dump(metadata, fh, default_flow_style=False)
|
||||||
|
|
||||||
|
|
||||||
|
#=============================================================================
|
||||||
def main(args=None):
|
def main(args=None):
|
||||||
description = """
|
description = """
|
||||||
Create manage file based web archive collections
|
Create manage file based web archive collections
|
||||||
"""
|
"""
|
||||||
|
#format(os.path.basename(sys.argv[0]))
|
||||||
epilog = """
|
|
||||||
Some examples:
|
|
||||||
|
|
||||||
* Create new collection 'my_coll'
|
|
||||||
{0} create my_coll
|
|
||||||
|
|
||||||
* Add warc mywarc1.warc.gz to my_coll (The warc will be copied to the collecton directory)
|
|
||||||
{0} add my_coll mywarc1.warc.gz
|
|
||||||
|
|
||||||
""".format(os.path.basename(sys.argv[0]))
|
|
||||||
|
|
||||||
logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
|
logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
|
||||||
level=logging.DEBUG)
|
level=logging.DEBUG)
|
||||||
|
|
||||||
parser = ArgumentParser(description=description,
|
parser = ArgumentParser(description=description,
|
||||||
epilog=epilog,
|
#epilog=epilog,
|
||||||
formatter_class=RawTextHelpFormatter)
|
formatter_class=RawTextHelpFormatter)
|
||||||
|
|
||||||
group = parser.add_mutually_exclusive_group()
|
subparsers = parser.add_subparsers(dest='type')
|
||||||
group.add_argument('--init', action='store_true')
|
|
||||||
group.add_argument('--addwarc', action='store_true')
|
|
||||||
group.add_argument('--reindex', action='store_true')
|
|
||||||
group.add_argument('--index-warcs', action='store_true')
|
|
||||||
|
|
||||||
parser.add_argument('name')
|
# Init Coll
|
||||||
parser.add_argument('files', nargs='*')
|
def do_init(r):
|
||||||
|
m = CollectionsManager(r.coll_name, must_exist=False)
|
||||||
|
m.add_collection()
|
||||||
|
|
||||||
|
init_help = 'Init new collection, create all collection directories'
|
||||||
|
init = subparsers.add_parser('init', help=init_help)
|
||||||
|
init.add_argument('coll_name')
|
||||||
|
init.set_defaults(func=do_init)
|
||||||
|
|
||||||
|
# List Colls
|
||||||
|
def do_list(r):
|
||||||
|
m = CollectionsManager('', must_exist=False)
|
||||||
|
m.list_colls()
|
||||||
|
|
||||||
|
list_help = 'List Collections'
|
||||||
|
listcmd = subparsers.add_parser('list', help=list_help)
|
||||||
|
listcmd.set_defaults(func=do_list)
|
||||||
|
|
||||||
|
# Add Warcs
|
||||||
|
def do_add(r):
|
||||||
|
m = CollectionsManager(r.coll_name)
|
||||||
|
m.add_warcs(r.files)
|
||||||
|
|
||||||
|
addwarc_help = 'Copy ARCS/WARCS to collection directory and reindex'
|
||||||
|
addwarc = subparsers.add_parser('add', help=addwarc_help)
|
||||||
|
addwarc.add_argument('coll_name')
|
||||||
|
addwarc.add_argument('files', nargs='+')
|
||||||
|
addwarc.set_defaults(func=do_add)
|
||||||
|
|
||||||
|
|
||||||
|
# Reindex All
|
||||||
|
def do_reindex(r):
|
||||||
|
m = CollectionsManager(r.coll_name)
|
||||||
|
m.reindex()
|
||||||
|
|
||||||
|
reindex_help = 'Re-Index entire collection'
|
||||||
|
reindex = subparsers.add_parser('reindex', help=reindex_help)
|
||||||
|
reindex.add_argument('coll_name')
|
||||||
|
reindex.set_defaults(func=do_reindex)
|
||||||
|
|
||||||
|
# Index warcs
|
||||||
|
def do_index(r):
|
||||||
|
m = CollectionsManager(r.coll_name)
|
||||||
|
m.index_merge(r.files)
|
||||||
|
|
||||||
|
indexwarcs_help = 'Index specified ARC/WARC files in the collection'
|
||||||
|
indexwarcs = subparsers.add_parser('index', help=indexwarcs_help)
|
||||||
|
indexwarcs.add_argument('coll_name')
|
||||||
|
indexwarcs.add_argument('files', nargs='+')
|
||||||
|
indexwarcs.set_defaults(func=do_index)
|
||||||
|
|
||||||
|
# Set metadata
|
||||||
|
def do_metadata(r):
|
||||||
|
m = CollectionsManager(r.coll_name)
|
||||||
|
m.set_metadata(r.set)
|
||||||
|
|
||||||
|
metadata_help = 'Set Metadata'
|
||||||
|
metadata = subparsers.add_parser('metadata', help=metadata_help)
|
||||||
|
metadata.add_argument('coll_name')
|
||||||
|
metadata.add_argument('--set', nargs='+')
|
||||||
|
metadata.set_defaults(func=do_metadata)
|
||||||
|
|
||||||
r = parser.parse_args(args=args)
|
r = parser.parse_args(args=args)
|
||||||
|
r.func(r)
|
||||||
m = CollectionsManager(r.name)
|
|
||||||
if r.init:
|
|
||||||
m.add_collection()
|
|
||||||
elif r.addwarc:
|
|
||||||
m.add_warcs(r.files)
|
|
||||||
elif r.index_warcs:
|
|
||||||
m.index_merge(r.files)
|
|
||||||
elif r.reindex:
|
|
||||||
m.reindex()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -1,22 +1,17 @@
|
|||||||
<h2>pywb Sample Home Page</h2>
|
<h2>pywb Wayback Machine</h2>
|
||||||
|
|
||||||
The following archive collections are available:
|
This archive contains the following collections:
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
{% for route in routes %}
|
{% for route in routes %}
|
||||||
{% if route | is_wb_handler %}
|
{% if route | is_wb_handler %}
|
||||||
<li><a href="{{ '/' + route.path }}">{{ '/' + route.path }}</a>: {{ route | string }}</li>
|
<li>
|
||||||
{% endif %}
|
<a href="{{ '/' + route.path }}">{{ '/' + route.path }}</a>
|
||||||
{% endfor %}
|
{% if route.user_metadata.title is defined %}
|
||||||
</ul>
|
({{ route.user_metadata.title }})
|
||||||
|
{% endif %}
|
||||||
Other endpoints in this deployment:
|
</li>
|
||||||
|
{% endif %}
|
||||||
<ul>
|
|
||||||
{% for route in routes %}
|
|
||||||
{% if not route | is_wb_handler %}
|
|
||||||
<li><b>{{ '/' + route.path }}</b> - {{ route | string }}</li>
|
|
||||||
{% endif %}
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
|
@ -1,6 +1,17 @@
|
|||||||
<h2>pywb Search Page</h2>
|
<h2>{{ wbrequest.user_metadata.title if wbrequest.user_metadata.title else wbrequest.coll }} Search Page</h2>
|
||||||
Search Archived Content:
|
|
||||||
|
<div>
|
||||||
|
<table style="text-align: left">
|
||||||
|
{% for key, val in wbrequest.user_metadata.iteritems() %}
|
||||||
|
<tr><th>{{ key }}:</th><td>{{ val }}</td></tr>
|
||||||
|
{% endfor %}
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Search this collection by url:
|
||||||
<form onsubmit="url = document.getElementById('search').value; if (url != '') { document.location.href = '{{ wbrequest.wb_prefix }}' + '*/' + url; } return false;">
|
<form onsubmit="url = document.getElementById('search').value; if (url != '') { document.location.href = '{{ wbrequest.wb_prefix }}' + '*/' + url; } return false;">
|
||||||
<input id="search" name="search" placeholder="Enter url to search"/>
|
<input id="search" name="search" placeholder="Enter url to search"/>
|
||||||
<button type="submit">Search</button>
|
<button type="submit">Search</button>
|
||||||
</form>
|
</form>
|
||||||
|
</p>
|
||||||
|
@ -143,13 +143,13 @@ class DirectoryCollsLoader(object):
|
|||||||
if not os.path.isdir(full):
|
if not os.path.isdir(full):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
coll = self.load_dir(full, name)
|
coll_config = self.load_coll_dir(full, name)
|
||||||
if coll:
|
if coll_config:
|
||||||
colls[name] = coll
|
colls[name] = coll_config
|
||||||
|
|
||||||
return colls
|
return colls
|
||||||
|
|
||||||
def _add_if_exists(self, coll, root_dir, dir_key, required=False):
|
def _add_dir_if_exists(self, coll, root_dir, dir_key, required=False):
|
||||||
if dir_key in coll:
|
if dir_key in coll:
|
||||||
# already set
|
# already set
|
||||||
return False
|
return False
|
||||||
@ -168,18 +168,26 @@ class DirectoryCollsLoader(object):
|
|||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def load_dir(self, root_dir, name):
|
def load_yaml_file(self, root_dir, filename):
|
||||||
config_file = os.path.join(root_dir, 'config.yaml')
|
filename = os.path.join(root_dir, filename)
|
||||||
if os.path.isfile(config_file):
|
if os.path.isfile(filename):
|
||||||
coll = load_yaml_config(config_file)
|
return load_yaml_config(filename)
|
||||||
else:
|
else:
|
||||||
coll = {}
|
return {}
|
||||||
|
|
||||||
self._add_if_exists(coll, root_dir, 'index_paths', True)
|
def load_coll_dir(self, root_dir, name):
|
||||||
self._add_if_exists(coll, root_dir, 'archive_paths', True)
|
# Load config.yaml
|
||||||
|
coll_config = self.load_yaml_file(root_dir, 'config.yaml')
|
||||||
|
|
||||||
if self._add_if_exists(coll, root_dir, 'static_path', False):
|
# Load metadata.yaml
|
||||||
self.static_routes['static/' + name] = coll['static_path']
|
metadata = self.load_yaml_file(root_dir, 'metadata.yaml')
|
||||||
|
coll_config['metadata'] = metadata
|
||||||
|
|
||||||
|
self._add_dir_if_exists(coll_config, root_dir, 'index_paths', True)
|
||||||
|
self._add_dir_if_exists(coll_config, root_dir, 'archive_paths', True)
|
||||||
|
|
||||||
|
if self._add_dir_if_exists(coll_config, root_dir, 'static_path', False):
|
||||||
|
self.static_routes['static/' + name] = coll_config['static_path']
|
||||||
|
|
||||||
# Add templates
|
# Add templates
|
||||||
templates_dir = self.config.get('paths').get('templates_dir')
|
templates_dir = self.config.get('paths').get('templates_dir')
|
||||||
@ -187,15 +195,15 @@ class DirectoryCollsLoader(object):
|
|||||||
template_dir = os.path.join(root_dir, templates_dir)
|
template_dir = os.path.join(root_dir, templates_dir)
|
||||||
if template_dir:
|
if template_dir:
|
||||||
for tname, tfile in self.config.get('paths')['template_files'].iteritems():
|
for tname, tfile in self.config.get('paths')['template_files'].iteritems():
|
||||||
if tname in coll:
|
if tname in coll_config:
|
||||||
# Already set
|
# Already set
|
||||||
continue
|
continue
|
||||||
|
|
||||||
full = os.path.join(template_dir, tfile)
|
full = os.path.join(template_dir, tfile)
|
||||||
if os.path.isfile(full):
|
if os.path.isfile(full):
|
||||||
coll[tname] = full
|
coll_config[tname] = full
|
||||||
|
|
||||||
return coll
|
return coll_config
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
@ -5,6 +5,7 @@ from pywb.framework.memento import make_timemap, LINK_FORMAT
|
|||||||
import urlparse
|
import urlparse
|
||||||
import urllib
|
import urllib
|
||||||
import logging
|
import logging
|
||||||
|
import json
|
||||||
|
|
||||||
from os import path
|
from os import path
|
||||||
from itertools import imap
|
from itertools import imap
|
||||||
@ -59,6 +60,11 @@ def is_wb_handler(obj):
|
|||||||
return obj.handler.__class__.__name__ == "WBHandler"
|
return obj.handler.__class__.__name__ == "WBHandler"
|
||||||
|
|
||||||
|
|
||||||
|
@template_filter()
|
||||||
|
def jsonify(obj):
|
||||||
|
return json.dumps(obj)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class J2TemplateView(object):
|
class J2TemplateView(object):
|
||||||
env_globals = {'static_path': 'static/default',
|
env_globals = {'static_path': 'static/default',
|
||||||
|
3
setup.py
3
setup.py
@ -42,7 +42,7 @@ setup(
|
|||||||
long_description=long_description,
|
long_description=long_description,
|
||||||
license='GPL',
|
license='GPL',
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
#include_package_data=True,
|
zip_safe=True,
|
||||||
provides=[
|
provides=[
|
||||||
'pywb',
|
'pywb',
|
||||||
'pywb.utils',
|
'pywb.utils',
|
||||||
@ -92,7 +92,6 @@ setup(
|
|||||||
proxy-cert-auth = pywb.framework.certauth:main
|
proxy-cert-auth = pywb.framework.certauth:main
|
||||||
wb-manager = pywb.manager.manager:main
|
wb-manager = pywb.manager.manager:main
|
||||||
""",
|
""",
|
||||||
zip_safe=True,
|
|
||||||
classifiers=[
|
classifiers=[
|
||||||
'Development Status :: 4 - Beta',
|
'Development Status :: 4 - Beta',
|
||||||
'Environment :: Web Environment',
|
'Environment :: Web Environment',
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
|
import sys
|
||||||
|
|
||||||
import webtest
|
import webtest
|
||||||
|
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
from pywb.webapp.pywb_init import create_wb_router
|
from pywb.webapp.pywb_init import create_wb_router
|
||||||
from pywb.manager.manager import main
|
from pywb.manager.manager import main
|
||||||
|
|
||||||
@ -53,7 +56,7 @@ class TestManagedColls(object):
|
|||||||
def test_create_first_coll(self):
|
def test_create_first_coll(self):
|
||||||
""" Test first collection creation, with all required dirs
|
""" Test first collection creation, with all required dirs
|
||||||
"""
|
"""
|
||||||
main(['--init', 'test'])
|
main(['init', 'test'])
|
||||||
|
|
||||||
colls = os.path.join(self.root_dir, 'collections')
|
colls = os.path.join(self.root_dir, 'collections')
|
||||||
assert os.path.isdir(colls)
|
assert os.path.isdir(colls)
|
||||||
@ -68,7 +71,7 @@ class TestManagedColls(object):
|
|||||||
"""
|
"""
|
||||||
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||||
|
|
||||||
main(['--addwarc', 'test', warc1])
|
main(['add', 'test', warc1])
|
||||||
|
|
||||||
self._create_app()
|
self._create_app()
|
||||||
resp = self.testapp.get('/test/20140103030321/http://example.com?example=1')
|
resp = self.testapp.get('/test/20140103030321/http://example.com?example=1')
|
||||||
@ -79,9 +82,9 @@ class TestManagedColls(object):
|
|||||||
"""
|
"""
|
||||||
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||||
|
|
||||||
main(['--init', 'foo'])
|
main(['init', 'foo'])
|
||||||
|
|
||||||
main(['--addwarc', 'foo', warc1])
|
main(['add', 'foo', warc1])
|
||||||
|
|
||||||
self._create_app()
|
self._create_app()
|
||||||
resp = self.testapp.get('/foo/20140103030321/http://example.com?example=1')
|
resp = self.testapp.get('/foo/20140103030321/http://example.com?example=1')
|
||||||
@ -93,17 +96,14 @@ class TestManagedColls(object):
|
|||||||
warc1 = os.path.join(get_test_dir(), 'warcs', 'iana.warc.gz')
|
warc1 = os.path.join(get_test_dir(), 'warcs', 'iana.warc.gz')
|
||||||
warc2 = os.path.join(get_test_dir(), 'warcs', 'example-extra.warc')
|
warc2 = os.path.join(get_test_dir(), 'warcs', 'example-extra.warc')
|
||||||
|
|
||||||
main(['--addwarc', 'test', warc1, warc2])
|
main(['add', 'test', warc1, warc2])
|
||||||
|
|
||||||
# Spurrious file in collections
|
# Spurrious file in collections
|
||||||
with open(os.path.join(self.root_dir, 'collections', 'blah'), 'w+b') as fh:
|
with open(os.path.join(self.root_dir, 'collections', 'blah'), 'w+b') as fh:
|
||||||
fh.write('foo\n')
|
fh.write('foo\n')
|
||||||
|
|
||||||
with raises(IOError):
|
with raises(IOError):
|
||||||
main(['--addwarc', 'test', 'non-existent-file.warc.gz'])
|
main(['add', 'test', 'non-existent-file.warc.gz'])
|
||||||
|
|
||||||
# check adding no warc -- no op
|
|
||||||
main(['--addwarc', 'test'])
|
|
||||||
|
|
||||||
# check new cdx
|
# check new cdx
|
||||||
self._create_app()
|
self._create_app()
|
||||||
@ -116,7 +116,7 @@ class TestManagedColls(object):
|
|||||||
Ensure CDX is relative to root archive dir, test replay
|
Ensure CDX is relative to root archive dir, test replay
|
||||||
"""
|
"""
|
||||||
|
|
||||||
main(['--init', 'nested'])
|
main(['init', 'nested'])
|
||||||
|
|
||||||
nested_root = os.path.join(self.root_dir, 'collections', 'nested', 'warcs')
|
nested_root = os.path.join(self.root_dir, 'collections', 'nested', 'warcs')
|
||||||
nested_a = os.path.join(nested_root, 'A')
|
nested_a = os.path.join(nested_root, 'A')
|
||||||
@ -131,7 +131,7 @@ class TestManagedColls(object):
|
|||||||
shutil.copy2(warc1, nested_a)
|
shutil.copy2(warc1, nested_a)
|
||||||
shutil.copy2(warc2, nested_b)
|
shutil.copy2(warc2, nested_b)
|
||||||
|
|
||||||
main(['--index-warcs',
|
main(['index',
|
||||||
'nested',
|
'nested',
|
||||||
os.path.join(nested_a, 'iana.warc.gz'),
|
os.path.join(nested_a, 'iana.warc.gz'),
|
||||||
os.path.join(nested_b, 'example.warc.gz')
|
os.path.join(nested_b, 'example.warc.gz')
|
||||||
@ -162,7 +162,7 @@ class TestManagedColls(object):
|
|||||||
|
|
||||||
shutil.copy(orig, bak)
|
shutil.copy(orig, bak)
|
||||||
|
|
||||||
main(['--reindex', 'test'])
|
main(['reindex', 'test'])
|
||||||
|
|
||||||
with open(orig) as orig_fh:
|
with open(orig) as orig_fh:
|
||||||
merged_cdx = orig_fh.read()
|
merged_cdx = orig_fh.read()
|
||||||
@ -187,6 +187,39 @@ class TestManagedColls(object):
|
|||||||
assert resp.content_type == 'application/javascript'
|
assert resp.content_type == 'application/javascript'
|
||||||
assert '/* Some JS File */' in resp.body
|
assert '/* Some JS File */' in resp.body
|
||||||
|
|
||||||
|
def test_add_title_metadata_index_page(self):
|
||||||
|
""" Test adding title metadata to a collection, test
|
||||||
|
retrieval on default index page
|
||||||
|
"""
|
||||||
|
main(['metadata', 'foo', '--set', 'title=Collection Title'])
|
||||||
|
|
||||||
|
self._create_app()
|
||||||
|
resp = self.testapp.get('/')
|
||||||
|
assert resp.status_int == 200
|
||||||
|
assert resp.content_type == 'text/html'
|
||||||
|
assert '(Collection Title)' in resp.body
|
||||||
|
|
||||||
|
def test_other_metadata_search_page(self):
|
||||||
|
main(['metadata', 'foo', '--set',
|
||||||
|
'desc=Some Description Text',
|
||||||
|
'other=custom value'])
|
||||||
|
|
||||||
|
with raises(ValueError):
|
||||||
|
main(['metadata', 'foo', '--set', 'name_only'])
|
||||||
|
|
||||||
|
self._create_app()
|
||||||
|
resp = self.testapp.get('/foo/')
|
||||||
|
assert resp.status_int == 200
|
||||||
|
assert resp.content_type == 'text/html'
|
||||||
|
|
||||||
|
assert 'Collection Title' in resp.body
|
||||||
|
|
||||||
|
assert 'desc' in resp.body
|
||||||
|
assert 'Some Description Text' in resp.body
|
||||||
|
|
||||||
|
assert 'other' in resp.body
|
||||||
|
assert 'custom value' in resp.body
|
||||||
|
|
||||||
def test_custom_template_search(self):
|
def test_custom_template_search(self):
|
||||||
""" Test manually added custom search template search.html
|
""" Test manually added custom search template search.html
|
||||||
"""
|
"""
|
||||||
@ -219,7 +252,6 @@ class TestManagedColls(object):
|
|||||||
assert resp.content_type == 'text/html'
|
assert resp.content_type == 'text/html'
|
||||||
assert 'config.yaml overriden search page' in resp.body
|
assert 'config.yaml overriden search page' in resp.body
|
||||||
|
|
||||||
|
|
||||||
def test_no_templates(self):
|
def test_no_templates(self):
|
||||||
""" Test removing templates dir, using default template again
|
""" Test removing templates dir, using default template again
|
||||||
"""
|
"""
|
||||||
@ -232,28 +264,45 @@ class TestManagedColls(object):
|
|||||||
assert resp.content_type == 'text/html'
|
assert resp.content_type == 'text/html'
|
||||||
assert 'pywb custom search page' not in resp.body
|
assert 'pywb custom search page' not in resp.body
|
||||||
|
|
||||||
|
def test_list_colls(self):
|
||||||
|
""" Test collection listing, printed to stdout
|
||||||
|
"""
|
||||||
|
orig_stdout = sys.stdout
|
||||||
|
buff = BytesIO()
|
||||||
|
sys.stdout = buff
|
||||||
|
main(['list'])
|
||||||
|
sys.stdout = orig_stdout
|
||||||
|
|
||||||
|
output = buff.getvalue().splitlines()
|
||||||
|
assert len(output) == 4
|
||||||
|
assert 'Collections' in output[0]
|
||||||
|
assert 'foo' in output[1]
|
||||||
|
assert 'nested' in output[2]
|
||||||
|
assert 'test' in output[3]
|
||||||
|
|
||||||
def test_err_no_such_coll(self):
|
def test_err_no_such_coll(self):
|
||||||
""" Test error adding warc to non-existant collection
|
""" Test error adding warc to non-existant collection
|
||||||
"""
|
"""
|
||||||
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||||
|
|
||||||
with raises(IOError):
|
with raises(IOError):
|
||||||
main(['--addwarc', 'bar', warc1])
|
main(['add', 'bar', warc1])
|
||||||
|
|
||||||
def test_err_wrong_warcs(self):
|
def test_err_wrong_warcs(self):
|
||||||
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
warc1 = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
|
||||||
invalid_warc = os.path.join(self.root_dir, 'collections', 'test', 'warcs', 'invalid.warc.gz')
|
invalid_warc = os.path.join(self.root_dir, 'collections', 'test', 'warcs', 'invalid.warc.gz')
|
||||||
|
|
||||||
# Empty
|
# Empty warc list, argparse calls exit
|
||||||
main(['--index-warcs', 'test'])
|
with raises(SystemExit):
|
||||||
|
main(['index', 'test'])
|
||||||
|
|
||||||
# Wrong paths not in collection
|
# Wrong paths not in collection
|
||||||
with raises(IOError):
|
with raises(IOError):
|
||||||
main(['--index-warcs', 'test', warc1])
|
main(['index', 'test', warc1])
|
||||||
|
|
||||||
# Non-existent
|
# Non-existent
|
||||||
with raises(IOError):
|
with raises(IOError):
|
||||||
main(['--index-warcs', 'test', invalid_warc])
|
main(['index', 'test', invalid_warc])
|
||||||
|
|
||||||
def test_err_missing_dirs(self):
|
def test_err_missing_dirs(self):
|
||||||
""" Test various errors with missing warcs dir,
|
""" Test various errors with missing warcs dir,
|
||||||
@ -266,7 +315,7 @@ class TestManagedColls(object):
|
|||||||
shutil.rmtree(warcs_path)
|
shutil.rmtree(warcs_path)
|
||||||
|
|
||||||
with raises(IOError):
|
with raises(IOError):
|
||||||
main(['--addwarc', 'foo', 'somewarc'])
|
main(['add', 'foo', 'somewarc'])
|
||||||
|
|
||||||
# No CDX
|
# No CDX
|
||||||
cdx_path = os.path.join(colls, 'foo', 'cdx')
|
cdx_path = os.path.join(colls, 'foo', 'cdx')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user