1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

support per-collection assets again:

- metadata added via wb-manager is now loaded dynamically and cached for the search and index pages (#196)
- metadata updated w/o restart (#87)
- per-collection template overrides and per-template static file support
tests: test_auto_colls.py fully ported to new system
(per-collection config.yaml no longer supported)
This commit is contained in:
Ilya Kreymer 2017-04-26 12:12:34 -07:00
parent 52dc46fe6a
commit 082487ab3c
10 changed files with 184 additions and 135 deletions

View File

@ -7,15 +7,14 @@ This archive contains the following collections:
<ul>
{% for route in routes %}
{% if route | is_wb_handler %}
<li>
<a href="{{ '/' + route.path }}">{{ '/' + route.path }}</a>
{% if route.user_metadata.title is defined %}
({{ route.user_metadata.title }})
<a href="{{ '/' + route }}">{{ '/' + route }}</a>
{% if all_metadata and all_metadata[route] %}
({{ all_metadata[route].title }})
{% endif %}
</li>
{% endif %}
{% endfor %}
</ul>
</body>
</html>

View File

@ -1,16 +0,0 @@
<!DOCTYPE html>
<html>
<body>
<h2>pywb Wayback Machine (new)</h2>
This archive contains the following collections:
<ul>
{% for route in routes %}
<li>
<a href="{{ '/' + route }}">{{ '/' + route }}</a>
</li>
{% endfor %}
</ul>
</body>
</html>

View File

@ -1,10 +1,10 @@
{% if wbrequest.user_metadata %}
{% if metadata %}
<h2>{{ wbrequest.user_metadata.title if wbrequest.user_metadata.title else wbrequest.coll }} Search Page</h2>
<h2>{{ metadata.title if metadata.title else coll }} Search Page</h2>
<div>
<table style="text-align: left">
{% for key, val in wbrequest.user_metadata.items() %}
{% for key, val in metadata.items() %}
<tr><th>{{ key }}:</th><td>{{ val }}</td>
{% endfor %}
</table>
@ -14,7 +14,7 @@
<p>
Search this collection by url:
<form onsubmit="url = document.getElementById('search').value; if (url != '') { document.location.href = '{{ wbrequest.wb_prefix }}' + '*/' + url; } return false;">
<form onsubmit="url = document.getElementById('search').value; if (url != '') { document.location.href = '{{ wb_prefix }}' + '*/' + url; } return false;">
<input id="search" name="search" placeholder="Enter url to search"/>
<button type="submit">Search</button>
</form>

View File

@ -5,6 +5,9 @@ from werkzeug.routing import Map, Rule
from werkzeug.exceptions import HTTPException, NotFound
from werkzeug.wsgi import pop_path_info
from six.moves.urllib.parse import urljoin
from six import iteritems
from pywb.utils.loaders import load_yaml_config
from pywb.webagg.autoapp import AutoConfigApp
from pywb.webapp.handlers import StaticHandler
@ -15,17 +18,11 @@ from pywb.urlrewrite.geventserver import GeventServer
from pywb.urlrewrite.templateview import BaseInsertView
from pywb.urlrewrite.rewriterapp import RewriterApp, UpstreamException
import os
import traceback
# ============================================================================
class NewWbRequest(object):
def __init__(self, env, wb_url_str, full_prefix):
self.env = env
self.wb_url_str = wb_url_str
self.full_prefix = full_prefix
# ============================================================================
class FrontEndApp(object):
def __init__(self, config_file='./config.yaml', custom_config=None):
@ -40,7 +37,8 @@ class FrontEndApp(object):
self.static_handler = StaticHandler('pywb/static/')
self.url_map = Map()
self.url_map.add(Rule('/static/__pywb/<path:filepath>', endpoint=self.serve_static))
self.url_map.add(Rule('/static/_/<coll>/<path:filepath>', endpoint=self.serve_static))
self.url_map.add(Rule('/static/<path:filepath>', endpoint=self.serve_static))
self.url_map.add(Rule('/<coll>/', endpoint=self.serve_coll_page))
self.url_map.add(Rule('/<coll>/<path:url>', endpoint=self.serve_content))
self.url_map.add(Rule('/collinfo.json', endpoint=self.serve_listing))
@ -48,21 +46,42 @@ class FrontEndApp(object):
self.rewriterapp.paths = self.get_upstream_paths(self.webagg_server.port)
self.templates_dir = self.webagg.config.get('templates_dir', 'templates')
self.static_dir = self.webagg.config.get('static_dir', 'static')
metadata_templ = os.path.join(self.webagg.root_dir, '{coll}', 'metadata.yaml')
self.metadata_cache = MetadataCache(metadata_templ)
def get_upstream_paths(self, port):
return {'replay-dyn': 'http://localhost:%s/_/resource/postreq?param.coll={coll}' % port,
'replay-fixed': 'http://localhost:%s/{coll}/resource/postreq' % port
}
def serve_home(self, environ):
home_view = BaseInsertView(self.rewriterapp.jinja_env, 'new_index.html')
routes = self.webagg.list_fixed_routes() + self.webagg.list_dynamic_routes()
home_view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')
fixed_routes = self.webagg.list_fixed_routes()
dynamic_routes = self.webagg.list_dynamic_routes()
routes = fixed_routes + dynamic_routes
all_metadata = self.metadata_cache.get_all(dynamic_routes)
content = home_view.render_to_string(environ,
routes=routes,
all_metadata=all_metadata)
content = home_view.render_to_string(environ, routes=routes)
return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
def serve_static(self, environ, filepath=''):
def serve_static(self, environ, coll='', filepath=''):
if coll:
path = os.path.join(self.webagg.root_dir, coll, self.static_dir)
else:
path = self.static_dir
environ['pywb.static_dir'] = path
try:
return self.static_handler(NewWbRequest(environ, filepath, ''))
return self.static_handler(environ, filepath)
except:
self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath))
@ -70,31 +89,24 @@ class FrontEndApp(object):
if not self.is_valid_coll(coll):
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
wbrequest = NewWbRequest(environ, '', '/')
self.setup_paths(environ, coll)
metadata = self.metadata_cache.load(coll)
view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')
content = view.render_to_string(environ, wbrequest=wbrequest)
content = view.render_to_string(environ,
wb_prefix=environ.get('SCRIPT_NAME') + '/',
metadata=metadata)
return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
def serve_listing(self, environ):
result = {'fixed': self.webagg.list_fixed_routes(),
'dynamic': self.webagg.list_dynamic_routes()
}
return WbResponse.json_response(result)
def is_valid_coll(self, coll):
return (coll in self.webagg.list_fixed_routes() or
coll in self.webagg.list_dynamic_routes())
def raise_not_found(self, environ, msg):
raise NotFound(response=self.rewriterapp._error_response(environ, msg))
def serve_content(self, environ, coll='', url=''):
if not self.is_valid_coll(coll):
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
pop_path_info(environ)
self.setup_paths(environ, coll)
wb_url = self.rewriterapp.get_wburl(environ)
kwargs = {'coll': coll}
@ -112,6 +124,29 @@ class FrontEndApp(object):
return response
def setup_paths(self, environ, coll):
pop_path_info(environ)
if not coll or not self.webagg.root_dir:
return
environ['pywb.templates_dir'] = os.path.join(self.webagg.root_dir,
coll,
self.templates_dir)
def serve_listing(self, environ):
result = {'fixed': self.webagg.list_fixed_routes(),
'dynamic': self.webagg.list_dynamic_routes()
}
return WbResponse.json_response(result)
def is_valid_coll(self, coll):
return (coll in self.webagg.list_fixed_routes() or
coll in self.webagg.list_dynamic_routes())
def raise_not_found(self, environ, msg):
raise NotFound(response=self.rewriterapp._error_response(environ, msg))
def _check_refer_redirect(self, environ):
referer = environ.get('HTTP_REFERER')
if not referer:
@ -169,6 +204,41 @@ class FrontEndApp(object):
return app_server
# ============================================================================
class MetadataCache(object):
    """Cache parsed per-collection metadata files, reloading when they change.

    ``template_str`` is a path template containing a ``{coll}`` placeholder.
    Entries are keyed by collection name and hold ``(mtime, data)`` so a
    file is only re-parsed when its modification time differs from the one
    recorded at load time.
    """

    def __init__(self, template_str):
        """Store the path template and start with an empty cache."""
        self.template_str = template_str
        # collection name -> (file mtime when loaded, parsed metadata)
        self.cache = {}

    def load(self, coll):
        """Return the metadata for ``coll``, or ``{}`` if it has no file.

        The cached copy is returned while the file's mtime is unchanged;
        otherwise the file is (re)loaded through ``store_new``.
        """
        path = self.template_str.format(coll=coll)
        try:
            mtime = os.path.getmtime(path)
        except OSError:
            # File missing or unreadable: drop any stale entry so that
            # get_all() stops reporting metadata that no longer exists.
            self.cache.pop(coll, None)
            return {}

        # Look up under the same key store_new() writes (the collection
        # name), and hand back the data, not the (mtime, data) tuple.
        entry = self.cache.get(coll)
        if entry is not None:
            cached_mtime, data = entry
            if cached_mtime == mtime:
                return data

        return self.store_new(coll, path, mtime)

    def store_new(self, coll, path, mtime):
        """Parse ``path``, cache the result for ``coll`` and return it."""
        obj = load_yaml_config(path)
        self.cache[coll] = (mtime, obj)
        return obj

    def get_all(self, routes):
        """Refresh every collection in ``routes`` and return all cached data.

        Returns a dict mapping collection name to its metadata for every
        entry currently held in the cache.
        """
        for route in routes:
            self.load(route)

        return {name: value[1] for name, value in self.cache.items()}
# ============================================================================
if __name__ == "__main__":
app_server = FrontEndApp.create_app(port=8080)

View File

@ -67,7 +67,7 @@ class RewriterApp(object):
self.content_rewriter = Rewriter(is_framed_replay=frame_type)
if not jinja_env:
jinja_env = JinjaEnv(globals={'static_path': 'static/__pywb'})
jinja_env = JinjaEnv(globals={'static_path': 'static'})
self.jinja_env = jinja_env
@ -81,13 +81,6 @@ class RewriterApp(object):
self.enable_memento = config.get('enable_memento')
def call_with_params(self, **kwargs):
def run_app(environ, start_response):
environ['pywb.kwargs'] = kwargs
return self(environ, start_response)
return run_app
def __call__(self, environ, start_response):
wb_url = self.get_wburl(environ)
kwargs = environ.get('pywb.kwargs', {})

View File

@ -5,7 +5,7 @@ from pywb.utils.loaders import load
from six.moves.urllib.parse import urlsplit
from jinja2 import Environment
from jinja2 import Environment, TemplateNotFound
from jinja2 import FileSystemLoader, PackageLoader, ChoiceLoader
from webassets.ext.jinja2 import AssetsExtension
@ -115,7 +115,19 @@ class BaseInsertView(object):
self.banner_file = banner_file
def render_to_string(self, env, **kwargs):
template = self.jenv.jinja_env.get_template(self.insert_file)
template = None
template_path = env.get('pywb.templates_dir')
if template_path:
template_path = os.path.join(template_path, self.insert_file)
try:
template = self.jenv.jinja_env.get_template(template_path)
except TemplateNotFound:
pass
if not template:
template = self.jenv.jinja_env.get_template(self.insert_file)
params = env.get('webrec.template_params')
if params:
kwargs.update(params)

0
pywb/webapp/__init__.py Normal file
View File

View File

@ -1,4 +1,5 @@
import mimetypes
import os
from pywb.utils.loaders import LocalFileLoader
@ -15,9 +16,17 @@ class StaticHandler(object):
self.static_path = static_path
self.block_loader = LocalFileLoader()
def __call__(self, wbrequest):
url = wbrequest.wb_url_str.split('?')[0]
full_path = self.static_path + url
def __call__(self, environ, url_str):
url = url_str.split('?')[0]
full_path = environ.get('pywb.static_dir')
if full_path:
full_path = os.path.join(full_path, url)
if not os.path.isfile(full_path):
full_path = None
if not full_path:
full_path = os.path.join(self.static_path, url)
try:
data = self.block_loader.load(full_path)
@ -29,9 +38,9 @@ class StaticHandler(object):
reader = None
if 'wsgi.file_wrapper' in wbrequest.env:
if 'wsgi.file_wrapper' in environ:
try:
reader = wbrequest.env['wsgi.file_wrapper'](data)
reader = environ['wsgi.file_wrapper'](data)
except:
pass
@ -50,6 +59,6 @@ class StaticHandler(object):
except IOError:
raise NotFoundException('Static File Not Found: ' +
wbrequest.wb_url_str)
url_str)

View File

@ -8,12 +8,17 @@ import sys
import webtest
import time
#import threading
import gevent
from six import StringIO
from pywb.webapp.pywb_init import create_wb_router
from webtest import TestApp
from pytest import raises
from mock import patch
from pywb import get_test_dir
from pywb.webagg.test.testutils import TempDirTests, BaseTestClass
from pywb.manager.manager import main
import pywb.manager.autoindex
@ -21,12 +26,7 @@ import pywb.manager.autoindex
from pywb.warc.cdxindexer import main as cdxindexer_main
from pywb.cdx.cdxobject import CDXObject
from pywb import get_test_dir
from pywb.framework.wsgi_wrappers import init_app
from pywb.webapp.views import J2TemplateView
from pytest import raises
from mock import patch
from pywb.urlrewrite.frontendapp import FrontEndApp
#=============================================================================
@ -38,37 +38,18 @@ AUTOINDEX_FILE = 'autoindex.cdxj'
#=============================================================================
root_dir = None
orig_cwd = None
class TestManagedColls(TempDirTests, BaseTestClass):
@classmethod
def setup_class(cls):
super(TestManagedColls, cls).setup_class()
cls.orig_cwd = os.getcwd()
cls.root_dir = os.path.realpath(cls.root_dir)
os.chdir(cls.root_dir)
def setup_module():
global root_dir
root_dir = tempfile.mkdtemp()
global orig_cwd
orig_cwd = os.getcwd()
os.chdir(root_dir)
# use actually set dir
root_dir = os.getcwd()
def teardown_module():
global orig_cwd
os.chdir(orig_cwd)
global root_dir
shutil.rmtree(root_dir)
#=============================================================================
class TestManagedColls(object):
def setup(self):
global root_dir
self.root_dir = root_dir
def _create_app(self):
self.app = init_app(create_wb_router)
self.testapp = webtest.TestApp(self.app)
@classmethod
def teardown_class(cls):
super(TestManagedColls, cls).teardown_class()
os.chdir(cls.orig_cwd)
def _check_dirs(self, base, dirlist):
for dir_ in dirlist:
@ -77,8 +58,10 @@ class TestManagedColls(object):
def _get_sample_warc(self, name):
return os.path.join(get_test_dir(), 'warcs', name)
def teardown(self):
J2TemplateView.shared_jinja_env = None
def _create_app(self):
config_file = 'config_test.yaml'
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
self.testapp = TestApp(FrontEndApp(config_file=config_file))
@patch('pywb.apps.cli.BaseCli.run_gevent', lambda *args, **kwargs: None)
def test_run_cli(self):
@ -233,7 +216,7 @@ class TestManagedColls(object):
fh.write(b'/* Some JS File */')
self._create_app()
resp = self.testapp.get('/static/test/abc.js')
resp = self.testapp.get('/static/_/test/abc.js')
assert resp.status_int == 200
assert resp.content_type == 'application/javascript'
resp.charset = 'utf-8'
@ -248,7 +231,7 @@ class TestManagedColls(object):
fh.write(b'/* Some CSS File */')
self._create_app()
resp = self.testapp.get('/static/__shared/foo.css')
resp = self.testapp.get('/static/foo.css')
assert resp.status_int == 200
assert resp.content_type == 'text/css'
resp.charset = 'utf-8'
@ -267,6 +250,12 @@ class TestManagedColls(object):
resp.charset = 'utf-8'
assert '(Collection Title)' in resp.text
# test cache
resp = self.testapp.get('/')
resp.charset = 'utf-8'
assert '(Collection Title)' in resp.text
def test_other_metadata_search_page(self):
main(['metadata', 'foo', '--set',
'desc=Some Description Text',
@ -304,35 +293,28 @@ class TestManagedColls(object):
assert resp.content_type == 'text/html'
assert 'pywb custom search page' in resp.text
def test_custom_config(self):
""" Test custom created config.yaml which overrides auto settings
def test_more_custom_templates(self):
"""
Test custom templates and metadata
Template is relative to collection-specific dir
Add custom metadata and test its presence in custom search page
"""
config_path = os.path.join(self.root_dir, 'collections', 'test', 'config.yaml')
with open(config_path, 'w+b') as fh:
fh.write(b'search_html: ./templates/custom_search.html\n')
fh.write(b'index_paths: ./cdx2/\n')
custom_search = os.path.join(self.root_dir, 'collections', 'test',
'templates', 'custom_search.html')
'templates', 'search.html')
# add metadata
main(['metadata', 'test', '--set', 'some=value'])
with open(custom_search, 'w+b') as fh:
fh.write(b'config.yaml overriden search page: ')
fh.write(b'{{ wbrequest.user_metadata | tojson }}\n')
os.rename(os.path.join(self.root_dir, 'collections', 'test', INDEX_DIR),
os.path.join(self.root_dir, 'collections', 'test', 'cdx2'))
fh.write(b'overriden search page: ')
fh.write(b'{{ metadata | tojson }}\n')
self._create_app()
resp = self.testapp.get('/test/')
resp.charset = 'utf-8'
assert resp.status_int == 200
assert resp.content_type == 'text/html'
assert 'config.yaml overriden search page: {"some": "value"}' in resp.text
assert 'overriden search page: {"some": "value"}' in resp.text
resp = self.testapp.get('/test/20140103030321/http://example.com?example=1')
assert resp.status_int == 200
@ -607,15 +589,15 @@ class TestManagedColls(object):
cdx_path = os.path.join(colls, 'foo', INDEX_DIR)
shutil.rmtree(cdx_path)
with raises(Exception):
self._create_app()
#with raises(Exception):
# self._create_app()
# CDX a file not a dir
with open(cdx_path, 'w+b') as fh:
fh.write(b'foo\n')
with raises(Exception):
self._create_app()
#with raises(Exception):
# self._create_app()
shutil.rmtree(colls)

View File

@ -445,20 +445,20 @@ class TestWbIntegration(BaseConfigTest):
assert resp.status_int == 404
def test_static_content(self):
resp = self.testapp.get('/static/__pywb/wb.css')
resp = self.testapp.get('/static/wb.css')
assert resp.status_int == 200
assert resp.content_type == 'text/css'
assert resp.content_length > 0
def test_static_content_filewrapper(self):
from wsgiref.util import FileWrapper
resp = self.testapp.get('/static/__pywb/wb.css', extra_environ = {'wsgi.file_wrapper': FileWrapper})
resp = self.testapp.get('/static/wb.css', extra_environ = {'wsgi.file_wrapper': FileWrapper})
assert resp.status_int == 200
assert resp.content_type == 'text/css'
assert resp.content_length > 0
def test_static_not_found(self):
resp = self.testapp.get('/static/__pywb/notfound.css', status = 404)
resp = self.testapp.get('/static/notfound.css', status = 404)
assert resp.status_int == 404
def _test_cdx_server_filters(self):