1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

support per-collection assets again:

- metadata added via wb-manager is now loaded dynamically and cached, for use on the search and index pages (#196)
- metadata updates take effect without a restart (#87)
- per-collection template overrides and per-template static file support
tests: test_auto_colls.py fully ported to new system
(per-collection config.yaml no longer supported)
This commit is contained in:
Ilya Kreymer 2017-04-26 12:12:34 -07:00
parent 52dc46fe6a
commit 082487ab3c
10 changed files with 184 additions and 135 deletions

View File

@ -7,15 +7,14 @@ This archive contains the following collections:
<ul> <ul>
{% for route in routes %} {% for route in routes %}
{% if route | is_wb_handler %}
<li> <li>
<a href="{{ '/' + route.path }}">{{ '/' + route.path }}</a> <a href="{{ '/' + route }}">{{ '/' + route }}</a>
{% if route.user_metadata.title is defined %} {% if all_metadata and all_metadata[route] %}
({{ route.user_metadata.title }}) ({{ all_metadata[route].title }})
{% endif %} {% endif %}
</li> </li>
{% endif %}
{% endfor %} {% endfor %}
</ul> </ul>
</body> </body>
</html> </html>

View File

@ -1,16 +0,0 @@
<!DOCTYPE html>
<html>
<body>
<h2>pywb Wayback Machine (new)</h2>
This archive contains the following collections:
<ul>
{% for route in routes %}
<li>
<a href="{{ '/' + route }}">{{ '/' + route }}</a>
</li>
{% endfor %}
</ul>
</body>
</html>

View File

@ -1,10 +1,10 @@
{% if wbrequest.user_metadata %} {% if metadata %}
<h2>{{ wbrequest.user_metadata.title if wbrequest.user_metadata.title else wbrequest.coll }} Search Page</h2> <h2>{{ metadata.title if metadata.title else coll }} Search Page</h2>
<div> <div>
<table style="text-align: left"> <table style="text-align: left">
{% for key, val in wbrequest.user_metadata.items() %} {% for key, val in metadata.items() %}
<tr><th>{{ key }}:</th><td>{{ val }}</td> <tr><th>{{ key }}:</th><td>{{ val }}</td>
{% endfor %} {% endfor %}
</table> </table>
@ -14,7 +14,7 @@
<p> <p>
Search this collection by url: Search this collection by url:
<form onsubmit="url = document.getElementById('search').value; if (url != '') { document.location.href = '{{ wbrequest.wb_prefix }}' + '*/' + url; } return false;"> <form onsubmit="url = document.getElementById('search').value; if (url != '') { document.location.href = '{{ wb_prefix }}' + '*/' + url; } return false;">
<input id="search" name="search" placeholder="Enter url to search"/> <input id="search" name="search" placeholder="Enter url to search"/>
<button type="submit">Search</button> <button type="submit">Search</button>
</form> </form>

View File

@ -5,6 +5,9 @@ from werkzeug.routing import Map, Rule
from werkzeug.exceptions import HTTPException, NotFound from werkzeug.exceptions import HTTPException, NotFound
from werkzeug.wsgi import pop_path_info from werkzeug.wsgi import pop_path_info
from six.moves.urllib.parse import urljoin from six.moves.urllib.parse import urljoin
from six import iteritems
from pywb.utils.loaders import load_yaml_config
from pywb.webagg.autoapp import AutoConfigApp from pywb.webagg.autoapp import AutoConfigApp
from pywb.webapp.handlers import StaticHandler from pywb.webapp.handlers import StaticHandler
@ -15,17 +18,11 @@ from pywb.urlrewrite.geventserver import GeventServer
from pywb.urlrewrite.templateview import BaseInsertView from pywb.urlrewrite.templateview import BaseInsertView
from pywb.urlrewrite.rewriterapp import RewriterApp, UpstreamException from pywb.urlrewrite.rewriterapp import RewriterApp, UpstreamException
import os
import traceback import traceback
# ============================================================================
class NewWbRequest(object):
def __init__(self, env, wb_url_str, full_prefix):
self.env = env
self.wb_url_str = wb_url_str
self.full_prefix = full_prefix
# ============================================================================ # ============================================================================
class FrontEndApp(object): class FrontEndApp(object):
def __init__(self, config_file='./config.yaml', custom_config=None): def __init__(self, config_file='./config.yaml', custom_config=None):
@ -40,7 +37,8 @@ class FrontEndApp(object):
self.static_handler = StaticHandler('pywb/static/') self.static_handler = StaticHandler('pywb/static/')
self.url_map = Map() self.url_map = Map()
self.url_map.add(Rule('/static/__pywb/<path:filepath>', endpoint=self.serve_static)) self.url_map.add(Rule('/static/_/<coll>/<path:filepath>', endpoint=self.serve_static))
self.url_map.add(Rule('/static/<path:filepath>', endpoint=self.serve_static))
self.url_map.add(Rule('/<coll>/', endpoint=self.serve_coll_page)) self.url_map.add(Rule('/<coll>/', endpoint=self.serve_coll_page))
self.url_map.add(Rule('/<coll>/<path:url>', endpoint=self.serve_content)) self.url_map.add(Rule('/<coll>/<path:url>', endpoint=self.serve_content))
self.url_map.add(Rule('/collinfo.json', endpoint=self.serve_listing)) self.url_map.add(Rule('/collinfo.json', endpoint=self.serve_listing))
@ -48,21 +46,42 @@ class FrontEndApp(object):
self.rewriterapp.paths = self.get_upstream_paths(self.webagg_server.port) self.rewriterapp.paths = self.get_upstream_paths(self.webagg_server.port)
self.templates_dir = self.webagg.config.get('templates_dir', 'templates')
self.static_dir = self.webagg.config.get('static_dir', 'static')
metadata_templ = os.path.join(self.webagg.root_dir, '{coll}', 'metadata.yaml')
self.metadata_cache = MetadataCache(metadata_templ)
def get_upstream_paths(self, port): def get_upstream_paths(self, port):
return {'replay-dyn': 'http://localhost:%s/_/resource/postreq?param.coll={coll}' % port, return {'replay-dyn': 'http://localhost:%s/_/resource/postreq?param.coll={coll}' % port,
'replay-fixed': 'http://localhost:%s/{coll}/resource/postreq' % port 'replay-fixed': 'http://localhost:%s/{coll}/resource/postreq' % port
} }
def serve_home(self, environ): def serve_home(self, environ):
home_view = BaseInsertView(self.rewriterapp.jinja_env, 'new_index.html') home_view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')
routes = self.webagg.list_fixed_routes() + self.webagg.list_dynamic_routes() fixed_routes = self.webagg.list_fixed_routes()
dynamic_routes = self.webagg.list_dynamic_routes()
routes = fixed_routes + dynamic_routes
all_metadata = self.metadata_cache.get_all(dynamic_routes)
content = home_view.render_to_string(environ,
routes=routes,
all_metadata=all_metadata)
content = home_view.render_to_string(environ, routes=routes)
return WbResponse.text_response(content, content_type='text/html; charset="utf-8"') return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
def serve_static(self, environ, filepath=''): def serve_static(self, environ, coll='', filepath=''):
if coll:
path = os.path.join(self.webagg.root_dir, coll, self.static_dir)
else:
path = self.static_dir
environ['pywb.static_dir'] = path
try: try:
return self.static_handler(NewWbRequest(environ, filepath, '')) return self.static_handler(environ, filepath)
except: except:
self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath)) self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath))
@ -70,31 +89,24 @@ class FrontEndApp(object):
if not self.is_valid_coll(coll): if not self.is_valid_coll(coll):
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll)) self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
wbrequest = NewWbRequest(environ, '', '/') self.setup_paths(environ, coll)
metadata = self.metadata_cache.load(coll)
view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html') view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')
content = view.render_to_string(environ, wbrequest=wbrequest)
content = view.render_to_string(environ,
wb_prefix=environ.get('SCRIPT_NAME') + '/',
metadata=metadata)
return WbResponse.text_response(content, content_type='text/html; charset="utf-8"') return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
def serve_listing(self, environ):
result = {'fixed': self.webagg.list_fixed_routes(),
'dynamic': self.webagg.list_dynamic_routes()
}
return WbResponse.json_response(result)
def is_valid_coll(self, coll):
return (coll in self.webagg.list_fixed_routes() or
coll in self.webagg.list_dynamic_routes())
def raise_not_found(self, environ, msg):
raise NotFound(response=self.rewriterapp._error_response(environ, msg))
def serve_content(self, environ, coll='', url=''): def serve_content(self, environ, coll='', url=''):
if not self.is_valid_coll(coll): if not self.is_valid_coll(coll):
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll)) self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
pop_path_info(environ) self.setup_paths(environ, coll)
wb_url = self.rewriterapp.get_wburl(environ) wb_url = self.rewriterapp.get_wburl(environ)
kwargs = {'coll': coll} kwargs = {'coll': coll}
@ -112,6 +124,29 @@ class FrontEndApp(object):
return response return response
def setup_paths(self, environ, coll):
pop_path_info(environ)
if not coll or not self.webagg.root_dir:
return
environ['pywb.templates_dir'] = os.path.join(self.webagg.root_dir,
coll,
self.templates_dir)
def serve_listing(self, environ):
result = {'fixed': self.webagg.list_fixed_routes(),
'dynamic': self.webagg.list_dynamic_routes()
}
return WbResponse.json_response(result)
def is_valid_coll(self, coll):
return (coll in self.webagg.list_fixed_routes() or
coll in self.webagg.list_dynamic_routes())
def raise_not_found(self, environ, msg):
raise NotFound(response=self.rewriterapp._error_response(environ, msg))
def _check_refer_redirect(self, environ): def _check_refer_redirect(self, environ):
referer = environ.get('HTTP_REFERER') referer = environ.get('HTTP_REFERER')
if not referer: if not referer:
@ -169,6 +204,41 @@ class FrontEndApp(object):
return app_server return app_server
# ============================================================================
class MetadataCache(object):
    """Cache of per-collection metadata files, keyed by collection name.

    Each cache entry is a ``(mtime, data)`` tuple. ``mtime`` is the
    modification time of the metadata file when it was last read and
    ``data`` is whatever ``load_yaml_config`` returned for it. An entry is
    reused only while the file's current mtime matches the stored one, so
    edits to the file are picked up without restarting the app.
    """

    def __init__(self, template_str):
        """
        :param template_str: path template containing a ``{coll}``
            placeholder, formatted with the collection name to locate
            that collection's metadata file.
        """
        self.template_str = template_str
        self.cache = {}

    def load(self, coll):
        """Return the metadata for ``coll``, reloading it if the file changed.

        :param coll: collection name substituted into the path template.
        :return: the cached or freshly loaded metadata object, or an empty
            dict if the metadata file's mtime cannot be read (for example
            because the file does not exist).
        """
        path = self.template_str.format(coll=coll)

        try:
            mtime = os.path.getmtime(path)
        except OSError:
            # File is missing or unreadable: drop any stale entry so that
            # get_all() stops reporting metadata that no longer exists.
            self.cache.pop(coll, None)
            return {}

        # Look up by collection name, the same key store_new() writes.
        entry = self.cache.get(coll)
        if entry is not None:
            cached_mtime, data = entry
            if cached_mtime == mtime:
                return data

        return self.store_new(coll, path, mtime)

    def store_new(self, coll, path, mtime):
        """Load the file at ``path`` and cache it under ``coll``.

        :param coll: collection name used as the cache key.
        :param path: path of the metadata file to load.
        :param mtime: modification time to record alongside the data.
        :return: the object returned by ``load_yaml_config(path)``.
        """
        obj = load_yaml_config(path)
        self.cache[coll] = (mtime, obj)
        return obj

    def get_all(self, routes):
        """Refresh metadata for every route and return all cached metadata.

        :param routes: iterable of collection names to (re)load.
        :return: dict mapping each cached collection name to its metadata;
            collections for which nothing is cached are absent.
        """
        for route in routes:
            self.load(route)

        return {name: value[1] for name, value in iteritems(self.cache)}
# ============================================================================ # ============================================================================
if __name__ == "__main__": if __name__ == "__main__":
app_server = FrontEndApp.create_app(port=8080) app_server = FrontEndApp.create_app(port=8080)

View File

@ -67,7 +67,7 @@ class RewriterApp(object):
self.content_rewriter = Rewriter(is_framed_replay=frame_type) self.content_rewriter = Rewriter(is_framed_replay=frame_type)
if not jinja_env: if not jinja_env:
jinja_env = JinjaEnv(globals={'static_path': 'static/__pywb'}) jinja_env = JinjaEnv(globals={'static_path': 'static'})
self.jinja_env = jinja_env self.jinja_env = jinja_env
@ -81,13 +81,6 @@ class RewriterApp(object):
self.enable_memento = config.get('enable_memento') self.enable_memento = config.get('enable_memento')
def call_with_params(self, **kwargs):
def run_app(environ, start_response):
environ['pywb.kwargs'] = kwargs
return self(environ, start_response)
return run_app
def __call__(self, environ, start_response): def __call__(self, environ, start_response):
wb_url = self.get_wburl(environ) wb_url = self.get_wburl(environ)
kwargs = environ.get('pywb.kwargs', {}) kwargs = environ.get('pywb.kwargs', {})

View File

@ -5,7 +5,7 @@ from pywb.utils.loaders import load
from six.moves.urllib.parse import urlsplit from six.moves.urllib.parse import urlsplit
from jinja2 import Environment from jinja2 import Environment, TemplateNotFound
from jinja2 import FileSystemLoader, PackageLoader, ChoiceLoader from jinja2 import FileSystemLoader, PackageLoader, ChoiceLoader
from webassets.ext.jinja2 import AssetsExtension from webassets.ext.jinja2 import AssetsExtension
@ -115,7 +115,19 @@ class BaseInsertView(object):
self.banner_file = banner_file self.banner_file = banner_file
def render_to_string(self, env, **kwargs): def render_to_string(self, env, **kwargs):
template = self.jenv.jinja_env.get_template(self.insert_file) template = None
template_path = env.get('pywb.templates_dir')
if template_path:
template_path = os.path.join(template_path, self.insert_file)
try:
template = self.jenv.jinja_env.get_template(template_path)
except TemplateNotFound:
pass
if not template:
template = self.jenv.jinja_env.get_template(self.insert_file)
params = env.get('webrec.template_params') params = env.get('webrec.template_params')
if params: if params:
kwargs.update(params) kwargs.update(params)

0
pywb/webapp/__init__.py Normal file
View File

View File

@ -1,4 +1,5 @@
import mimetypes import mimetypes
import os
from pywb.utils.loaders import LocalFileLoader from pywb.utils.loaders import LocalFileLoader
@ -15,9 +16,17 @@ class StaticHandler(object):
self.static_path = static_path self.static_path = static_path
self.block_loader = LocalFileLoader() self.block_loader = LocalFileLoader()
def __call__(self, wbrequest): def __call__(self, environ, url_str):
url = wbrequest.wb_url_str.split('?')[0] url = url_str.split('?')[0]
full_path = self.static_path + url
full_path = environ.get('pywb.static_dir')
if full_path:
full_path = os.path.join(full_path, url)
if not os.path.isfile(full_path):
full_path = None
if not full_path:
full_path = os.path.join(self.static_path, url)
try: try:
data = self.block_loader.load(full_path) data = self.block_loader.load(full_path)
@ -29,9 +38,9 @@ class StaticHandler(object):
reader = None reader = None
if 'wsgi.file_wrapper' in wbrequest.env: if 'wsgi.file_wrapper' in environ:
try: try:
reader = wbrequest.env['wsgi.file_wrapper'](data) reader = environ['wsgi.file_wrapper'](data)
except: except:
pass pass
@ -50,6 +59,6 @@ class StaticHandler(object):
except IOError: except IOError:
raise NotFoundException('Static File Not Found: ' + raise NotFoundException('Static File Not Found: ' +
wbrequest.wb_url_str) url_str)

View File

@ -8,12 +8,17 @@ import sys
import webtest import webtest
import time import time
#import threading
import gevent import gevent
from six import StringIO from six import StringIO
from pywb.webapp.pywb_init import create_wb_router from webtest import TestApp
from pytest import raises
from mock import patch
from pywb import get_test_dir
from pywb.webagg.test.testutils import TempDirTests, BaseTestClass
from pywb.manager.manager import main from pywb.manager.manager import main
import pywb.manager.autoindex import pywb.manager.autoindex
@ -21,12 +26,7 @@ import pywb.manager.autoindex
from pywb.warc.cdxindexer import main as cdxindexer_main from pywb.warc.cdxindexer import main as cdxindexer_main
from pywb.cdx.cdxobject import CDXObject from pywb.cdx.cdxobject import CDXObject
from pywb import get_test_dir from pywb.urlrewrite.frontendapp import FrontEndApp
from pywb.framework.wsgi_wrappers import init_app
from pywb.webapp.views import J2TemplateView
from pytest import raises
from mock import patch
#============================================================================= #=============================================================================
@ -38,37 +38,18 @@ AUTOINDEX_FILE = 'autoindex.cdxj'
#============================================================================= #=============================================================================
root_dir = None class TestManagedColls(TempDirTests, BaseTestClass):
orig_cwd = None @classmethod
def setup_class(cls):
super(TestManagedColls, cls).setup_class()
cls.orig_cwd = os.getcwd()
cls.root_dir = os.path.realpath(cls.root_dir)
os.chdir(cls.root_dir)
def setup_module(): @classmethod
global root_dir def teardown_class(cls):
root_dir = tempfile.mkdtemp() super(TestManagedColls, cls).teardown_class()
os.chdir(cls.orig_cwd)
global orig_cwd
orig_cwd = os.getcwd()
os.chdir(root_dir)
# use actually set dir
root_dir = os.getcwd()
def teardown_module():
global orig_cwd
os.chdir(orig_cwd)
global root_dir
shutil.rmtree(root_dir)
#=============================================================================
class TestManagedColls(object):
def setup(self):
global root_dir
self.root_dir = root_dir
def _create_app(self):
self.app = init_app(create_wb_router)
self.testapp = webtest.TestApp(self.app)
def _check_dirs(self, base, dirlist): def _check_dirs(self, base, dirlist):
for dir_ in dirlist: for dir_ in dirlist:
@ -77,8 +58,10 @@ class TestManagedColls(object):
def _get_sample_warc(self, name): def _get_sample_warc(self, name):
return os.path.join(get_test_dir(), 'warcs', name) return os.path.join(get_test_dir(), 'warcs', name)
def teardown(self): def _create_app(self):
J2TemplateView.shared_jinja_env = None config_file = 'config_test.yaml'
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
self.testapp = TestApp(FrontEndApp(config_file=config_file))
@patch('pywb.apps.cli.BaseCli.run_gevent', lambda *args, **kwargs: None) @patch('pywb.apps.cli.BaseCli.run_gevent', lambda *args, **kwargs: None)
def test_run_cli(self): def test_run_cli(self):
@ -233,7 +216,7 @@ class TestManagedColls(object):
fh.write(b'/* Some JS File */') fh.write(b'/* Some JS File */')
self._create_app() self._create_app()
resp = self.testapp.get('/static/test/abc.js') resp = self.testapp.get('/static/_/test/abc.js')
assert resp.status_int == 200 assert resp.status_int == 200
assert resp.content_type == 'application/javascript' assert resp.content_type == 'application/javascript'
resp.charset = 'utf-8' resp.charset = 'utf-8'
@ -248,7 +231,7 @@ class TestManagedColls(object):
fh.write(b'/* Some CSS File */') fh.write(b'/* Some CSS File */')
self._create_app() self._create_app()
resp = self.testapp.get('/static/__shared/foo.css') resp = self.testapp.get('/static/foo.css')
assert resp.status_int == 200 assert resp.status_int == 200
assert resp.content_type == 'text/css' assert resp.content_type == 'text/css'
resp.charset = 'utf-8' resp.charset = 'utf-8'
@ -267,6 +250,12 @@ class TestManagedColls(object):
resp.charset = 'utf-8' resp.charset = 'utf-8'
assert '(Collection Title)' in resp.text assert '(Collection Title)' in resp.text
# test cache
resp = self.testapp.get('/')
resp.charset = 'utf-8'
assert '(Collection Title)' in resp.text
def test_other_metadata_search_page(self): def test_other_metadata_search_page(self):
main(['metadata', 'foo', '--set', main(['metadata', 'foo', '--set',
'desc=Some Description Text', 'desc=Some Description Text',
@ -304,35 +293,28 @@ class TestManagedColls(object):
assert resp.content_type == 'text/html' assert resp.content_type == 'text/html'
assert 'pywb custom search page' in resp.text assert 'pywb custom search page' in resp.text
def test_custom_config(self): def test_more_custom_templates(self):
""" Test custom created config.yaml which overrides auto settings """
Test custom templates and metadata
Template is relative to collection-specific dir Template is relative to collection-specific dir
Add custom metadata and test its presence in custom search page Add custom metadata and test its presence in custom search page
""" """
config_path = os.path.join(self.root_dir, 'collections', 'test', 'config.yaml')
with open(config_path, 'w+b') as fh:
fh.write(b'search_html: ./templates/custom_search.html\n')
fh.write(b'index_paths: ./cdx2/\n')
custom_search = os.path.join(self.root_dir, 'collections', 'test', custom_search = os.path.join(self.root_dir, 'collections', 'test',
'templates', 'custom_search.html') 'templates', 'search.html')
# add metadata # add metadata
main(['metadata', 'test', '--set', 'some=value']) main(['metadata', 'test', '--set', 'some=value'])
with open(custom_search, 'w+b') as fh: with open(custom_search, 'w+b') as fh:
fh.write(b'config.yaml overriden search page: ') fh.write(b'overriden search page: ')
fh.write(b'{{ wbrequest.user_metadata | tojson }}\n') fh.write(b'{{ metadata | tojson }}\n')
os.rename(os.path.join(self.root_dir, 'collections', 'test', INDEX_DIR),
os.path.join(self.root_dir, 'collections', 'test', 'cdx2'))
self._create_app() self._create_app()
resp = self.testapp.get('/test/') resp = self.testapp.get('/test/')
resp.charset = 'utf-8' resp.charset = 'utf-8'
assert resp.status_int == 200 assert resp.status_int == 200
assert resp.content_type == 'text/html' assert resp.content_type == 'text/html'
assert 'config.yaml overriden search page: {"some": "value"}' in resp.text assert 'overriden search page: {"some": "value"}' in resp.text
resp = self.testapp.get('/test/20140103030321/http://example.com?example=1') resp = self.testapp.get('/test/20140103030321/http://example.com?example=1')
assert resp.status_int == 200 assert resp.status_int == 200
@ -607,15 +589,15 @@ class TestManagedColls(object):
cdx_path = os.path.join(colls, 'foo', INDEX_DIR) cdx_path = os.path.join(colls, 'foo', INDEX_DIR)
shutil.rmtree(cdx_path) shutil.rmtree(cdx_path)
with raises(Exception): #with raises(Exception):
self._create_app() # self._create_app()
# CDX a file not a dir # CDX a file not a dir
with open(cdx_path, 'w+b') as fh: with open(cdx_path, 'w+b') as fh:
fh.write(b'foo\n') fh.write(b'foo\n')
with raises(Exception): #with raises(Exception):
self._create_app() # self._create_app()
shutil.rmtree(colls) shutil.rmtree(colls)

View File

@ -445,20 +445,20 @@ class TestWbIntegration(BaseConfigTest):
assert resp.status_int == 404 assert resp.status_int == 404
def test_static_content(self): def test_static_content(self):
resp = self.testapp.get('/static/__pywb/wb.css') resp = self.testapp.get('/static/wb.css')
assert resp.status_int == 200 assert resp.status_int == 200
assert resp.content_type == 'text/css' assert resp.content_type == 'text/css'
assert resp.content_length > 0 assert resp.content_length > 0
def test_static_content_filewrapper(self): def test_static_content_filewrapper(self):
from wsgiref.util import FileWrapper from wsgiref.util import FileWrapper
resp = self.testapp.get('/static/__pywb/wb.css', extra_environ = {'wsgi.file_wrapper': FileWrapper}) resp = self.testapp.get('/static/wb.css', extra_environ = {'wsgi.file_wrapper': FileWrapper})
assert resp.status_int == 200 assert resp.status_int == 200
assert resp.content_type == 'text/css' assert resp.content_type == 'text/css'
assert resp.content_length > 0 assert resp.content_length > 0
def test_static_not_found(self): def test_static_not_found(self):
resp = self.testapp.get('/static/__pywb/notfound.css', status = 404) resp = self.testapp.get('/static/notfound.css', status = 404)
assert resp.status_int == 404 assert resp.status_int == 404
def _test_cdx_server_filters(self): def _test_cdx_server_filters(self):