1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-23 14:42:24 +01:00
pywb/pywb/rewrite/templateview.py
John Berlin ec0df7b9ae Refactor of auto-fetch worker system with support for proxy mode, fixes https://github.com/webrecorder/pywb/issues/371: (#379)
- Split wombat and auto-fetch worker into two files (proxy mode and non-proxy mode)
- Renamed preservationWorker to autoFetchWorker in order to better convey what it does
- Root config file control over including wombat and auto-fetch worker in proxy or non-proxy mode
- Added additional proxy mode + auto-fetch worker only route for fetching the auto-fetch worker code nicely for CORS
- templateview: add 'tobool' formatter to more cleanly format python bools to JS 'true'/'false'
- proxy options: config and command line: 
  'use_auto_fetch_worker' and '--proxy-with-auto-fetch'
  'use_wombat' and '--proxy-with-wombat'
- head_insert.html: only include wombat in proxy mode when use_wombat or use_auto_fetch_worker are set.
- wombatProxyMode.js: slimmed down wombat for proxy mode only including auto-fetch support.
- more consistent naming: rename 'preserveWorker' and 'autoArchive' to 'auto-fetch'

Updated tests:
- test_wbrequestresponse.py: added tests covering constructor defaults, _init_derived, options_response, json_response, encode_stream, text_stream
- test_auto_colls.py: fixed broken test test_more_custom_templates, reason using ujson now not json so spacing was off
- test_proxy.py: updated existing tests to reflect splitting wombat into proxy and non-proxy mode, added tests covering auto-fetch worker specific endpoints in proxy mode
removed duplicate addons key in .travis.yml
- test_cli.py: updated to properly test the cli with these changes
added ultrajon dep to tests_require in setup.py to reflect its usage by wbrequestresponse.py

Fully documented:
- cli.py
- frontendapp.py
- templateview.py
- wbrequestresponse.py

Removed duplicate addons key in .travis.yml
Added ultrajson dependency to tests_require in setup.py to reflect its usage by wbrequestresponse.py

Fixes #371
2018-10-03 16:27:49 -04:00

373 lines
14 KiB
Python

from warcio.timeutils import timestamp_to_datetime, timestamp_to_sec
from warcio.timeutils import timestamp_now
from pywb.utils.loaders import load
from six.moves.urllib.parse import urlsplit
from jinja2 import Environment, TemplateNotFound
from jinja2 import FileSystemLoader, PackageLoader, ChoiceLoader
from webassets.ext.jinja2 import AssetsExtension
from webassets.loaders import YAMLLoader
from webassets.env import Resolver
from pkg_resources import resource_filename
import os
try:
import ujson as json
except ImportError: # pragma: no cover
import json
# ============================================================================
class RelEnvironment(Environment):
"""Override join_path() to enable relative template paths."""
def join_path(self, template, parent):
return os.path.join(os.path.dirname(parent), template)
# ============================================================================
class JinjaEnv(object):
"""Pywb JinjaEnv class that provides utility functions used by the templates,
configured template loaders and template paths, and contains the actual Jinja
env used by each template."""
def __init__(self, paths=None,
packages=None,
assets_path=None,
globals=None,
overlay=None,
extensions=None,
env_template_params_key='pywb.template_params',
env_template_dir_key='pywb.templates_dir'):
"""Construct a new JinjaEnv.
:param list[str] paths: List of paths to search for templates
:param list[str] packages: List of assets package names
:param str assets_path: Path to a yaml file containing assets
:param dict[str, str] globals: Dictionary of additional globals available during template rendering
:param overlay:
:param list extensions: List of webassets extension classes
:param str env_template_params_key: The full pywb package key for the template params
:param str env_template_dir_key: The full pywb package key for the template directory
"""
if paths is None:
paths = ['templates', '.', '/']
if packages is None:
packages = ['pywb']
self._init_filters()
loader = ChoiceLoader(self._make_loaders(paths, packages))
self.env_template_params_key = env_template_params_key
self.env_template_dir_key = env_template_dir_key
extensions = extensions or []
if assets_path:
extensions.append(AssetsExtension)
if overlay:
jinja_env = overlay.jinja_env.overlay(loader=loader,
trim_blocks=True,
extensions=extensions)
else:
jinja_env = RelEnvironment(loader=loader,
trim_blocks=True,
extensions=extensions)
jinja_env.filters.update(self.filters)
if globals:
jinja_env.globals.update(globals)
self.jinja_env = jinja_env
# init assets
if assets_path:
assets_loader = YAMLLoader(load(assets_path))
assets_env = assets_loader.load_environment()
assets_env.resolver = PkgResResolver()
jinja_env.assets_environment = assets_env
def _make_loaders(self, paths, packages):
"""Initialize the template loaders based on the supplied paths and packages.
:param list[str] paths: List of paths to search for templates
:param list[str] packages: List of assets package names
:return: A list of loaders to be used for loading the template assets
:rtype: list[FileSystemLoader|PackageLoader]
"""
loaders = []
# add loaders for paths
for path in paths:
loaders.append(FileSystemLoader(path))
# add loaders for all specified packages
for package in packages:
loaders.append(PackageLoader(package))
return loaders
def template_filter(self, param=None):
"""Returns a decorator that adds the wrapped function to dictionary of template filters.
The wrapped function is keyed by either the supplied param (if supplied)
or by the wrapped functions name.
:param param: Optional name to use instead of the name of the function to be wrapped
:return: A decorator to wrap a template filter function
:rtype: callable
"""
def deco(func):
name = param or func.__name__
self.filters[name] = func
return func
return deco
def _init_filters(self):
"""Initialize the default pywb provided Jninja filters available during template rendering"""
self.filters = {}
@self.template_filter()
def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'):
"""Formats the supplied timestamp using format_
:param str value: The timestamp to be formatted
:param str format_: The format string
:return: The correctly formatted timestamp as determined by format_
:rtype: str
"""
if format_ == '%s':
return timestamp_to_sec(value)
else:
value = timestamp_to_datetime(value)
return value.strftime(format_)
@self.template_filter('urlsplit')
def get_urlsplit(url):
"""Splits the supplied URL
:param str url: The url to be split
:return: The split url
:rtype: urllib.parse.SplitResult
"""
split = urlsplit(url)
return split
@self.template_filter()
def tojson(obj):
"""Converts the supplied object/array/any to a JSON string if it can be JSONified
:param any obj: The value to be converted to a JSON string
:return: The JSON string representation of the supplied value
:rtype: str
"""
return json.dumps(obj)
@self.template_filter()
def tobool(bool_val):
"""Converts a python boolean to a JS "true" or "false" string
:param any obj: A value to be evaluated as a boolean
:return: The string "true" or "false" to be inserted into JS
"""
return 'true' if bool_val else 'false'
# ============================================================================
class BaseInsertView(object):
"""Base class of all template views used by Pywb"""
def __init__(self, jenv, insert_file, banner_view=None):
"""Create a new BaseInsertView.
:param JinjaEnv jenv: The instance of pywb.rewrite.templateview.JinjaEnv to be used
:param str insert_file: The name of the template file
:param BaseInsertView banner_view: The banner_view property of pywb.apps.RewriterApp
"""
self.jenv = jenv
self.insert_file = insert_file
self.banner_view = banner_view
def render_to_string(self, env, **kwargs):
"""Render this template.
:param dict env: The WSGI environment associated with the request causing this template to be rendered
:param any kwargs: The keyword arguments to be supplied to the Jninja template render method
:return: The rendered template
:rtype: str
"""
template = None
template_path = env.get(self.jenv.env_template_dir_key)
if template_path:
# jinja paths are not os paths, always use '/' as separator
# https://github.com/pallets/jinja/issues/411
template_path = template_path + '/' + self.insert_file
try:
template = self.jenv.jinja_env.get_template(template_path)
except TemplateNotFound as te:
pass
if not template:
template = self.jenv.jinja_env.get_template(self.insert_file)
params = env.get(self.jenv.env_template_params_key)
if params:
kwargs.update(params)
kwargs['env'] = env
kwargs['static_prefix'] = env.get('pywb.host_prefix', '') + env.get('pywb.app_prefix', '') + '/static'
return template.render(**kwargs)
# ============================================================================
class HeadInsertView(BaseInsertView):
"""The template view class associated with rendering the HTML inserted
into the head of the pages replayed (WB Insert)."""
def create_insert_func(self, wb_url,
wb_prefix,
host_prefix,
top_url,
env,
is_framed,
coll='',
include_ts=True,
**kwargs):
"""Create the function used to render the header insert template for the current request.
:param rewrite.wburl.WbUrl wb_url: The WbUrl for the request this template is being rendered for
:param str wb_prefix: The URL prefix pywb is serving the content using (e.g. http://localhost:8080/live/)
:param str host_prefix: The host URL prefix pywb is running on (e.g. http://localhost:8080)
:param str top_url: The full URL for this request (e.g. http://localhost:8080/live/http://example.com)
:param dict env: The WSGI environment dictionary for this request
:param bool is_framed: Is pywb or a specific collection running in framed mode
:param str coll: The name of the collection this request is associated with
:param bool include_ts: Should a timestamp be included in the rendered template
:param kwargs: Additional keyword arguments to be supplied to the Jninja template render method
:return: A function to be used to render the header insert for the request this template is being rendered for
:rtype: callable
"""
params = kwargs
params['host_prefix'] = host_prefix
params['wb_prefix'] = wb_prefix
params['wb_url'] = wb_url
params['top_url'] = top_url
params['coll'] = coll
params['is_framed'] = is_framed
def make_head_insert(rule, cdx):
params['wombat_ts'] = cdx['timestamp'] if include_ts else ''
params['wombat_sec'] = timestamp_to_sec(cdx['timestamp'])
params['is_live'] = cdx.get('is_live')
if self.banner_view:
banner_html = self.banner_view.render_to_string(env, cdx=cdx, **params)
params['banner_html'] = banner_html
return self.render_to_string(env, cdx=cdx, **params)
return make_head_insert
# ============================================================================
class TopFrameView(BaseInsertView):
"""The template view class associated with rendering the replay iframe"""
def get_top_frame(self, wb_url,
wb_prefix,
host_prefix,
env,
frame_mod,
replay_mod,
coll='',
extra_params=None):
"""
:param rewrite.wburl.WbUrl wb_url: The WbUrl for the request this template is being rendered for
:param str wb_prefix: The URL prefix pywb is serving the content using (e.g. http://localhost:8080/live/)
:param str host_prefix: The host URL prefix pywb is running on (e.g. http://localhost:8080)
:param dict env: The WSGI environment dictionary for the request this template is being rendered for
:param str frame_mod: The modifier to be used for framing (e.g. if_)
:param str replay_mod: The modifier to be used in the URL of the page being replayed (e.g. mp_)
:param str coll: The name of the collection this template is being rendered for
:param dict extra_params: Additional parameters to be supplied to the Jninja template render method
:return: The frame insert string
:rtype: str
"""
embed_url = wb_url.to_str(mod=replay_mod)
if wb_url.timestamp:
timestamp = wb_url.timestamp
else:
timestamp = timestamp_now()
is_proxy = 'wsgiprox.proxy_host' in env
params = {'host_prefix': host_prefix,
'wb_prefix': wb_prefix,
'wb_url': wb_url,
'coll': coll,
'options': {'frame_mod': frame_mod,
'replay_mod': replay_mod},
'embed_url': embed_url,
'is_proxy': is_proxy,
'timestamp': timestamp,
'url': wb_url.get_url()
}
if extra_params:
params.update(extra_params)
if self.banner_view:
banner_html = self.banner_view.render_to_string(env, **params)
params['banner_html'] = banner_html
return self.render_to_string(env, **params)
# ============================================================================
class PkgResResolver(Resolver):
"""Class for resolving pywb package resources when install via pypi or setup.py"""
def get_pkg_path(self, item):
"""Get the package path for the
:param str item: A resources full package path
:return: The netloc and path from the items package path
:rtype: tuple[str, str]
"""
if not isinstance(item, str):
return None
parts = urlsplit(item)
if parts.scheme == 'pkg' and parts.netloc:
return (parts.netloc, parts.path)
return None
def resolve_source(self, ctx, item):
pkg = self.get_pkg_path(item)
if pkg:
filename = resource_filename(pkg[0], pkg[1])
if filename:
return filename
return super(PkgResResolver, self).resolve_source(ctx, item)