1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

add support for pywb static content routes (separate from uwsgi)

adding StaticHandler and loading templates and static resources from current package
add default template and static data to be included in the pywb package
add test for custom static route
This commit is contained in:
Ilya Kreymer 2014-02-07 19:32:58 -08:00
parent 00a7691f69
commit b11f4fad93
17 changed files with 121 additions and 37 deletions

View File

@ -151,7 +151,7 @@ the location of those files.
#### SURT
By default, pywb expects the cdx files to be Sort-Friendly-Url-Transform (SURT) ordering.
By default, pywb expects the cdx files to be Sort-friendly URL Reordering Transform (SURT) ordering.
This is an ordering that transforms: `example.com` -> `com,example)/` to facilitate better search.
It is recommended for future indexing, but is not required.

View File

@ -21,7 +21,9 @@ collections:
#
# * Set to true if cdxs start with surts: com,example)/
# * Set to false if cdx start with urls: example.com)/
surt_ordered: true
#
# default:
# surt_ordered: true
# list of path prefixes where pywb looks to 'resolve' WARC and ARC filenames
# in the cdx to their absolute path
@ -34,30 +36,33 @@ surt_ordered: true
archive_paths: ./sample_archive/warcs/
# ==== Optional UI: HTML/Jinja2 Templates ====
# The following are default settings -- uncomment to change
# Set to '' to disable the ui
# ==== UI: HTML/Jinja2 Templates ====
# template for <head> insert into replayed html content
head_insert_html: ./ui/head_insert.html
#head_insert_html: ui/head_insert.html
# template for 'calendar' query,
# eg, a listing of captures in response to a ../*/<url>
#
# may be a simple listing or a more complex 'calendar' UI
# if omitted, will list raw cdx in plain text
query_html: ./ui/query.html
#query_html: ui/query.html
# template for search page, which is displayed when no search url is entered
# in a collection
search_html: ./ui/search.html
#search_html: ui/search.html
# template for home page.
# if no other route is set, this will be rendered at /, /index.htm and /index.html
home_html: ./ui/index.html
#home_html: ui/index.html
# error page template for formatting the error message and details
# if omitted, a text response is returned
error_html: ./ui/error.html
#error_html: ui/error.html
# ==== Other Paths ====
@ -69,17 +74,18 @@ error_html: ./ui/error.html
# to http://localhost:8080/pywb/image.gif
#
hostpaths: ['http://localhost:8080/']
#hostpaths: ['http://localhost:8080/']
# Custom path for serving html content
# Default is hostpaths[0] + 'static/'
#static_path: /static/
# List of route names:
# <route>: <package or file path>
static_routes:
static: static/
# ==== New / Experimental Settings ====
# Not yet production ready -- used primarily for testing
# Enable simple http proxy mode
enable_http_proxy: false
#enable_http_proxy: false
# enable cdx server api for querying cdx directly (experimental)
enable_cdx_api: false
#enable_cdx_api: false

View File

@ -4,7 +4,10 @@ import urlparse
from wbrequestresponse import WbResponse
from wburl import WbUrl
from wbexceptions import WbException
from wbexceptions import WbException, NotFoundException
import pkgutil
import mimetypes
class BaseHandler:
@ -98,6 +101,46 @@ class CDXHandler(BaseHandler):
def __str__(self):
return 'CDX Server: ' + str(self.cdx_reader)
#=================================================================
# Static Content Handler
#=================================================================
class StaticHandler(BaseHandler):
    """Serve static files for a route, from package data or the filesystem.

    ``static_path`` is the prefix that the request path (``wbrequest.wb_url_str``)
    is appended to.  If the resulting path starts with ``.`` or ``file://`` it is
    opened as a regular file; otherwise it is loaded as a resource of the
    package ``pkg`` via ``pkgutil.get_data``.
    """

    # Chunk size used when streaming a file from disk
    BLOCK_SIZE = 8192

    # Optional scheme prefix marking a filesystem (non-package) path
    FILE_SCHEME = 'file://'

    # Used when the mime type cannot be guessed from the file name
    DEFAULT_CONTENT_TYPE = 'application/octet-stream'

    def __init__(self, static_path, pkg = __package__):
        mimetypes.init()

        self.static_path = static_path
        self.pkg = pkg

    def __call__(self, wbrequest):
        """Return a WbResponse streaming the requested static file.

        Raises NotFoundException if the file is missing, unreadable, or if the
        request path attempts to escape ``static_path`` with a ``..`` segment.
        """
        rel_path = wbrequest.wb_url_str
        not_found_msg = 'Static File Not Found: ' + rel_path

        # The request path is untrusted input: reject any parent-directory
        # segment so a request can never climb out of static_path.
        if '..' in rel_path.replace('\\', '/').split('/'):
            raise NotFoundException(not_found_msg)

        full_path = self.static_path + rel_path

        try:
            if full_path.startswith('.') or full_path.startswith(self.FILE_SCHEME):
                reader = self._open_file(full_path, wbrequest.env)
            else:
                # get_data() returns the whole resource as a byte string
                # (or None if the loader does not support it), not a file
                data = pkgutil.get_data(self.pkg, full_path)
                if data is None:
                    raise NotFoundException(not_found_msg)

                reader = [data]
        except IOError:
            raise NotFoundException(not_found_msg)

        content_type, _ = mimetypes.guess_type(full_path)

        # Pass the iterable reader (not the raw data / file handle) on to
        # the response, and never send a None content type.
        return WbResponse.text_stream(reader,
                                      content_type = content_type or self.DEFAULT_CONTENT_TYPE)

    def _open_file(self, full_path, env):
        """Open a filesystem path and return an iterable over its bytes."""
        # open() does not understand urls, so strip the file:// scheme
        if full_path.startswith(self.FILE_SCHEME):
            full_path = full_path[len(self.FILE_SCHEME):]

        fh = open(full_path, 'rb')

        # Prefer the server's optimized file transmission when it is offered;
        # the server is then responsible for closing the file.
        if 'wsgi.file_wrapper' in env:
            return env['wsgi.file_wrapper'](fh, self.BLOCK_SIZE)

        return self._iter_chunks(fh, self.BLOCK_SIZE)

    @staticmethod
    def _iter_chunks(fh, block_size):
        """Yield successive chunks read from fh, closing it when finished."""
        try:
            while True:
                chunk = fh.read(block_size)
                if not chunk:
                    break
                yield chunk
        finally:
            fh.close()

    @staticmethod
    def get_wburl_type():
        # Static paths are not archival urls, so no WbUrl parsing is needed
        return None

    def __str__(self):
        return 'Static files from ' + self.static_path
#=================================================================
# Debug Handlers
#=================================================================

View File

@ -7,6 +7,14 @@ import config_utils
import logging
import proxy
#=================================================================
DEFAULT_HEAD_INSERT = 'ui/head_insert.html'
DEFAULT_QUERY = 'ui/query.html'
DEFAULT_SEARCH = 'ui/search.html'
DEFAULT_INDEX = 'ui/index.html'
DEFAULT_ERROR = 'ui/error.html'
#=================================================================
## Reference non-YAML config
#=================================================================
@ -39,10 +47,11 @@ def pywb_config_manual(config = {}):
wb_handler = config_utils.create_wb_handler(
cdx_source = cdx_source,
archive_paths = route_config.get('archive_paths', './sample_archive/warcs/'),
head_html = route_config.get('head_insert_html'),
query_html = route_config.get('query_html'),
search_html = route_config.get('search_html'),
static_path = route_config.get('static_path', hostpaths[0] + 'static/')
head_html = route_config.get('head_insert_html', DEFAULT_HEAD_INSERT),
query_html = route_config.get('query_html', DEFAULT_QUERY),
search_html = route_config.get('search_html', DEFAULT_SEARCH),
static_path = hostpaths[0] + route_config.get('static_path', 'static/')
)
logging.info('Adding Collection: ' + name)
@ -56,6 +65,12 @@ def pywb_config_manual(config = {}):
if config.get('debug_echo_req', False):
routes.append(archivalrouter.Route('echo_req', handlers.DebugEchoHandler()))
static_routes = config.get('static_routes', {'static': 'static/'})
for static_name, static_path in static_routes.iteritems():
routes.append(archivalrouter.Route(static_name, handlers.StaticHandler(static_path)))
# Check for new proxy mode!
if config.get('enable_http_proxy', False):
router = proxy.ProxyArchivalRouter
@ -70,8 +85,8 @@ def pywb_config_manual(config = {}):
# (See archivalrouter.ReferRedirect)
hostpaths = hostpaths,
home_view = config_utils.load_template_file(config.get('home_html'), 'Home Page'),
error_view = config_utils.load_template_file(config.get('error_html'), 'Error Page')
home_view = config_utils.load_template_file(config.get('home_html', DEFAULT_INDEX), 'Home Page'),
error_view = config_utils.load_template_file(config.get('error_html', DEFAULT_ERROR), 'Error Page')
)

View File

@ -6,7 +6,7 @@ import time
from os import path
from itertools import imap
from jinja2 import Environment, FileSystemLoader
from jinja2 import Environment, FileSystemLoader, PackageLoader
#=================================================================
@ -31,7 +31,12 @@ class J2TemplateView:
def make_jinja_env(self, template_dir):
jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True)
if template_dir.startswith('.') or template_dir.startswith('file://'):
loader = FileSystemLoader(template_dir)
else:
loader = PackageLoader(__package__, template_dir)
jinja_env = Environment(loader = loader, trim_blocks = True)
jinja_env.filters['format_ts'] = J2TemplateView.format_ts
return jinja_env

View File

@ -75,14 +75,17 @@ class WbRequest:
self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.make_abs_prefix(env, wb_prefix)
if not wb_url_str:
wb_url_str = '/'
# wb_url present and not root page
if wb_url_str != '/' and wb_url_str != '' and wburl_class:
if wb_url_str != '/' and wburl_class:
self.wb_url_str = wb_url_str
self.wb_url = wburl_class(wb_url_str)
self.urlrewriter = url_rewriter_class(self.wb_url, self.wb_prefix)
else:
# no wb_url, just store blank
self.wb_url_str = '/'
# no wb_url, just store blank wb_url
self.wb_url_str = wb_url_str
self.wb_url = None
self.urlrewriter = None

View File

@ -88,7 +88,14 @@ class TestWb:
assert 'Mon, Jan 27 2014 17:12:51' in resp.body
assert '/pywb/20140127171251/http://www.iana.org/domains/example' in resp.body
# XX: Doesn't work as webtest does not support proxy mode
# Requests wb.css through the 'test-static' route and checks that it is
# returned successfully as non-empty text/css content.
def test_static_content(self):
resp = self.testapp.get('/test-static/wb.css')
assert resp.status_int == 200
assert resp.content_type == 'text/css'
assert resp.content_length > 0
# XX: Doesn't work as webtest does not support proxy mode
# need a way to test
#def test_proxy_replay(self):
#resp = self.testapp.get('http://www.iana.org/domains/idn-tables')

3
run.sh
View File

@ -12,7 +12,8 @@ mypath=$(cd `dirname $0` && pwd)
app="pywb.wbapp"
params="--static-map /static=$mypath/static --http-socket :8080 -b 65536"
params="--http-socket :8080 -b 65536"
#params="--static-map /static=$mypath/static --http-socket :8080 -b 65536"
if [ -z "$1" ]; then
# Standard root config

View File

@ -11,6 +11,8 @@ setuptools.setup(name='pywb',
long_description=open('README.md').read(),
license='GPL',
packages=['pywb'],
provides=['pywb'],
package_data={'pywb': ['ui/*', 'static/*']},
install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis', 'jinja2', 'surt', 'pyyaml', 'WebTest'],
tests_require=['WebTest', 'pytest'],
zip_safe=False)

View File

@ -38,27 +38,27 @@ archive_paths: ./sample_archive/warcs/
# ==== Optional UI: HTML/Jinja2 Templates ====
# template for <head> insert into replayed html content
head_insert_html: ./ui/head_insert.html
head_insert_html: ui/head_insert.html
# template for 'calendar' query,
# eg, a listing of captures in response to a ../*/<url>
#
# may be a simple listing or a more complex 'calendar' UI
# if omitted, will list raw cdx in plain text
query_html: ./ui/query.html
query_html: ui/query.html
# template for search page, which is displayed when no search url is entered
# in a collection
search_html: ./ui/search.html
search_html: ui/search.html
# template for home page.
# if no other route is set, this will be rendered at /, /index.htm and /index.html
home_html: ./ui/index.html
home_html: ui/index.html
# error page template for formatting the error message and details
# if omitted, a text response is returned
error_html: ./ui/error.html
error_html: ui/error.html
# ==== Other Paths ====
@ -70,11 +70,13 @@ error_html: ./ui/error.html
# to http://localhost:8080/pywb/image.gif
#
hostpaths: ['http://localhost:8080/']
#hostpaths: ['http://localhost:8080/']
# List of route names:
# <route>: <package or file path>
static_routes:
test-static: static/
# Custom path for serving html content
# Default is hostpaths[0] + 'static/'
#static_path: /static/
# ==== New / Experimental Settings ====
# Not yet production ready -- used primarily for testing