mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
add support for pywb static content routes (separate from uwsgi)
Add StaticHandler, and load templates and static resources from the current package. Add default templates and static data to be included in the pywb package. Add a test for a custom static route.
This commit is contained in:
parent
00a7691f69
commit
b11f4fad93
@ -151,7 +151,7 @@ the location of those files.
|
||||
|
||||
#### SURT
|
||||
|
||||
By default, pywb expects the cdx files to be Sort-Friendly-Url-Transform (SURT) ordering.
|
||||
By default, pywb expects the cdx files to be in Sort-friendly URL Reordering Transform (SURT) order.
|
||||
This is an ordering that transforms: `example.com` -> `com,example)/` to facilitate better search.
|
||||
It is recommended for future indexing, but is not required.
|
||||
|
||||
|
32
config.yaml
32
config.yaml
@ -21,7 +21,9 @@ collections:
|
||||
#
|
||||
# * Set to true if cdxs start with surts: com,example)/
|
||||
# * Set to false if cdx start with urls: example.com)/
|
||||
surt_ordered: true
|
||||
#
|
||||
# default:
|
||||
# surt_ordered: true
|
||||
|
||||
# list of path prefixes that pywb uses to 'resolve' WARC and ARC filenames
|
||||
# in the cdx to their absolute path
|
||||
@ -34,30 +36,33 @@ surt_ordered: true
|
||||
|
||||
archive_paths: ./sample_archive/warcs/
|
||||
|
||||
# ==== Optional UI: HTML/Jinja2 Templates ====
|
||||
# The following are default settings -- uncomment to change
|
||||
# Set to '' to disable the ui
|
||||
|
||||
# ==== UI: HTML/Jinja2 Templates ====
|
||||
|
||||
# template for <head> insert into replayed html content
|
||||
head_insert_html: ./ui/head_insert.html
|
||||
#head_insert_html: ui/head_insert.html
|
||||
|
||||
# template for 'calendar' query,
|
||||
# eg, a listing of captures in response to a ../*/<url>
|
||||
#
|
||||
# may be a simple listing or a more complex 'calendar' UI
|
||||
# if omitted, will list raw cdx in plain text
|
||||
query_html: ./ui/query.html
|
||||
#query_html: ui/query.html
|
||||
|
||||
# template for search page, which is displayed when no search url is entered
|
||||
# in a collection
|
||||
search_html: ./ui/search.html
|
||||
#search_html: ui/search.html
|
||||
|
||||
# template for home page.
|
||||
# if no other route is set, this will be rendered at /, /index.htm and /index.html
|
||||
home_html: ./ui/index.html
|
||||
#home_html: ui/index.html
|
||||
|
||||
|
||||
# error page template for formatting the error message and details
|
||||
# if omitted, a text response is returned
|
||||
error_html: ./ui/error.html
|
||||
#error_html: ui/error.html
|
||||
|
||||
# ==== Other Paths ====
|
||||
|
||||
@ -69,17 +74,18 @@ error_html: ./ui/error.html
|
||||
# to http://localhost:8080/pywb/image.gif
|
||||
#
|
||||
|
||||
hostpaths: ['http://localhost:8080/']
|
||||
#hostpaths: ['http://localhost:8080/']
|
||||
|
||||
# Custom path for serving html content
|
||||
# Default is hostpaths[0] + 'static/'
|
||||
#static_path: /static/
|
||||
# List of route names:
|
||||
# <route>: <package or file path>
|
||||
static_routes:
|
||||
static: static/
|
||||
|
||||
# ==== New / Experimental Settings ====
|
||||
# Not yet production ready -- used primarily for testing
|
||||
|
||||
# Enable simple http proxy mode
|
||||
enable_http_proxy: false
|
||||
#enable_http_proxy: false
|
||||
|
||||
# enable cdx server api for querying cdx directly (experimental)
|
||||
enable_cdx_api: false
|
||||
#enable_cdx_api: false
|
||||
|
@ -4,7 +4,10 @@ import urlparse
|
||||
|
||||
from wbrequestresponse import WbResponse
|
||||
from wburl import WbUrl
|
||||
from wbexceptions import WbException
|
||||
from wbexceptions import WbException, NotFoundException
|
||||
|
||||
import pkgutil
|
||||
import mimetypes
|
||||
|
||||
|
||||
class BaseHandler:
|
||||
@ -98,6 +101,46 @@ class CDXHandler(BaseHandler):
|
||||
def __str__(self):
|
||||
return 'CDX Server: ' + str(self.cdx_reader)
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Static Content Handler
|
||||
#=================================================================
|
||||
class StaticHandler(BaseHandler):
    """Handler that serves static files for a configured static route.

    The file to serve is located by appending the request's ``wb_url_str``
    to ``static_path``. If the combined path starts with ``.`` or
    ``file://`` it is read from the filesystem; otherwise it is loaded as
    package data from ``pkg`` via ``pkgutil.get_data``.
    """

    # Optional scheme prefix marking a filesystem (non-package) path
    _FILE_SCHEME = 'file://'

    # Content type used when the file extension is not recognized
    _DEFAULT_CONTENT_TYPE = 'application/octet-stream'

    def __init__(self, static_path, pkg = __package__):
        """
        :param static_path: prefix prepended to every requested path
            (a package-relative directory, or a filesystem path starting
            with ``.`` or ``file://``)
        :param pkg: name of the package used for package data lookups
        """
        mimetypes.init()

        self.static_path = static_path
        self.pkg = pkg

    def __call__(self, wbrequest):
        """Return a response containing the requested static file.

        :param wbrequest: request object; only ``wb_url_str`` is read here
        :raises NotFoundException: if the path contains a ``..`` segment,
            or the file can not be found or read
        """
        rel_path = wbrequest.wb_url_str
        not_found_msg = 'Static File Not Found: ' + rel_path

        # The requested path comes from the client: refuse any '..'
        # segment so a request can not escape the configured static root
        if '..' in rel_path.replace('\\', '/').split('/'):
            raise NotFoundException(not_found_msg)

        full_path = self.static_path + rel_path

        try:
            data = self._load(full_path)
        except IOError:
            raise NotFoundException(not_found_msg)

        # pkgutil.get_data() returns None when the package can not be
        # located or its loader does not support get_data()
        if data is None:
            raise NotFoundException(not_found_msg)

        content_type, _ = mimetypes.guess_type(full_path)
        if not content_type:
            content_type = self._DEFAULT_CONTENT_TYPE

        # NOTE(review): the body is passed as a single-chunk iterable;
        # assumes text_stream() accepts any iterable of strings, as it
        # previously received an open file object -- confirm
        return WbResponse.text_stream([data], content_type = content_type)

    def _load(self, full_path):
        """Return the full contents of ``full_path``.

        Filesystem paths are read in binary mode and the file is always
        closed; any other path is looked up as package data in ``self.pkg``.
        May raise IOError, or return None for a failed package lookup.
        """
        if full_path.startswith(self._FILE_SCHEME):
            # open() does not understand the scheme prefix, so strip it
            local_path = full_path[len(self._FILE_SCHEME):]
        elif full_path.startswith('.'):
            local_path = full_path
        else:
            return pkgutil.get_data(self.pkg, full_path)

        with open(local_path, 'rb') as fh:
            return fh.read()

    @staticmethod
    def get_wburl_type():
        """Static routes do not parse the request path into a wb url."""
        return None

    def __str__(self):
        return 'Static files from ' + self.static_path
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Debug Handlers
|
||||
#=================================================================
|
||||
|
@ -7,6 +7,14 @@ import config_utils
|
||||
import logging
|
||||
import proxy
|
||||
|
||||
#=================================================================
|
||||
DEFAULT_HEAD_INSERT = 'ui/head_insert.html'
|
||||
DEFAULT_QUERY = 'ui/query.html'
|
||||
DEFAULT_SEARCH = 'ui/search.html'
|
||||
DEFAULT_INDEX = 'ui/index.html'
|
||||
DEFAULT_ERROR = 'ui/error.html'
|
||||
|
||||
|
||||
#=================================================================
|
||||
## Reference non-YAML config
|
||||
#=================================================================
|
||||
@ -39,10 +47,11 @@ def pywb_config_manual(config = {}):
|
||||
wb_handler = config_utils.create_wb_handler(
|
||||
cdx_source = cdx_source,
|
||||
archive_paths = route_config.get('archive_paths', './sample_archive/warcs/'),
|
||||
head_html = route_config.get('head_insert_html'),
|
||||
query_html = route_config.get('query_html'),
|
||||
search_html = route_config.get('search_html'),
|
||||
static_path = route_config.get('static_path', hostpaths[0] + 'static/')
|
||||
head_html = route_config.get('head_insert_html', DEFAULT_HEAD_INSERT),
|
||||
query_html = route_config.get('query_html', DEFAULT_QUERY),
|
||||
search_html = route_config.get('search_html', DEFAULT_SEARCH),
|
||||
|
||||
static_path = hostpaths[0] + route_config.get('static_path', 'static/')
|
||||
)
|
||||
|
||||
logging.info('Adding Collection: ' + name)
|
||||
@ -56,6 +65,12 @@ def pywb_config_manual(config = {}):
|
||||
if config.get('debug_echo_req', False):
|
||||
routes.append(archivalrouter.Route('echo_req', handlers.DebugEchoHandler()))
|
||||
|
||||
|
||||
static_routes = config.get('static_routes', {'static': 'static/'})
|
||||
|
||||
for static_name, static_path in static_routes.iteritems():
|
||||
routes.append(archivalrouter.Route(static_name, handlers.StaticHandler(static_path)))
|
||||
|
||||
# Check for new proxy mode!
|
||||
if config.get('enable_http_proxy', False):
|
||||
router = proxy.ProxyArchivalRouter
|
||||
@ -70,8 +85,8 @@ def pywb_config_manual(config = {}):
|
||||
# (See archivalrouter.ReferRedirect)
|
||||
hostpaths = hostpaths,
|
||||
|
||||
home_view = config_utils.load_template_file(config.get('home_html'), 'Home Page'),
|
||||
error_view = config_utils.load_template_file(config.get('error_html'), 'Error Page')
|
||||
home_view = config_utils.load_template_file(config.get('home_html', DEFAULT_INDEX), 'Home Page'),
|
||||
error_view = config_utils.load_template_file(config.get('error_html', DEFAULT_ERROR), 'Error Page')
|
||||
)
|
||||
|
||||
|
||||
|
@ -6,7 +6,7 @@ import time
|
||||
|
||||
from os import path
|
||||
from itertools import imap
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
from jinja2 import Environment, FileSystemLoader, PackageLoader
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -31,7 +31,12 @@ class J2TemplateView:
|
||||
|
||||
|
||||
def make_jinja_env(self, template_dir):
|
||||
jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True)
|
||||
if template_dir.startswith('.') or template_dir.startswith('file://'):
|
||||
loader = FileSystemLoader(template_dir)
|
||||
else:
|
||||
loader = PackageLoader(__package__, template_dir)
|
||||
|
||||
jinja_env = Environment(loader = loader, trim_blocks = True)
|
||||
jinja_env.filters['format_ts'] = J2TemplateView.format_ts
|
||||
return jinja_env
|
||||
|
||||
|
@ -75,14 +75,17 @@ class WbRequest:
|
||||
|
||||
self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.make_abs_prefix(env, wb_prefix)
|
||||
|
||||
if not wb_url_str:
|
||||
wb_url_str = '/'
|
||||
|
||||
# wb_url present and not root page
|
||||
if wb_url_str != '/' and wb_url_str != '' and wburl_class:
|
||||
if wb_url_str != '/' and wburl_class:
|
||||
self.wb_url_str = wb_url_str
|
||||
self.wb_url = wburl_class(wb_url_str)
|
||||
self.urlrewriter = url_rewriter_class(self.wb_url, self.wb_prefix)
|
||||
else:
|
||||
# no wb_url, just store blank
|
||||
self.wb_url_str = '/'
|
||||
# no wb_url, just store blank wb_url
|
||||
self.wb_url_str = wb_url_str
|
||||
self.wb_url = None
|
||||
self.urlrewriter = None
|
||||
|
||||
|
@ -88,7 +88,14 @@ class TestWb:
|
||||
assert 'Mon, Jan 27 2014 17:12:51' in resp.body
|
||||
assert '/pywb/20140127171251/http://www.iana.org/domains/example' in resp.body
|
||||
|
||||
# XX: Doesn't work as webtest does not support proxy mode
|
||||
def test_static_content(self):
|
||||
resp = self.testapp.get('/test-static/wb.css')
|
||||
assert resp.status_int == 200
|
||||
assert resp.content_type == 'text/css'
|
||||
assert resp.content_length > 0
|
||||
|
||||
|
||||
# XX: Doesn't work as webtest does not support proxy mode
|
||||
# need a way to test
|
||||
#def test_proxy_replay(self):
|
||||
#resp = self.testapp.get('http://www.iana.org/domains/idn-tables')
|
||||
|
3
run.sh
3
run.sh
@ -12,7 +12,8 @@ mypath=$(cd `dirname $0` && pwd)
|
||||
|
||||
app="pywb.wbapp"
|
||||
|
||||
params="--static-map /static=$mypath/static --http-socket :8080 -b 65536"
|
||||
params="--http-socket :8080 -b 65536"
|
||||
#params="--static-map /static=$mypath/static --http-socket :8080 -b 65536"
|
||||
|
||||
if [ -z "$1" ]; then
|
||||
# Standard root config
|
||||
|
2
setup.py
2
setup.py
@ -11,6 +11,8 @@ setuptools.setup(name='pywb',
|
||||
long_description=open('README.md').read(),
|
||||
license='GPL',
|
||||
packages=['pywb'],
|
||||
provides=['pywb'],
|
||||
package_data={'pywb': ['ui/*', 'static/*']},
|
||||
install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis', 'jinja2', 'surt', 'pyyaml', 'WebTest'],
|
||||
tests_require=['WebTest', 'pytest'],
|
||||
zip_safe=False)
|
||||
|
@ -38,27 +38,27 @@ archive_paths: ./sample_archive/warcs/
|
||||
# ==== Optional UI: HTML/Jinja2 Templates ====
|
||||
|
||||
# template for <head> insert into replayed html content
|
||||
head_insert_html: ./ui/head_insert.html
|
||||
head_insert_html: ui/head_insert.html
|
||||
|
||||
# template for 'calendar' query,
|
||||
# eg, a listing of captures in response to a ../*/<url>
|
||||
#
|
||||
# may be a simple listing or a more complex 'calendar' UI
|
||||
# if omitted, will list raw cdx in plain text
|
||||
query_html: ./ui/query.html
|
||||
query_html: ui/query.html
|
||||
|
||||
# template for search page, which is displayed when no search url is entered
|
||||
# in a collection
|
||||
search_html: ./ui/search.html
|
||||
search_html: ui/search.html
|
||||
|
||||
# template for home page.
|
||||
# if no other route is set, this will be rendered at /, /index.htm and /index.html
|
||||
home_html: ./ui/index.html
|
||||
home_html: ui/index.html
|
||||
|
||||
|
||||
# error page template for formatting the error message and details
|
||||
# if omitted, a text response is returned
|
||||
error_html: ./ui/error.html
|
||||
error_html: ui/error.html
|
||||
|
||||
# ==== Other Paths ====
|
||||
|
||||
@ -70,11 +70,13 @@ error_html: ./ui/error.html
|
||||
# to http://localhost:8080/pywb/image.gif
|
||||
#
|
||||
|
||||
hostpaths: ['http://localhost:8080/']
|
||||
#hostpaths: ['http://localhost:8080/']
|
||||
|
||||
# List of route names:
|
||||
# <route>: <package or file path>
|
||||
static_routes:
|
||||
test-static: static/
|
||||
|
||||
# Custom path for serving html content
|
||||
# Default is hostpaths[0] + 'static/'
|
||||
#static_path: /static/
|
||||
|
||||
# ==== New / Experimental Settings ====
|
||||
# Not yet production ready -- used primarily for testing
|
||||
|
Loading…
x
Reference in New Issue
Block a user