1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

add support for pywb static content routes (separate from uwsgi)

adding StaticHandler and loading templates and static resources from current package
add default template and static data to be included in the pywb package
add test for custom static route
This commit is contained in:
Ilya Kreymer 2014-02-07 19:32:58 -08:00
parent 00a7691f69
commit b11f4fad93
17 changed files with 121 additions and 37 deletions

View File

@ -151,7 +151,7 @@ the location of those files.
#### SURT #### SURT
By default, pywb expects the cdx files to be Sort-Friendly-Url-Transform (SURT) ordering. By default, pywb expects the cdx files to be Sort-friendly URL Reordering Transform (SURT) ordering.
This is an ordering that transforms: `example.com` -> `com,example)/` to facilitate better search. This is an ordering that transforms: `example.com` -> `com,example)/` to facilitate better search.
It is recommended for future indexing, but is not required. It is recommended for future indexing, but is not required.

View File

@ -21,7 +21,9 @@ collections:
# #
# * Set to true if cdxs start with surts: com,example)/ # * Set to true if cdxs start with surts: com,example)/
# * Set to false if cdx start with urls: example.com)/ # * Set to false if cdx start with urls: example.com)/
surt_ordered: true #
# default:
# surt_ordered: true
# list of path prefixes pywb uses to 'resolve' WARC and ARC filenames # list of path prefixes pywb uses to 'resolve' WARC and ARC filenames
# in the cdx to their absolute path # in the cdx to their absolute path
@ -34,30 +36,33 @@ surt_ordered: true
archive_paths: ./sample_archive/warcs/ archive_paths: ./sample_archive/warcs/
# ==== Optional UI: HTML/Jinja2 Templates ==== # The following are default settings -- uncomment to change
# Set to '' to disable the ui
# ==== UI: HTML/Jinja2 Templates ====
# template for <head> insert into replayed html content # template for <head> insert into replayed html content
head_insert_html: ./ui/head_insert.html #head_insert_html: ui/head_insert.html
# template for 'calendar' query, # template for 'calendar' query,
# eg, a listing of captures in response to a ../*/<url> # eg, a listing of captures in response to a ../*/<url>
# #
# may be a simple listing or a more complex 'calendar' UI # may be a simple listing or a more complex 'calendar' UI
# if omitted, will list raw cdx in plain text # if omitted, will list raw cdx in plain text
query_html: ./ui/query.html #query_html: ui/query.html
# template for search page, which is displayed when no search url is entered # template for search page, which is displayed when no search url is entered
# in a collection # in a collection
search_html: ./ui/search.html #search_html: ui/search.html
# template for home page. # template for home page.
# if no other route is set, this will be rendered at /, /index.htm and /index.html # if no other route is set, this will be rendered at /, /index.htm and /index.html
home_html: ./ui/index.html #home_html: ui/index.html
# error page template for formatting error message and details # error page template for formatting error message and details
# if omitted, a text response is returned # if omitted, a text response is returned
error_html: ./ui/error.html #error_html: ui/error.html
# ==== Other Paths ==== # ==== Other Paths ====
@ -69,17 +74,18 @@ error_html: ./ui/error.html
# to http://localhost:8080/pywb/image.gif # to http://localhost:8080/pywb/image.gif
# #
hostpaths: ['http://localhost:8080/'] #hostpaths: ['http://localhost:8080/']
# Custom path for serving html content # List of route names:
# Default is hostname[0] + '/static/' # <route>: <package or file path>
#static_path: /static/ static_routes:
static: static/
# ==== New / Experimental Settings ==== # ==== New / Experimental Settings ====
# Not yet production ready -- used primarily for testing # Not yet production ready -- used primarily for testing
# Enable simple http proxy mode # Enable simple http proxy mode
enable_http_proxy: false #enable_http_proxy: false
# enable cdx server api for querying cdx directly (experimental) # enable cdx server api for querying cdx directly (experimental)
enable_cdx_api: false #enable_cdx_api: false

View File

@ -4,7 +4,10 @@ import urlparse
from wbrequestresponse import WbResponse from wbrequestresponse import WbResponse
from wburl import WbUrl from wburl import WbUrl
from wbexceptions import WbException from wbexceptions import WbException, NotFoundException
import pkgutil
import mimetypes
class BaseHandler: class BaseHandler:
@ -98,6 +101,46 @@ class CDXHandler(BaseHandler):
def __str__(self): def __str__(self):
return 'CDX Server: ' + str(self.cdx_reader) return 'CDX Server: ' + str(self.cdx_reader)
#=================================================================
# Static Content Handler
#=================================================================
class StaticHandler(BaseHandler):
    """Serve static content for a route.

    ``static_path`` is a prefix that is prepended to the request's
    ``wb_url_str`` to form the resource path:

    * a prefix starting with ``.`` or ``file://`` is read from the
      filesystem;
    * any other prefix is loaded with ``pkgutil.get_data`` relative to
      the package ``pkg``.
    """

    _FILE_SCHEME = 'file://'

    def __init__(self, static_path, pkg=__package__):
        mimetypes.init()

        self.static_path = static_path
        self.pkg = pkg

    def __call__(self, wbrequest):
        """Return a response carrying the requested static resource.

        Raises NotFoundException when the resource cannot be read, or
        when the requested path tries to climb out of ``static_path``.
        """
        rel_path = wbrequest.wb_url_str
        not_found_msg = 'Static File Not Found: ' + rel_path

        # The request path is untrusted input: refuse any '..' segment so
        # a request cannot escape the configured static root.
        if '..' in rel_path.replace('\\', '/').split('/'):
            raise NotFoundException(not_found_msg)

        full_path = self.static_path + rel_path

        try:
            if full_path.startswith(self._FILE_SCHEME):
                # open() does not understand URL schemes; strip the prefix
                data = self._read_file(full_path[len(self._FILE_SCHEME):])
            elif full_path.startswith('.'):
                data = self._read_file(full_path)
            else:
                data = pkgutil.get_data(self.pkg, full_path)
        except IOError:
            raise NotFoundException(not_found_msg)

        # pkgutil.get_data() returns None when the package cannot be
        # located or its loader does not support get_data()
        if data is None:
            raise NotFoundException(not_found_msg)

        content_type, _ = mimetypes.guess_type(full_path)
        if not content_type:
            # unknown extension: fall back to a generic binary type
            # rather than passing None as the content type
            content_type = 'application/octet-stream'

        # Both branches hand the full payload to text_stream, exactly as
        # the package branch always did with the result of get_data().
        # NOTE(review): whole-file buffering assumes static assets are
        # small -- confirm, or switch to a streamed reader if not.
        return WbResponse.text_stream(data, content_type=content_type)

    @staticmethod
    def _read_file(path):
        """Read a file fully in binary mode, closing the handle promptly."""
        with open(path, 'rb') as fh:
            return fh.read()

    @staticmethod
    def get_wburl_type():
        # the request path is used as-is (wb_url_str); no wburl class
        return None

    def __str__(self):
        return 'Static files from ' + self.static_path
#================================================================= #=================================================================
# Debug Handlers # Debug Handlers
#================================================================= #=================================================================

View File

@ -7,6 +7,14 @@ import config_utils
import logging import logging
import proxy import proxy
#=================================================================
DEFAULT_HEAD_INSERT = 'ui/head_insert.html'
DEFAULT_QUERY = 'ui/query.html'
DEFAULT_SEARCH = 'ui/search.html'
DEFAULT_INDEX = 'ui/index.html'
DEFAULT_ERROR = 'ui/error.html'
#================================================================= #=================================================================
## Reference non-YAML config ## Reference non-YAML config
#================================================================= #=================================================================
@ -39,10 +47,11 @@ def pywb_config_manual(config = {}):
wb_handler = config_utils.create_wb_handler( wb_handler = config_utils.create_wb_handler(
cdx_source = cdx_source, cdx_source = cdx_source,
archive_paths = route_config.get('archive_paths', './sample_archive/warcs/'), archive_paths = route_config.get('archive_paths', './sample_archive/warcs/'),
head_html = route_config.get('head_insert_html'), head_html = route_config.get('head_insert_html', DEFAULT_HEAD_INSERT),
query_html = route_config.get('query_html'), query_html = route_config.get('query_html', DEFAULT_QUERY),
search_html = route_config.get('search_html'), search_html = route_config.get('search_html', DEFAULT_SEARCH),
static_path = route_config.get('static_path', hostpaths[0] + 'static/')
static_path = hostpaths[0] + route_config.get('static_path', 'static/')
) )
logging.info('Adding Collection: ' + name) logging.info('Adding Collection: ' + name)
@ -56,6 +65,12 @@ def pywb_config_manual(config = {}):
if config.get('debug_echo_req', False): if config.get('debug_echo_req', False):
routes.append(archivalrouter.Route('echo_req', handlers.DebugEchoHandler())) routes.append(archivalrouter.Route('echo_req', handlers.DebugEchoHandler()))
static_routes = config.get('static_routes', {'static': 'static/'})
for static_name, static_path in static_routes.iteritems():
routes.append(archivalrouter.Route(static_name, handlers.StaticHandler(static_path)))
# Check for new proxy mode! # Check for new proxy mode!
if config.get('enable_http_proxy', False): if config.get('enable_http_proxy', False):
router = proxy.ProxyArchivalRouter router = proxy.ProxyArchivalRouter
@ -70,8 +85,8 @@ def pywb_config_manual(config = {}):
# (See archivalrouter.ReferRedirect) # (See archivalrouter.ReferRedirect)
hostpaths = hostpaths, hostpaths = hostpaths,
home_view = config_utils.load_template_file(config.get('home_html'), 'Home Page'), home_view = config_utils.load_template_file(config.get('home_html', DEFAULT_INDEX), 'Home Page'),
error_view = config_utils.load_template_file(config.get('error_html'), 'Error Page') error_view = config_utils.load_template_file(config.get('error_html', DEFAULT_ERROR), 'Error Page')
) )

View File

@ -6,7 +6,7 @@ import time
from os import path from os import path
from itertools import imap from itertools import imap
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader, PackageLoader
#================================================================= #=================================================================
@ -31,7 +31,12 @@ class J2TemplateView:
def make_jinja_env(self, template_dir): def make_jinja_env(self, template_dir):
jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True) if template_dir.startswith('.') or template_dir.startswith('file://'):
loader = FileSystemLoader(template_dir)
else:
loader = PackageLoader(__package__, template_dir)
jinja_env = Environment(loader = loader, trim_blocks = True)
jinja_env.filters['format_ts'] = J2TemplateView.format_ts jinja_env.filters['format_ts'] = J2TemplateView.format_ts
return jinja_env return jinja_env

View File

@ -75,14 +75,17 @@ class WbRequest:
self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.make_abs_prefix(env, wb_prefix) self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.make_abs_prefix(env, wb_prefix)
if not wb_url_str:
wb_url_str = '/'
# wb_url present and not root page # wb_url present and not root page
if wb_url_str != '/' and wb_url_str != '' and wburl_class: if wb_url_str != '/' and wburl_class:
self.wb_url_str = wb_url_str self.wb_url_str = wb_url_str
self.wb_url = wburl_class(wb_url_str) self.wb_url = wburl_class(wb_url_str)
self.urlrewriter = url_rewriter_class(self.wb_url, self.wb_prefix) self.urlrewriter = url_rewriter_class(self.wb_url, self.wb_prefix)
else: else:
# no wb_url, just store blank # no wb_url, just store blank wb_url
self.wb_url_str = '/' self.wb_url_str = wb_url_str
self.wb_url = None self.wb_url = None
self.urlrewriter = None self.urlrewriter = None

View File

@ -88,7 +88,14 @@ class TestWb:
assert 'Mon, Jan 27 2014 17:12:51' in resp.body assert 'Mon, Jan 27 2014 17:12:51' in resp.body
assert '/pywb/20140127171251/http://www.iana.org/domains/example' in resp.body assert '/pywb/20140127171251/http://www.iana.org/domains/example' in resp.body
# XX: Doesn't work as webtest does not support proxy mode def test_static_content(self):
resp = self.testapp.get('/test-static/wb.css')
assert resp.status_int == 200
assert resp.content_type == 'text/css'
assert resp.content_length > 0
# XX: Doesn't work as webtest does not support proxy mode
# need a way to test # need a way to test
#def test_proxy_replay(self): #def test_proxy_replay(self):
#resp = self.testapp.get('http://www.iana.org/domains/idn-tables') #resp = self.testapp.get('http://www.iana.org/domains/idn-tables')

3
run.sh
View File

@ -12,7 +12,8 @@ mypath=$(cd `dirname $0` && pwd)
app="pywb.wbapp" app="pywb.wbapp"
params="--static-map /static=$mypath/static --http-socket :8080 -b 65536" params="--http-socket :8080 -b 65536"
#params="--static-map /static=$mypath/static --http-socket :8080 -b 65536"
if [ -z "$1" ]; then if [ -z "$1" ]; then
# Standard root config # Standard root config

View File

@ -11,6 +11,8 @@ setuptools.setup(name='pywb',
long_description=open('README.md').read(), long_description=open('README.md').read(),
license='GPL', license='GPL',
packages=['pywb'], packages=['pywb'],
provides=['pywb'],
package_data={'pywb': ['ui/*', 'static/*']},
install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis', 'jinja2', 'surt', 'pyyaml', 'WebTest'], install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis', 'jinja2', 'surt', 'pyyaml', 'WebTest'],
tests_require=['WebTest', 'pytest'], tests_require=['WebTest', 'pytest'],
zip_safe=False) zip_safe=False)

View File

@ -38,27 +38,27 @@ archive_paths: ./sample_archive/warcs/
# ==== Optional UI: HTML/Jinja2 Templates ==== # ==== Optional UI: HTML/Jinja2 Templates ====
# template for <head> insert into replayed html content # template for <head> insert into replayed html content
head_insert_html: ./ui/head_insert.html head_insert_html: ui/head_insert.html
# template for 'calendar' query, # template for 'calendar' query,
# eg, a listing of captures in response to a ../*/<url> # eg, a listing of captures in response to a ../*/<url>
# #
# may be a simple listing or a more complex 'calendar' UI # may be a simple listing or a more complex 'calendar' UI
# if omitted, will list raw cdx in plain text # if omitted, will list raw cdx in plain text
query_html: ./ui/query.html query_html: ui/query.html
# template for search page, which is displayed when no search url is entered # template for search page, which is displayed when no search url is entered
# in a collection # in a collection
search_html: ./ui/search.html search_html: ui/search.html
# template for home page. # template for home page.
# if no other route is set, this will be rendered at /, /index.htm and /index.html # if no other route is set, this will be rendered at /, /index.htm and /index.html
home_html: ./ui/index.html home_html: ui/index.html
# error page template for formatting error message and details # error page template for formatting error message and details
# if omitted, a text response is returned # if omitted, a text response is returned
error_html: ./ui/error.html error_html: ui/error.html
# ==== Other Paths ==== # ==== Other Paths ====
@ -70,11 +70,13 @@ error_html: ./ui/error.html
# to http://localhost:8080/pywb/image.gif # to http://localhost:8080/pywb/image.gif
# #
hostpaths: ['http://localhost:8080/'] #hostpaths: ['http://localhost:8080/']
# List of route names:
# <route>: <package or file path>
static_routes:
test-static: static/
# Custom path for serving html content
# Default is hostname[0] + '/static/'
#static_path: /static/
# ==== New / Experimental Settings ==== # ==== New / Experimental Settings ====
# Not yet production ready -- used primarily for testing # Not yet production ready -- used primarily for testing