From b11f4fad93046bf66c4a597ba6329f7ce636647c Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 7 Feb 2014 19:32:58 -0800 Subject: [PATCH] add support for pywb static content routes (separate from uwsgi) adding StaticHandler and loading templates and static resources from current package add default template and static data to be included in the pywb package add test for custom static route --- README.md | 2 +- config.yaml | 32 ++++++++++++++--------- pywb/handlers.py | 45 +++++++++++++++++++++++++++++++- pywb/pywb_init.py | 27 ++++++++++++++----- {static => pywb/static}/wb.css | 0 {static => pywb/static}/wb.js | 0 {ui => pywb/ui}/error.html | 0 {ui => pywb/ui}/head_insert.html | 0 {ui => pywb/ui}/index.html | 0 {ui => pywb/ui}/query.html | 0 {ui => pywb/ui}/search.html | 0 pywb/views.py | 9 +++++-- pywb/wbrequestresponse.py | 9 ++++--- run-tests.py | 9 ++++++- run.sh | 3 ++- setup.py | 2 ++ test_config.yaml | 20 +++++++------- 17 files changed, 121 insertions(+), 37 deletions(-) rename {static => pywb/static}/wb.css (100%) rename {static => pywb/static}/wb.js (100%) rename {ui => pywb/ui}/error.html (100%) rename {ui => pywb/ui}/head_insert.html (100%) rename {ui => pywb/ui}/index.html (100%) rename {ui => pywb/ui}/query.html (100%) rename {ui => pywb/ui}/search.html (100%) diff --git a/README.md b/README.md index ca3de93c..3da19c48 100644 --- a/README.md +++ b/README.md @@ -151,7 +151,7 @@ the location of those files. #### SURT -By default, pywb expects the cdx files to be Sort-Friendly-Url-Transform (SURT) ordering. +By default, pywb expects the cdx files to be Sort-friendly URL Reordering Transform (SURT) ordering. This is an ordering that transforms: `example.com` -> `com,example)/` to faciliate better search. It is recommended for future indexing, but is not required. 
diff --git a/config.yaml b/config.yaml index 00793a2c..82acddbb 100644 --- a/config.yaml +++ b/config.yaml @@ -21,7 +21,9 @@ collections: # # * Set to true if cdxs start with surts: com,example)/ # * Set to false if cdx start with urls: example.com)/ -surt_ordered: true +# +# default: +# surt_ordered: true # list of paths prefixes for pywb look to 'resolve' WARC and ARC filenames # in the cdx to their absolute path @@ -34,30 +36,33 @@ surt_ordered: true archive_paths: ./sample_archive/warcs/ -# ==== Optional UI: HTML/Jinja2 Templates ==== +# The following are default settings -- uncomment to change +# Set to '' to disable the ui + +# ==== UI: HTML/Jinja2 Templates ==== # template for insert into replayed html content -head_insert_html: ./ui/head_insert.html +#head_insert_html: ui/head_insert.html # template to for 'calendar' query, # eg, a listing of captures in response to a ../*/ # # may be a simple listing or a more complex 'calendar' UI # if omitted, will list raw cdx in plain text -query_html: ./ui/query.html +#query_html: ui/query.html # template for search page, which is displayed when no search url is entered # in a collection -search_html: ./ui/search.html +#search_html: ui/search.html # template for home page. 
# if no other route is set, this will be rendered at /, /index.htm and /index.html -home_html: ./ui/index.html +#home_html: ui/index.html # error page temlpate for may formatting error message and details # if omitted, a text response is returned -error_html: ./ui/error.html +#error_html: ui/error.html # ==== Other Paths ==== @@ -69,17 +74,18 @@ error_html: ./ui/error.html # to http://localhost:8080/pywb/image.gif # -hostpaths: ['http://localhost:8080/'] +#hostpaths: ['http://localhost:8080/'] -# Custom path for serving html content -# Default is hostname[0] + '/static/' -#static_path: /static/ +# List of route names: +# : +static_routes: + static: static/ # ==== New / Experimental Settings ==== # Not yet production ready -- used primarily for testing # Enable simple http proxy mode -enable_http_proxy: false +#enable_http_proxy: false # enable cdx server api for querying cdx directly (experimental) -enable_cdx_api: false +#enable_cdx_api: false diff --git a/pywb/handlers.py b/pywb/handlers.py index 264943cd..329d99e6 100644 --- a/pywb/handlers.py +++ b/pywb/handlers.py @@ -4,7 +4,10 @@ import urlparse from wbrequestresponse import WbResponse from wburl import WbUrl -from wbexceptions import WbException +from wbexceptions import WbException, NotFoundException + +import pkgutil +import mimetypes class BaseHandler: @@ -98,6 +101,46 @@ class CDXHandler(BaseHandler): def __str__(self): return 'CDX Server: ' + str(self.cdx_reader) + +#================================================================= +# Static Content Handler +#================================================================= +class StaticHandler(BaseHandler): + def __init__(self, static_path, pkg = __package__): + mimetypes.init() + + self.static_path = static_path + self.pkg = pkg + + def __call__(self, wbrequest): + full_path = self.static_path + wbrequest.wb_url_str + + try: + if full_path.startswith('.') or full_path.startswith('file://'): + data = open(full_path, 'rb') + else: + data = 
pkgutil.get_data(self.pkg, full_path) + + if 'wsgi.file_wrapper' in wbrequest.env: + reader = wbrequest.env['wsgi.file_wrapper'](data) + else: + reader = iter(lambda: data.read(), '') + + content_type, _ = mimetypes.guess_type(full_path) + + return WbResponse.text_stream(data, content_type = content_type) + + except IOError: + raise NotFoundException('Static File Not Found: ' + wbrequest.wb_url_str) + + @staticmethod + def get_wburl_type(): + return None + + def __str__(self): + return 'Static files from ' + self.static_path + + #================================================================= # Debug Handlers #================================================================= diff --git a/pywb/pywb_init.py b/pywb/pywb_init.py index 616e7fac..bd8c7400 100644 --- a/pywb/pywb_init.py +++ b/pywb/pywb_init.py @@ -7,6 +7,14 @@ import config_utils import logging import proxy +#================================================================= +DEFAULT_HEAD_INSERT = 'ui/head_insert.html' +DEFAULT_QUERY = 'ui/query.html' +DEFAULT_SEARCH = 'ui/search.html' +DEFAULT_INDEX = 'ui/index.html' +DEFAULT_ERROR = 'ui/error.html' + + #================================================================= ## Reference non-YAML config #================================================================= @@ -39,10 +47,11 @@ def pywb_config_manual(config = {}): wb_handler = config_utils.create_wb_handler( cdx_source = cdx_source, archive_paths = route_config.get('archive_paths', './sample_archive/warcs/'), - head_html = route_config.get('head_insert_html'), - query_html = route_config.get('query_html'), - search_html = route_config.get('search_html'), - static_path = route_config.get('static_path', hostpaths[0] + 'static/') + head_html = route_config.get('head_insert_html', DEFAULT_HEAD_INSERT), + query_html = route_config.get('query_html', DEFAULT_QUERY), + search_html = route_config.get('search_html', DEFAULT_SEARCH), + + static_path = hostpaths[0] + route_config.get('static_path', 'static/') 
) logging.info('Adding Collection: ' + name) @@ -56,6 +65,12 @@ def pywb_config_manual(config = {}): if config.get('debug_echo_req', False): routes.append(archivalrouter.Route('echo_req', handlers.DebugEchoHandler())) + + static_routes = config.get('static_routes', {'static': 'static/'}) + + for static_name, static_path in static_routes.iteritems(): + routes.append(archivalrouter.Route(static_name, handlers.StaticHandler(static_path))) + # Check for new proxy mode! if config.get('enable_http_proxy', False): router = proxy.ProxyArchivalRouter @@ -70,8 +85,8 @@ def pywb_config_manual(config = {}): # (See archivalrouter.ReferRedirect) hostpaths = hostpaths, - home_view = config_utils.load_template_file(config.get('home_html'), 'Home Page'), - error_view = config_utils.load_template_file(config.get('error_html'), 'Error Page') + home_view = config_utils.load_template_file(config.get('home_html', DEFAULT_INDEX), 'Home Page'), + error_view = config_utils.load_template_file(config.get('error_html', DEFAULT_ERROR), 'Error Page') ) diff --git a/static/wb.css b/pywb/static/wb.css similarity index 100% rename from static/wb.css rename to pywb/static/wb.css diff --git a/static/wb.js b/pywb/static/wb.js similarity index 100% rename from static/wb.js rename to pywb/static/wb.js diff --git a/ui/error.html b/pywb/ui/error.html similarity index 100% rename from ui/error.html rename to pywb/ui/error.html diff --git a/ui/head_insert.html b/pywb/ui/head_insert.html similarity index 100% rename from ui/head_insert.html rename to pywb/ui/head_insert.html diff --git a/ui/index.html b/pywb/ui/index.html similarity index 100% rename from ui/index.html rename to pywb/ui/index.html diff --git a/ui/query.html b/pywb/ui/query.html similarity index 100% rename from ui/query.html rename to pywb/ui/query.html diff --git a/ui/search.html b/pywb/ui/search.html similarity index 100% rename from ui/search.html rename to pywb/ui/search.html diff --git a/pywb/views.py b/pywb/views.py index 
75356f8e..ec6d5c51 100644 --- a/pywb/views.py +++ b/pywb/views.py @@ -6,7 +6,7 @@ import time from os import path from itertools import imap -from jinja2 import Environment, FileSystemLoader +from jinja2 import Environment, FileSystemLoader, PackageLoader #================================================================= @@ -31,7 +31,12 @@ class J2TemplateView: def make_jinja_env(self, template_dir): - jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True) + if template_dir.startswith('.') or template_dir.startswith('file://'): + loader = FileSystemLoader(template_dir) + else: + loader = PackageLoader(__package__, template_dir) + + jinja_env = Environment(loader = loader, trim_blocks = True) jinja_env.filters['format_ts'] = J2TemplateView.format_ts return jinja_env diff --git a/pywb/wbrequestresponse.py b/pywb/wbrequestresponse.py index 8449f588..2bdc8bbb 100644 --- a/pywb/wbrequestresponse.py +++ b/pywb/wbrequestresponse.py @@ -75,14 +75,17 @@ class WbRequest: self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.make_abs_prefix(env, wb_prefix) + if not wb_url_str: + wb_url_str = '/' + # wb_url present and not root page - if wb_url_str != '/' and wb_url_str != '' and wburl_class: + if wb_url_str != '/' and wburl_class: self.wb_url_str = wb_url_str self.wb_url = wburl_class(wb_url_str) self.urlrewriter = url_rewriter_class(self.wb_url, self.wb_prefix) else: - # no wb_url, just store blank - self.wb_url_str = '/' + # no wb_url, just store blank wb_url + self.wb_url_str = wb_url_str self.wb_url = None self.urlrewriter = None diff --git a/run-tests.py b/run-tests.py index 550f871c..4782af2c 100644 --- a/run-tests.py +++ b/run-tests.py @@ -88,7 +88,14 @@ class TestWb: assert 'Mon, Jan 27 2014 17:12:51' in resp.body assert '/pywb/20140127171251/http://www.iana.org/domains/example' in resp.body - # XX: Doesn't work as webtest does not support proxy mode + def test_static_content(self): + resp = 
self.testapp.get('/test-static/wb.css') + assert resp.status_int == 200 + assert resp.content_type == 'text/css' + assert resp.content_length > 0 + + + # XX: Doesn't work as webtest does not support proxy mode # need a way to test #def test_proxy_replay(self): #resp = self.testapp.get('http://www.iana.org/domains/idn-tables') diff --git a/run.sh b/run.sh index ec2d1206..d6e484b9 100755 --- a/run.sh +++ b/run.sh @@ -12,7 +12,8 @@ mypath=$(cd `dirname $0` && pwd) app="pywb.wbapp" -params="--static-map /static=$mypath/static --http-socket :8080 -b 65536" +params="--http-socket :8080 -b 65536" +#params="--static-map /static=$mypath/static --http-socket :8080 -b 65536" if [ -z "$1" ]; then # Standard root config diff --git a/setup.py b/setup.py index ac46c007..74cb604b 100755 --- a/setup.py +++ b/setup.py @@ -11,6 +11,8 @@ setuptools.setup(name='pywb', long_description=open('README.md').read(), license='GPL', packages=['pywb'], + provides=['pywb'], + package_data={'pywb': ['ui/*', 'static/*']}, install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis', 'jinja2', 'surt', 'pyyaml', 'WebTest'], tests_require=['WebTest', 'pytest'], zip_safe=False) diff --git a/test_config.yaml b/test_config.yaml index 8227c091..45226cc9 100644 --- a/test_config.yaml +++ b/test_config.yaml @@ -38,27 +38,27 @@ archive_paths: ./sample_archive/warcs/ # ==== Optional UI: HTML/Jinja2 Templates ==== # template for insert into replayed html content -head_insert_html: ./ui/head_insert.html +head_insert_html: ui/head_insert.html # template to for 'calendar' query, # eg, a listing of captures in response to a ../*/ # # may be a simple listing or a more complex 'calendar' UI # if omitted, will list raw cdx in plain text -query_html: ./ui/query.html +query_html: ui/query.html # template for search page, which is displayed when no search url is entered # in a collection -search_html: ./ui/search.html +search_html: ui/search.html # template for home page. 
# if no other route is set, this will be rendered at /, /index.htm and /index.html -home_html: ./ui/index.html +home_html: ui/index.html # error page temlpate for may formatting error message and details # if omitted, a text response is returned -error_html: ./ui/error.html +error_html: ui/error.html # ==== Other Paths ==== @@ -70,11 +70,13 @@ error_html: ./ui/error.html # to http://localhost:8080/pywb/image.gif # -hostpaths: ['http://localhost:8080/'] +#hostpaths: ['http://localhost:8080/'] + +# List of route names: +# : +static_routes: + test-static: static/ -# Custom path for serving html content -# Default is hostname[0] + '/static/' -#static_path: /static/ # ==== New / Experimental Settings ==== # Not yet production ready -- used primarily for testing