diff --git a/README.md b/README.md index ca3de93c..3da19c48 100644 --- a/README.md +++ b/README.md @@ -151,7 +151,7 @@ the location of those files. #### SURT -By default, pywb expects the cdx files to be Sort-Friendly-Url-Transform (SURT) ordering. +By default, pywb expects the cdx files to be Sort-friendly URL Reordering Transform (SURT) ordering. This is an ordering that transforms: `example.com` -> `com,example)/` to facilitate better search. It is recommended for future indexing, but is not required. diff --git a/config.yaml b/config.yaml index 00793a2c..82acddbb 100644 --- a/config.yaml +++ b/config.yaml @@ -21,7 +21,9 @@ collections: # # * Set to true if cdxs start with surts: com,example)/ # * Set to false if cdx start with urls: example.com)/ -surt_ordered: true +# +# default: +# surt_ordered: true # list of path prefixes pywb uses to 'resolve' WARC and ARC filenames # in the cdx to their absolute path @@ -34,30 +36,33 @@ surt_ordered: true archive_paths: ./sample_archive/warcs/ -# ==== Optional UI: HTML/Jinja2 Templates ==== +# The following are default settings -- uncomment to change +# Set to '' to disable the ui + +# ==== UI: HTML/Jinja2 Templates ==== # template for insert into replayed html content -head_insert_html: ./ui/head_insert.html +#head_insert_html: ui/head_insert.html # template for 'calendar' query, # eg, a listing of captures in response to a ../*/ # # may be a simple listing or a more complex 'calendar' UI # if omitted, will list raw cdx in plain text -query_html: ./ui/query.html +#query_html: ui/query.html # template for search page, which is displayed when no search url is entered # in a collection -search_html: ./ui/search.html +#search_html: ui/search.html # template for home page. 
#=================================================================
# Static Content Handler
#=================================================================
class StaticHandler(BaseHandler):
    """Serve static content for a route, from disk or from package data.

    ``static_path`` is prepended to the path portion of every request
    (``wbrequest.wb_url_str``).  If the resulting path starts with '.'
    or 'file://' it is read from the local filesystem; otherwise it is
    loaded as a resource of the python package ``pkg`` through
    ``pkgutil.get_data()``.
    """

    # Prefix marking a filesystem location given as a url
    FILE_SCHEME = 'file://'

    # Number of bytes read per chunk when streaming a file from disk
    BLOCK_SIZE = 16384

    # Content type used when the type cannot be guessed from the file name
    DEFAULT_CONTENT_TYPE = 'application/octet-stream'

    def __init__(self, static_path, pkg = __package__):
        mimetypes.init()

        self.static_path = static_path
        self.pkg = pkg

    def __call__(self, wbrequest):
        """Return a WbResponse streaming the requested static file.

        Raises NotFoundException if the file does not exist, cannot be
        read, or if the requested path contains a '..' segment.
        """
        rel_path = wbrequest.wb_url_str

        # The request path comes from the client: never allow it to climb
        # out of the configured static directory.
        if '..' in rel_path.replace('\\', '/').split('/'):
            raise NotFoundException('Static File Not Found: ' + rel_path)

        full_path = self.static_path + rel_path

        try:
            if full_path.startswith(('.', self.FILE_SCHEME)):
                reader = self._open_file(full_path, wbrequest.env)
            else:
                # get_data() returns the whole resource as one byte string
                # (or None if the package loader cannot provide data),
                # not a file object, so it is streamed as a single chunk.
                contents = pkgutil.get_data(self.pkg, full_path)
                if contents is None:
                    raise IOError('No package data for: ' + full_path)

                reader = [contents]

        except IOError:
            raise NotFoundException('Static File Not Found: ' + rel_path)

        content_type, _ = mimetypes.guess_type(full_path)
        if not content_type:
            content_type = self.DEFAULT_CONTENT_TYPE

        return WbResponse.text_stream(reader, content_type = content_type)

    def _open_file(self, full_path, env):
        """Open a file on disk and return an iterable over its contents.

        Uses the server-provided 'wsgi.file_wrapper' when the WSGI
        environ has one, otherwise falls back to reading fixed-size
        blocks.  Raises IOError if the file cannot be opened.
        """
        if full_path.startswith(self.FILE_SCHEME):
            # open() expects a plain path, not a file:// url
            local_path = full_path[len(self.FILE_SCHEME):]
        else:
            local_path = full_path

        stream = open(local_path, 'rb')

        file_wrapper = env.get('wsgi.file_wrapper')
        if file_wrapper:
            return file_wrapper(stream)

        return self._read_blocks(stream, self.BLOCK_SIZE)

    @staticmethod
    def _read_blocks(stream, block_size):
        """Yield successive blocks from stream, closing it when finished."""
        try:
            while True:
                block = stream.read(block_size)
                if not block:
                    break

                yield block
        finally:
            stream.close()

    @staticmethod
    def get_wburl_type():
        # Static routes do not parse the request path into a wb url
        return None

    def __str__(self):
        return 'Static files from ' + self.static_path
) logging.info('Adding Collection: ' + name) @@ -56,6 +65,12 @@ def pywb_config_manual(config = {}): if config.get('debug_echo_req', False): routes.append(archivalrouter.Route('echo_req', handlers.DebugEchoHandler())) + + static_routes = config.get('static_routes', {'static': 'static/'}) + + for static_name, static_path in static_routes.iteritems(): + routes.append(archivalrouter.Route(static_name, handlers.StaticHandler(static_path))) + # Check for new proxy mode! if config.get('enable_http_proxy', False): router = proxy.ProxyArchivalRouter @@ -70,8 +85,8 @@ def pywb_config_manual(config = {}): # (See archivalrouter.ReferRedirect) hostpaths = hostpaths, - home_view = config_utils.load_template_file(config.get('home_html'), 'Home Page'), - error_view = config_utils.load_template_file(config.get('error_html'), 'Error Page') + home_view = config_utils.load_template_file(config.get('home_html', DEFAULT_INDEX), 'Home Page'), + error_view = config_utils.load_template_file(config.get('error_html', DEFAULT_ERROR), 'Error Page') ) diff --git a/static/wb.css b/pywb/static/wb.css similarity index 100% rename from static/wb.css rename to pywb/static/wb.css diff --git a/static/wb.js b/pywb/static/wb.js similarity index 100% rename from static/wb.js rename to pywb/static/wb.js diff --git a/ui/error.html b/pywb/ui/error.html similarity index 100% rename from ui/error.html rename to pywb/ui/error.html diff --git a/ui/head_insert.html b/pywb/ui/head_insert.html similarity index 100% rename from ui/head_insert.html rename to pywb/ui/head_insert.html diff --git a/ui/index.html b/pywb/ui/index.html similarity index 100% rename from ui/index.html rename to pywb/ui/index.html diff --git a/ui/query.html b/pywb/ui/query.html similarity index 100% rename from ui/query.html rename to pywb/ui/query.html diff --git a/ui/search.html b/pywb/ui/search.html similarity index 100% rename from ui/search.html rename to pywb/ui/search.html diff --git a/pywb/views.py b/pywb/views.py index 
def make_jinja_env(self, template_dir):
    """Create the Jinja2 environment that loads templates from template_dir.

    A template_dir starting with 'file://' or '.' is read from the
    filesystem; any other value is treated as a directory inside this
    package and read through PackageLoader.  The 'format_ts' filter is
    registered on the returned environment.
    """
    file_scheme = 'file://'

    if template_dir.startswith(file_scheme):
        # FileSystemLoader takes a plain directory path, so the url
        # scheme must be removed before it can locate any template
        loader = FileSystemLoader(template_dir[len(file_scheme):])
    elif template_dir.startswith('.'):
        loader = FileSystemLoader(template_dir)
    else:
        loader = PackageLoader(__package__, template_dir)

    jinja_env = Environment(loader = loader, trim_blocks = True)
    jinja_env.filters['format_ts'] = J2TemplateView.format_ts
    return jinja_env
self.testapp.get('/test-static/wb.css') + assert resp.status_int == 200 + assert resp.content_type == 'text/css' + assert resp.content_length > 0 + + + # XX: Doesn't work as webtest does not support proxy mode # need a way to test #def test_proxy_replay(self): #resp = self.testapp.get('http://www.iana.org/domains/idn-tables') diff --git a/run.sh b/run.sh index ec2d1206..d6e484b9 100755 --- a/run.sh +++ b/run.sh @@ -12,7 +12,8 @@ mypath=$(cd `dirname $0` && pwd) app="pywb.wbapp" -params="--static-map /static=$mypath/static --http-socket :8080 -b 65536" +params="--http-socket :8080 -b 65536" +#params="--static-map /static=$mypath/static --http-socket :8080 -b 65536" if [ -z "$1" ]; then # Standard root config diff --git a/setup.py b/setup.py index ac46c007..74cb604b 100755 --- a/setup.py +++ b/setup.py @@ -11,6 +11,8 @@ setuptools.setup(name='pywb', long_description=open('README.md').read(), license='GPL', packages=['pywb'], + provides=['pywb'], + package_data={'pywb': ['ui/*', 'static/*']}, install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis', 'jinja2', 'surt', 'pyyaml', 'WebTest'], tests_require=['WebTest', 'pytest'], zip_safe=False) diff --git a/test_config.yaml b/test_config.yaml index 8227c091..45226cc9 100644 --- a/test_config.yaml +++ b/test_config.yaml @@ -38,27 +38,27 @@ archive_paths: ./sample_archive/warcs/ # ==== Optional UI: HTML/Jinja2 Templates ==== # template for insert into replayed html content -head_insert_html: ./ui/head_insert.html +head_insert_html: ui/head_insert.html # template to for 'calendar' query, # eg, a listing of captures in response to a ../*/ # # may be a simple listing or a more complex 'calendar' UI # if omitted, will list raw cdx in plain text -query_html: ./ui/query.html +query_html: ui/query.html # template for search page, which is displayed when no search url is entered # in a collection -search_html: ./ui/search.html +search_html: ui/search.html # template for home page. 
# if no other route is set, this will be rendered at /, /index.htm and /index.html -home_html: ./ui/index.html +home_html: ui/index.html # error page template for formatting error messages and details # if omitted, a text response is returned -error_html: ./ui/error.html +error_html: ui/error.html # ==== Other Paths ==== @@ -70,11 +70,13 @@ error_html: ./ui/error.html # to http://localhost:8080/pywb/image.gif # -hostpaths: ['http://localhost:8080/'] +#hostpaths: ['http://localhost:8080/'] + +# List of route names: +# route_name: path_to_static_files +static_routes: + test-static: static/ -# Custom path for serving html content -# Default is hostname[0] + '/static/' -#static_path: /static/ # ==== New / Experimental Settings ==== # Not yet production ready -- used primarily for testing