From 304ddbec847316839a26ce72e08b7edbd9a09f72 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Fri, 31 Jan 2014 10:04:21 -0800
Subject: [PATCH] Support for new UI, as per #16 * Refactor views class to
support more Jinja2 views (J2Template) * Add a home page, collection search
page, and error pages, all optional * all exceptions appear on error page *
wbrequest supports a request with an empty or / wb_url
---
README.md | 4 +-
config.yaml | 10 ++++
pywb/archivalrouter.py | 32 ++++++++--
pywb/cdxserve.py | 5 +-
pywb/handlers.py | 36 +++++++++---
pywb/indexreader.py | 13 ++++-
pywb/pywb_init.py | 34 +++++++----
pywb/replay_resolvers.py | 36 ++++++++----
pywb/replay_views.py | 14 ++---
pywb/views.py | 119 ++++++++++++++++++++------------------
pywb/wbapp.py | 50 ++++++----------
pywb/wbexceptions.py | 23 +++++---
pywb/wbrequestresponse.py | 9 ++-
pywb/wburl.py | 4 +-
ui/error.html | 11 ++++
ui/head_insert.html | 2 +-
ui/index.html | 9 +++
ui/query.html | 12 +++-
ui/search.html | 6 ++
19 files changed, 281 insertions(+), 148 deletions(-)
create mode 100644 ui/error.html
create mode 100644 ui/index.html
create mode 100644 ui/search.html
diff --git a/README.md b/README.md
index 9424ca9b..d1f6979a 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,8 @@ Ex: The [Internet Archive Wayback Machine](https//archive.org/web/) has urls of
A listing of archived content, often in calendar form, is available when a `*` is used instead of timestamp.
+The Wayback Machine uses an HTML parser to rewrite relative and absolute links, as well as absolute links found in JavaScript, CSS and some XML.
+
pywb uses this interface as a starting point.
@@ -36,7 +38,7 @@ pywb currently works best with 2.7.x
It should run in a standard WSGI container, although currently
tested primarily with uWSGI 1.9 and 2.0
-Support for other versions of Python 3 is planned.
+Support for Python 3 is planned.
### Installation
diff --git a/config.yaml b/config.yaml
index ad36bcf3..a2cc6842 100644
--- a/config.yaml
+++ b/config.yaml
@@ -52,6 +52,9 @@ routes:
# if omitted, the capture listing lists raw index
calendar_html_template: ./ui/query.html
+ # ui: optional Jinja2 template to use for 'search' page
+ # this page is displayed when no search url is entered
+ search_html_template: ./ui/search.html
# list of host names that pywb will be running from to detect
# 'fallthrough' requests based on referrer
@@ -63,6 +66,13 @@ routes:
hostpaths: ['http://localhost:8080/']
+# ui: optional Jinja2 template for home page
+# if no other route is set to home page, this template will
+# be rendered at /, /index.htm and /index.html
+home_html_template: ./ui/index.html
+# ui: optional Jinja2 template for rendering any errors
+# the error page may print a detailed error message
+error_html_template: ./ui/error.html
diff --git a/pywb/archivalrouter.py b/pywb/archivalrouter.py
index 869408df..398d9c5c 100644
--- a/pywb/archivalrouter.py
+++ b/pywb/archivalrouter.py
@@ -1,5 +1,6 @@
import urlparse
import re
+import wbexceptions
from wbrequestresponse import WbRequest, WbResponse
from url_rewriter import UrlRewriter
@@ -9,25 +10,39 @@ from wburl import WbUrl
# ArchivalRequestRouter -- route WB requests in archival mode
#=================================================================
class ArchivalRequestRouter:
- def __init__(self, handlers, hostpaths = None, abs_path = True, archivalurl_class = WbUrl):
- self.handlers = handlers
+ def __init__(self, routes, hostpaths = None, abs_path = True, archivalurl_class = WbUrl, homepage = None, errorpage = None):
+ self.routes = routes
self.fallback = ReferRedirect(hostpaths)
self.abs_path = abs_path
self.archivalurl_class = archivalurl_class
+ self.homepage = homepage
+ self.errorpage = errorpage
+
def __call__(self, env):
- for handler in self.handlers:
- result = handler(env, self.abs_path, self.archivalurl_class)
+ for route in self.routes:
+ result = route(env, self.abs_path, self.archivalurl_class)
if result:
return result
+ # Home Page
+ if env['REL_REQUEST_URI'] in ['/', '/index.html', '/index.htm']:
+ return self.render_homepage()
+
if not self.fallback:
return None
return self.fallback(WbRequest.from_uri(None, env))
-
+ def render_homepage(self):
+ # render the homepage!
+ if self.homepage:
+ return self.homepage.render_response(routes = self.routes)
+ else:
+ # default home page template
+ text = '\n'.join(map(str, self.routes))
+ return WbResponse.text_response(text)
#=================================================================
# Route by matching regex (or fixed prefix)
@@ -36,10 +51,11 @@ class ArchivalRequestRouter:
class Route:
# match upto next slash
- SLASH_LOOKAHEAD ='(?=/)'
+ SLASH_LOOKAHEAD ='(?=/|$)'
def __init__(self, regex, handler, coll_group = 0, lookahead = SLASH_LOOKAHEAD):
+ self.path = regex
self.regex = re.compile(regex + lookahead)
self.handler = handler
# collection id from regex group (default 0)
@@ -83,6 +99,10 @@ class Route:
def _handle_request(self, wbrequest):
return self.handler(wbrequest)
+ def __str__(self):
+ #return '* ' + self.regex_str + ' => ' + str(self.handler)
+ return str(self.handler)
+
#=================================================================
# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings
diff --git a/pywb/cdxserve.py b/pywb/cdxserve.py
index e4922516..752b26f9 100644
--- a/pywb/cdxserve.py
+++ b/pywb/cdxserve.py
@@ -114,6 +114,9 @@ def cdx_reverse(cdx_iter, limit):
>>> test_cdx('org,iana)/_js/2013.1/jquery.js', reverse = True, resolve_revisits = True, limit = 1)
org,iana)/_js/2013.1/jquery.js 20140126201307 https://www.iana.org/_js/2013.1/jquery.js application/x-javascript 200 AAW2RS7JB7HTF666XNZDQYJFA6PDQBPO - - 543 778507 iana.warc.gz 33449 7311 iana.warc.gz
+
+ # no match, single result
+ >>> test_cdx('org,iana)/dont_have_this', reverse = True, resolve_revisits = True, limit = 1)
"""
# optimize for single last
@@ -123,7 +126,7 @@ def cdx_reverse(cdx_iter, limit):
for cdx in cdx_iter:
last = cdx
- return [last]
+ return [last] if last else []
reverse_cdxs = deque(maxlen = limit)
diff --git a/pywb/handlers.py b/pywb/handlers.py
index 2e2e77fe..6c7026c4 100644
--- a/pywb/handlers.py
+++ b/pywb/handlers.py
@@ -2,34 +2,52 @@ import views
import utils
import urlparse
+from wbrequestresponse import WbResponse
+
#=================================================================
# Standard WB Handler
#=================================================================
class WBHandler:
- def __init__(self, cdx_reader, replay, html_view = None):
+ def __init__(self, cdx_reader, replay, capturespage = None, searchpage = None):
self.cdx_reader = cdx_reader
self.replay = replay
- self.html_view = html_view
- self.text_view = views.TextQueryView()
+
+ self.text_view = views.TextCapturesView()
+ self.html_view = capturespage
+ self.searchpage = searchpage
+
def __call__(self, wbrequest):
+
+ if wbrequest.wb_url_str == '/':
+ return self.render_searchpage(wbrequest)
+
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
cdx_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True)
# new special modifier to always show cdx index
if wbrequest.wb_url.mod == 'cdx_':
- return self.text_view(wbrequest, cdx_lines)
+ return self.text_view.render_response(wbrequest, cdx_lines)
if (wbrequest.wb_url.type == wbrequest.wb_url.QUERY) or (wbrequest.wb_url.type == wbrequest.wb_url.URL_QUERY):
- if not self.html_view:
- return self.text_view(wbrequest, cdx_lines)
- else:
- return self.html_view(wbrequest, cdx_lines)
+ query_view = self.html_view if self.html_view else self.text_view
+ return query_view.render_response(wbrequest, cdx_lines)
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
return self.replay(wbrequest, cdx_lines, self.cdx_reader)
+ def render_searchpage(self, wbrequest):
+ if self.searchpage:
+ return self.searchpage.render_response(wbrequest = wbrequest)
+ else:
+ return WbResponse.text_response('No Lookup Url Specified')
+
+
+ def __str__(self):
+ return 'WBHandler: ' + str(self.cdx_reader) + ', ' + str(self.replay)
+
+
#=================================================================
# CDX-Server Handler -- pass all params to cdx server
@@ -37,7 +55,7 @@ class WBHandler:
class CDXHandler:
def __init__(self, cdx_reader, view = None):
self.cdx_reader = cdx_reader
- self.view = view if view else views.TextQueryView()
+ self.view = view if view else views.TextCapturesView()
def __call__(self, wbrequest):
url = wbrequest.wb_url.url
diff --git a/pywb/indexreader.py b/pywb/indexreader.py
index c2049fff..1daacc82 100644
--- a/pywb/indexreader.py
+++ b/pywb/indexreader.py
@@ -83,7 +83,10 @@ class LocalCDXServer(IndexReader):
def load_cdx(self, url, params = {}, parsed_cdx = True, **kwvalues):
# canonicalize to surt (canonicalization is part of surt conversion)
- key = surt.surt(url)
+ try:
+ key = surt.surt(url)
+ except Exception as e:
+ raise wbexceptions.BadUrlException('Bad Request Url: ' + url)
# if not surt, unsurt the surt to get canonicalized non-surt url
if not self.surt_ordered:
@@ -123,6 +126,10 @@ class LocalCDXServer(IndexReader):
}[wburl.type]
+ def __str__(self):
+ return 'load cdx indexes from ' + str(self.sources)
+
+
#=================================================================
class RemoteCDXServer(IndexReader):
@@ -196,6 +203,10 @@ class RemoteCDXServer(IndexReader):
}[wburl.type]
+ def __str__(self):
+ return 'server cdx from ' + self.server_url
+
+
#=================================================================
class CDXCaptureResult(OrderedDict):
CDX_FORMATS = [
diff --git a/pywb/pywb_init.py b/pywb/pywb_init.py
index 1690c64c..ea21668e 100644
--- a/pywb/pywb_init.py
+++ b/pywb/pywb_init.py
@@ -39,13 +39,13 @@ def pywb_config_manual():
prefixes = [replay_resolvers.PrefixResolver(test_dir + 'warcs/')]
# Jinja2 head insert
- head_insert = views.J2HeadInsertView('./ui/head_insert.html')
+ head_insert = views.J2TemplateView('./ui/head_insert.html')
# Create rewriting replay handler to rewrite records
- replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert = head_insert, buffer_response = True)
+ replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert_view = head_insert, buffer_response = True)
# Create Jinja2 based html query view
- html_view = views.J2QueryView('./ui/query.html')
+ html_view = views.J2HtmlCapturesView('./ui/query.html')
# WB handler which uses the index reader, replayer, and html_view
wb_handler = handlers.WBHandler(indexs, replayer, html_view)
@@ -81,11 +81,21 @@ def pywb_config(config_file = None):
routes = map(yaml_parse_route, config['routes'])
+ homepage = yaml_load_template(config, 'home_html_template', 'Home Page Template')
+ errorpage = yaml_load_template(config, 'error_html_template', 'Error Page Template')
+
hostpaths = config.get('hostpaths', ['http://localhost:8080/'])
- return ArchivalRequestRouter(routes, hostpaths)
+ return ArchivalRequestRouter(routes, hostpaths, homepage = homepage, errorpage = errorpage)
+def yaml_load_template(config, name, desc = None):
+ template = config.get(name) # optional setting: path of a Jinja2 template file (avoids shadowing the builtin 'file')
+ if template:
+ logging.info('Adding {0}: {1}'.format(desc if desc else name, template))
+ template = views.J2TemplateView(template) # wrap the configured path in a renderable view
+ return template # the falsy config value (e.g. None) is returned unchanged when the setting is absent
+
def yaml_parse_index_loader(config):
@@ -113,17 +123,19 @@ def yaml_parse_index_loader(config):
return indexreader.LocalCDXServer([uri])
+
+
def yaml_parse_head_insert(config):
# First, try a template file
head_insert_file = config.get('head_insert_html_template')
if head_insert_file:
logging.info('Adding Head-Insert Template: ' + head_insert_file)
- return views.J2HeadInsertView(head_insert_file)
+ return views.J2TemplateView(head_insert_file)
# Then, static head_insert text
head_insert_text = config.get('head_insert_text', '')
- logging.info('Adding Head-Insert Text: ' + head_insert_text)
- return head_insert_text
+ logging.info('Adding Head-Insert Text: ' + head_insert_text)
+ return views.StaticTextView(head_insert_text)
def yaml_parse_calendar_view(config):
@@ -133,7 +145,7 @@ def yaml_parse_calendar_view(config):
else:
logging.info('No HTML Calendar View Present')
- return views.J2QueryView(html_view_file) if html_view_file else None
+ return views.J2HtmlCapturesView(html_view_file) if html_view_file else None
@@ -150,12 +162,14 @@ def yaml_parse_route(config):
replayer = replay_views.RewritingReplayView(resolvers = archive_resolvers,
archiveloader = archive_loader,
- head_insert = head_insert,
+ head_insert_view = head_insert,
buffer_response = config.get('buffer_response', False))
html_view = yaml_parse_calendar_view(config)
- wb_handler = handlers.WBHandler(index_loader, replayer, html_view)
+ searchpage = yaml_load_template(config, 'search_html_template', 'Search Page Template')
+
+ wb_handler = handlers.WBHandler(index_loader, replayer, html_view, searchpage = searchpage)
return Route(name, wb_handler)
diff --git a/pywb/replay_resolvers.py b/pywb/replay_resolvers.py
index d87951e6..3f44ce33 100644
--- a/pywb/replay_resolvers.py
+++ b/pywb/replay_resolvers.py
@@ -16,6 +16,13 @@ class PrefixResolver:
def __call__(self, filename):
return [self.prefix + filename] if (self.contains in filename) else []
+ def __repr__(self):
+ if self.contains:
+ return "PrefixResolver('{0}', contains = '{1}')".format(self.prefix, self.contains)
+ else:
+ return "PrefixResolver('{0}')".format(self.prefix)
+
+
#======================================
class RedisResolver:
def __init__(self, redis_url, key_prefix = 'w:'):
@@ -31,9 +38,14 @@ class RedisResolver:
print e
return None
+ def __repr__(self):
+ return "RedisResolver('{0}')".format(self.redis_url)
+
+
#======================================
class PathIndexResolver:
def __init__(self, pathindex_file):
+ self.pathindex_file = pathindex_file
self.reader = binsearch.FileReader(pathindex_file)
def __call__(self, filename):
@@ -47,27 +59,32 @@ class PathIndexResolver:
return gen_list(result)
+ def __repr__(self):
+ return "PathIndexResolver('{0}')".format(self.pathindex_file)
+
#TODO: more options (remote files, contains param, etc..)
# find best resolver given the path
def make_best_resolver(path):
"""
# http path
- >>> class_name(make_best_resolver('http://myhost.example.com/warcs/'))
- 'PrefixResolver'
+ >>> make_best_resolver('http://myhost.example.com/warcs/')
+ PrefixResolver('http://myhost.example.com/warcs/')
# redis path
- >>> class_name(make_best_resolver('redis://myhost.example.com:1234/1'))
- 'RedisResolver'
+ >>> make_best_resolver('redis://myhost.example.com:1234/1')
+ RedisResolver('redis://myhost.example.com:1234/1')
# a file
- >>> class_name(make_best_resolver('file://' + os.path.realpath(__file__)))
- 'PathIndexResolver'
+ >>> make_best_resolver('file://' + os.path.dirname(os.path.realpath(__file__)) + '/replay_resolvers.py').__class__.__name__
+ 'PathIndexResolver'
# a dir
- >>> class_name(make_best_resolver('file://' + os.path.dirname(os.path.realpath(__file__))))
- 'PrefixResolver'
+ >>> make_best_resolver('file://' + os.path.dirname(os.path.realpath(__file__))).__class__.__name__
+ 'PrefixResolver'
+
"""
+
url_parts = urlparse.urlsplit(path)
if url_parts.scheme == 'redis':
@@ -90,9 +107,6 @@ import utils
#=================================================================
if __name__ == "__main__" or utils.enable_doctests():
- def class_name(obj):
- return obj.__class__.__name__
-
import doctest
doctest.testmod()
diff --git a/pywb/replay_views.py b/pywb/replay_views.py
index 8904ccfc..45ea3b7b 100644
--- a/pywb/replay_views.py
+++ b/pywb/replay_views.py
@@ -210,12 +210,15 @@ class ReplayView:
stream.close()
+ def __str__(self):
+ return 'find archive files from ' + str(self.resolvers)
+
#=================================================================
class RewritingReplayView(ReplayView):
- def __init__(self, resolvers, archiveloader, head_insert = None, header_rewriter = None, redir_to_exact = True, buffer_response = False):
+ def __init__(self, resolvers, archiveloader, head_insert_view = None, header_rewriter = None, redir_to_exact = True, buffer_response = False):
ReplayView.__init__(self, resolvers, archiveloader)
- self.head_insert = head_insert
+ self.head_insert_view = head_insert_view
self.header_rewriter = header_rewriter if header_rewriter else HeaderRewriter()
self.redir_to_exact = redir_to_exact
@@ -300,12 +303,7 @@ class RewritingReplayView(ReplayView):
status_headers = rewritten_headers.status_headers
if text_type == 'html':
- # Support head_insert func
- if hasattr(self.head_insert, '__call__'):
- head_insert_str = self.head_insert(wbrequest, response.cdx)
- else:
- head_insert_str = str(self.head_insert)
-
+ head_insert_str = self.head_insert_view.render_to_string(wbrequest = wbrequest, cdx = response.cdx) if self.head_insert_view else None
rewriter = html_rewriter.HTMLRewriter(urlrewriter, outstream = None, head_insert = head_insert_str)
elif text_type == 'css':
rewriter = regex_rewriters.CSSRewriter(urlrewriter)
diff --git a/pywb/views.py b/pywb/views.py
index 380e427f..a12753af 100644
--- a/pywb/views.py
+++ b/pywb/views.py
@@ -10,63 +10,72 @@ from jinja2 import Environment, FileSystemLoader
#=================================================================
-class TextQueryView:
- def __call__(self, wbrequest, cdx_lines):
+class StaticTextView:
+ def __init__(self, text):
+ self.text = text
+
+ def render_to_string(self, **kwargs):
+ return self.text
+
+ def render_response(self, **kwargs):
+ return wbrequestresponse.WbResponse.text_stream(self.text)
+
+#=================================================================
+class J2TemplateView:
+ def __init__(self, filename):
+ template_dir, template_file = path.split(filename)
+
+ self.template_file = template_file
+
+ self.jinja_env = self.make_jinja_env(template_dir)
+
+
+ def make_jinja_env(self, template_dir):
+ jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True)
+ jinja_env.filters['format_ts'] = J2TemplateView.format_ts
+ return jinja_env
+
+ def render_to_string(self, **kwargs):
+ template = self.jinja_env.get_template(self.template_file)
+
+ template_result = template.render(**kwargs)
+
+ return template_result
+
+ def render_response(self, **kwargs):
+ template_result = self.render_to_string(**kwargs) # Jinja2 renders to a unicode string
+ return wbrequestresponse.WbResponse.text_response(template_result.encode('utf-8'), content_type = 'text/html; charset=utf-8') # encode explicitly: str() would raise UnicodeEncodeError on non-ASCII output
+
+
+ # Filters
+ @staticmethod
+ def format_ts(value, format='%a, %b %d %Y %H:%M:%S'):
+ value = utils.timestamp_to_datetime(value)
+ return time.strftime(format, value)
+
+
+
+
+# cdx index view
+
+#=================================================================
+# html captures 'calendar' view
+#=================================================================
+class J2HtmlCapturesView(J2TemplateView):
+ def render_response(self, wbrequest, cdx_lines):
+ return J2TemplateView.render_response(self,
+ cdx_lines = list(cdx_lines),
+ url = wbrequest.wb_url.url,
+ prefix = wbrequest.wb_prefix)
+
+
+#=================================================================
+# stream raw cdx text
+#=================================================================
+class TextCapturesView:
+ def render_response(self, wbrequest, cdx_lines):
cdx_lines = imap(lambda x: str(x) + '\n', cdx_lines)
return wbrequestresponse.WbResponse.text_stream(cdx_lines)
-#=================================================================
-class J2QueryView:
- def __init__(self, filename, buffer_index = True):
- template_dir, template_file = path.split(filename)
-
- self.template_file = template_file
- self.buffer_index = buffer_index
-
- self.jinja_env = make_jinja_env(template_dir)
- def __call__(self, wbrequest, cdx_lines):
- template = self.jinja_env.get_template(self.template_file)
-
- # buffer/convert to list so we have length available for template
- if self.buffer_index:
- cdx_lines = list(cdx_lines)
-
- response = template.render(cdx_lines = cdx_lines,
- url = wbrequest.wb_url.url,
- prefix = wbrequest.wb_prefix)
-
- return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html')
-
-
-#=================================================================
-# Render the head insert (eg. banner)
-#=================================================================
-class J2HeadInsertView:
- def __init__(self, filename, buffer_index = True):
- template_dir, template_file = path.split(filename)
- self.template_file = template_file
-
- self.jinja_env = make_jinja_env(template_dir)
-
-
- def __call__(self, wbrequest, cdx):
- template = self.jinja_env.get_template(self.template_file)
-
-
- return template.render(wbrequest = wbrequest,cdx = cdx)
-
-
-
-#=================================================================
-# Jinja funcs
-def make_jinja_env(template_dir):
- jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True)
- jinja_env.filters['format_ts'] = format_ts
- return jinja_env
-
-# Filters
-def format_ts(value, format='%H:%M / %d-%m-%Y'):
- value = utils.timestamp_to_datetime(value)
- return time.strftime(format, value)
diff --git a/pywb/wbapp.py b/pywb/wbapp.py
index 14857003..3dc5ecee 100644
--- a/pywb/wbapp.py
+++ b/pywb/wbapp.py
@@ -8,29 +8,6 @@ import importlib
import logging
-
-## ===========
-'''
-
-To declare Wayback with one collection, `mycoll`
-and will be accessed by user at:
-
-`http://mywb.example.com:8080/mycoll/`
-
-and will load cdx from cdx server running at:
-
-`http://cdx.example.com/cdx`
-
-and look for warcs at paths:
-
-`http://warcs.example.com/servewarc/` and
-`http://warcs.example.com/anotherpath/`,
-
-one could declare a `sample_wb_settings()` method as follows
-'''
-
-
-
def create_wb_app(wb_router):
# Top-level wsgi application
@@ -52,14 +29,13 @@ def create_wb_app(wb_router):
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
except (wbexceptions.NotFoundException, wbexceptions.AccessException) as e:
- logging.info(str(e))
- response = handle_exception(env, e)
+ response = handle_exception(env, wb_router.errorpage, e, False)
+
+ except wbexceptions.WbException as wbe:
+ response = handle_exception(env, wb_router.errorpage, wbe, False)
except Exception as e:
- last_exc = e
- import traceback
- traceback.print_exc()
- response = handle_exception(env, e)
+ response = handle_exception(env, wb_router.errorpage, e, True)
return response(env, start_response)
@@ -67,13 +43,25 @@ def create_wb_app(wb_router):
return application
-def handle_exception(env, exc):
+def handle_exception(env, errorpage, exc, print_trace):
if hasattr(exc, 'status'):
status = exc.status()
else:
status = '400 Bad Request'
- return WbResponse.text_response(status + ' Error: ' + str(exc), status = status)
+ if print_trace:
+ import traceback
+ err_details = traceback.format_exc() # formats the exception currently being handled; the optional argument is a frame limit, not the exception
+ print err_details
+ else:
+ logging.info(str(exc))
+ err_details = None
+
+ if errorpage:
+ # NOTE(review): 'status' is not forwarded to the template view, so the error page presumably goes out with the view's default status -- confirm
+ return errorpage.render_response(err_msg = str(exc), err_details = err_details)
+ else:
+ return WbResponse.text_response(status + ' Error: ' + str(exc), status = status)
#=================================================================
diff --git a/pywb/wbexceptions.py b/pywb/wbexceptions.py
index e1c9a5bf..06be06bf 100644
--- a/pywb/wbexceptions.py
+++ b/pywb/wbexceptions.py
@@ -1,26 +1,33 @@
-class RequestParseException(Exception):
+class WbException(Exception):
+ pass
+
+class RequestParseException(WbException):
+ def __init__(self, string, to_parse):
+ WbException.__init__(self, string + to_parse)
+ self.to_parse = to_parse
+
def status(_):
return '400 Bad Request'
-class BadUrlException(Exception):
+class BadUrlException(WbException):
def status(_):
return '400 Bad Request'
-class AccessException(Exception):
+class AccessException(WbException):
def status(_):
return '403 Forbidden'
-class InvalidCDXException(Exception):
+class InvalidCDXException(WbException):
def status(_):
return '500 Internal Server Error'
-class NotFoundException(Exception):
+class NotFoundException(WbException):
def status(_):
return '404 Not Found'
# Exceptions that effect a specific capture and result in a retry
-class CaptureException(Exception):
+class CaptureException(WbException):
def status(_):
return '500 Internal Server Error'
@@ -47,9 +54,9 @@ class ArchiveLoadFailed(CaptureException):
def status(_):
return '503 Service Unavailable'
-class InternalRedirect(Exception):
+class InternalRedirect(WbException):
def __init__(self, location, status = '302 Internal Redirect'):
- Exception.__init__(self, 'Redirecting -> ' + location)
+ WbException.__init__(self, 'Redirecting -> ' + location)
self.status = status
self.httpHeaders = [('Location', location)]
diff --git a/pywb/wbrequestresponse.py b/pywb/wbrequestresponse.py
index e19ae361..a09db184 100644
--- a/pywb/wbrequestresponse.py
+++ b/pywb/wbrequestresponse.py
@@ -68,7 +68,14 @@ class WbRequest:
self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.make_abs_prefix(env, wb_prefix)
- self.wb_url = archivalurl_class(wb_url)
+ # wb_url present and not root page
+ if wb_url != '/' and wb_url != '' and archivalurl_class:
+ self.wb_url_str = wb_url
+ self.wb_url = archivalurl_class(wb_url)
+ else:
+ # no wb_url, just store blank
+ self.wb_url_str = '/'
+ self.wb_url = None
self.coll = coll
diff --git a/pywb/wburl.py b/pywb/wburl.py
index 431dc786..a980545f 100644
--- a/pywb/wburl.py
+++ b/pywb/wburl.py
@@ -82,10 +82,10 @@ class WbUrl:
self.mod = ''
if not any (f(url) for f in [self._init_query, self._init_replay]):
- raise wbexceptions.RequestParseException('Invalid WB Request Url: ' + url)
+ raise wbexceptions.RequestParseException('Invalid WB Request Url: ', url)
if len(self.url) == 0:
- raise wbexceptions.RequestParseException('Invalid WB Request Url: ' + url)
+ raise wbexceptions.RequestParseException('Invalid WB Request Url: ', url)
# protocol agnostic url -> http://
if self.url.startswith('//'):
diff --git a/ui/error.html b/ui/error.html
new file mode 100644
index 00000000..b3a8c478
--- /dev/null
+++ b/ui/error.html
@@ -0,0 +1,11 @@
+Pywb Error
+{{ err_msg }}
+
+{% if err_details %}
+Error Details:
+
+
+{{ err_details }}
+
+
+{% endif %}
diff --git a/ui/head_insert.html b/ui/head_insert.html
index 671f6fc5..3af55ad5 100644
--- a/ui/head_insert.html
+++ b/ui/head_insert.html
@@ -1,7 +1,7 @@
diff --git a/ui/index.html b/ui/index.html
new file mode 100644
index 00000000..22fd5637
--- /dev/null
+++ b/ui/index.html
@@ -0,0 +1,9 @@
+pywb Sample Home Page
+
+The following archive collections are available:
+
+
diff --git a/ui/query.html b/ui/query.html
index 7d353436..11712fda 100644
--- a/ui/query.html
+++ b/ui/query.html
@@ -4,14 +4,20 @@
- * Unique captures are bold.
* Other captures are duplicates of a previous capture.
+
+ * Unique captures are bold. Other captures are duplicates of a previous capture.
+