From 304ddbec847316839a26ce72e08b7edbd9a09f72 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 31 Jan 2014 10:04:21 -0800 Subject: [PATCH] Support for new UI, as per #16 * Refactor views class to support more Jinja2 views (J2Template) * Add a home page, collection search page, and error pages, all optional * all exceptions appear on error page * wbrequest supports a request with an empty or / wb_url --- README.md | 4 +- config.yaml | 10 ++++ pywb/archivalrouter.py | 32 ++++++++-- pywb/cdxserve.py | 5 +- pywb/handlers.py | 36 +++++++++--- pywb/indexreader.py | 13 ++++- pywb/pywb_init.py | 34 +++++++---- pywb/replay_resolvers.py | 36 ++++++++---- pywb/replay_views.py | 14 ++--- pywb/views.py | 119 ++++++++++++++++++++------------------ pywb/wbapp.py | 50 ++++++---------- pywb/wbexceptions.py | 23 +++++--- pywb/wbrequestresponse.py | 9 ++- pywb/wburl.py | 4 +- ui/error.html | 11 ++++ ui/head_insert.html | 2 +- ui/index.html | 9 +++ ui/query.html | 12 +++- ui/search.html | 6 ++ 19 files changed, 281 insertions(+), 148 deletions(-) create mode 100644 ui/error.html create mode 100644 ui/index.html create mode 100644 ui/search.html diff --git a/README.md b/README.md index 9424ca9b..d1f6979a 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,8 @@ Ex: The [Internet Archive Wayback Machine](https//archive.org/web/) has urls of A listing of archived content, often in calendar form, is available when a `*` is used instead of timestamp. +The Wayback Machine uses an html parser to rewrite relative and absolute links, as well as absolute links found in javascript, css and some xml. + pywb uses this interface as a starting point. @@ -36,7 +38,7 @@ pywb currently works best with 2.7.x It should run in a standard WSGI container, although currently tested primarily with uWSGI 1.9 and 2.0 -Support for other versions of Python 3 is planned. +Support for Python 3 is planned. ### Installation diff --git a/config.yaml b/config.yaml index ad36bcf3..a2cc6842 100644 --- a/config.yaml +++ b/config.yaml @@ -52,6 +52,9 @@ routes: # if omitted, the capture listing lists raw index calendar_html_template: ./ui/query.html + # ui: optional Jinja2 template to use for 'search' page + # this page is displayed when no search url is entered + search_html_template: ./ui/search.html # list of host names that pywb will be running from to detect # 'fallthrough' requests based on referrer @@ -63,6 +66,13 @@ routes: hostpaths: ['http://localhost:8080/'] +# ui: optional Jinja2 template for home page +# if no other route is set to home page, this template will +# be rendered at /, /index.htm and /index.html +home_html_template: ./ui/index.html +# ui: optional Jinja2 template for rendering any errors +# the error page may print a detailed error message +error_html_template: ./ui/error.html diff --git a/pywb/archivalrouter.py b/pywb/archivalrouter.py index 869408df..398d9c5c 100644 --- a/pywb/archivalrouter.py +++ b/pywb/archivalrouter.py @@ -1,5 +1,6 @@ import urlparse import re +import wbexceptions from wbrequestresponse import WbRequest, WbResponse from url_rewriter import UrlRewriter @@ -9,25 +10,39 @@ from wburl import WbUrl # ArchivalRequestRouter -- route WB requests in archival mode #================================================================= class ArchivalRequestRouter: - def __init__(self, handlers, hostpaths = None, abs_path = True, archivalurl_class = WbUrl): - self.handlers = handlers + def __init__(self, routes, hostpaths = None, abs_path = True, archivalurl_class = WbUrl, homepage = None, errorpage = None): + self.routes = routes self.fallback = ReferRedirect(hostpaths) self.abs_path = abs_path self.archivalurl_class = archivalurl_class + self.homepage = homepage + self.errorpage = errorpage + def __call__(self, env): - for handler in self.handlers: - result = handler(env, self.abs_path, self.archivalurl_class) + for route in self.routes: + result = route(env, self.abs_path, self.archivalurl_class) if result: return result + # Home Page + if env['REL_REQUEST_URI'] in ['/', '/index.html', '/index.htm']: + return self.render_homepage() + if not self.fallback: return None return self.fallback(WbRequest.from_uri(None, env)) - + def render_homepage(self): + # render the homepage! + if self.homepage: + return self.homepage.render_response(routes = self.routes) + else: + # default home page template + text = '\n'.join(map(str, self.routes)) + return WbResponse.text_response(text) #================================================================= # Route by matching regex (or fixed prefix) @@ -36,10 +51,11 @@ class ArchivalRequestRouter: class Route: # match upto next slash - SLASH_LOOKAHEAD ='(?=/)' + SLASH_LOOKAHEAD ='(?=/|$)' def __init__(self, regex, handler, coll_group = 0, lookahead = SLASH_LOOKAHEAD): + self.path = regex self.regex = re.compile(regex + lookahead) self.handler = handler # collection id from regex group (default 0) @@ -83,6 +99,10 @@ class Route: def _handle_request(self, wbrequest): return self.handler(wbrequest) + def __str__(self): + #return '* ' + self.regex_str + ' => ' + str(self.handler) + return str(self.handler) + #================================================================= # ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings diff --git a/pywb/cdxserve.py b/pywb/cdxserve.py index e4922516..752b26f9 100644 --- a/pywb/cdxserve.py +++ b/pywb/cdxserve.py @@ -114,6 +114,9 @@ def cdx_reverse(cdx_iter, limit): >>> test_cdx('org,iana)/_js/2013.1/jquery.js', reverse = True, resolve_revisits = True, limit = 1) org,iana)/_js/2013.1/jquery.js 20140126201307 https://www.iana.org/_js/2013.1/jquery.js application/x-javascript 200 AAW2RS7JB7HTF666XNZDQYJFA6PDQBPO - - 543 778507 iana.warc.gz 33449 7311 iana.warc.gz + + # no match, single result + >>> test_cdx('org,iana)/dont_have_this', reverse = True, resolve_revisits = True, limit = 1) """ # optimize for single last @@ -123,7 +126,7 @@ def cdx_reverse(cdx_iter, limit): for cdx in cdx_iter: last = cdx - return [last] + return [last] if last else [] reverse_cdxs = deque(maxlen = limit) diff --git a/pywb/handlers.py b/pywb/handlers.py index 2e2e77fe..6c7026c4 100644 --- a/pywb/handlers.py +++ b/pywb/handlers.py @@ -2,34 +2,52 @@ import views import utils import urlparse +from wbrequestresponse import WbResponse + #================================================================= # Standard WB Handler #================================================================= class WBHandler: - def __init__(self, cdx_reader, replay, html_view = None): + def __init__(self, cdx_reader, replay, capturespage = None, searchpage = None): self.cdx_reader = cdx_reader self.replay = replay - self.html_view = html_view - self.text_view = views.TextQueryView() + + self.text_view = views.TextCapturesView() + self.html_view = capturespage + self.searchpage = searchpage + def __call__(self, wbrequest): + + if wbrequest.wb_url_str == '/': + return self.render_searchpage(wbrequest) + with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t: cdx_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True) # new special modifier to always show cdx index if wbrequest.wb_url.mod == 'cdx_': - return self.text_view(wbrequest, cdx_lines) + return self.text_view.render_response(wbrequest, cdx_lines) if (wbrequest.wb_url.type == wbrequest.wb_url.QUERY) or (wbrequest.wb_url.type == wbrequest.wb_url.URL_QUERY): - if not self.html_view: - return self.text_view(wbrequest, cdx_lines) - else: - return self.html_view(wbrequest, cdx_lines) + query_view = self.html_view if self.html_view else self.text_view + return query_view.render_response(wbrequest, cdx_lines) with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t: return self.replay(wbrequest, cdx_lines, self.cdx_reader) + def render_searchpage(self, wbrequest): + if self.searchpage: + return self.searchpage.render_response(wbrequest = wbrequest) + else: + return WbResponse.text_response('No Lookup Url Specified') + + + def __str__(self): + return 'WBHandler: ' + str(self.cdx_reader) + ', ' + str(self.replay) + + #================================================================= # CDX-Server Handler -- pass all params to cdx server @@ -37,7 +55,7 @@ class WBHandler: class CDXHandler: def __init__(self, cdx_reader, view = None): self.cdx_reader = cdx_reader - self.view = view if view else views.TextQueryView() + self.view = view if view else views.TextCapturesView() def __call__(self, wbrequest): url = wbrequest.wb_url.url diff --git a/pywb/indexreader.py b/pywb/indexreader.py index c2049fff..1daacc82 100644 --- a/pywb/indexreader.py +++ b/pywb/indexreader.py @@ -83,7 +83,10 @@ class LocalCDXServer(IndexReader): def load_cdx(self, url, params = {}, parsed_cdx = True, **kwvalues): # canonicalize to surt (canonicalization is part of surt conversion) - key = surt.surt(url) + try: + key = surt.surt(url) + except Exception as e: + raise wbexceptions.BadUrlException('Bad Request Url: ' + url) # if not surt, unsurt the surt to get canonicalized non-surt url if not self.surt_ordered: @@ -123,6 +126,10 @@ class LocalCDXServer(IndexReader): }[wburl.type] + def __str__(self): + return 'load cdx indexes from ' + str(self.sources) + + #================================================================= class RemoteCDXServer(IndexReader): @@ -196,6 +203,10 @@ class RemoteCDXServer(IndexReader): }[wburl.type] + def __str__(self): + return 'server cdx from ' + self.server_url + + #================================================================= class CDXCaptureResult(OrderedDict): CDX_FORMATS = [ diff --git a/pywb/pywb_init.py b/pywb/pywb_init.py index 1690c64c..ea21668e 100644 --- a/pywb/pywb_init.py +++ b/pywb/pywb_init.py @@ -39,13 +39,13 @@ def pywb_config_manual(): prefixes = [replay_resolvers.PrefixResolver(test_dir + 'warcs/')] # Jinja2 head insert - head_insert = views.J2HeadInsertView('./ui/head_insert.html') + head_insert = views.J2TemplateView('./ui/head_insert.html') # Create rewriting replay handler to rewrite records - replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert = head_insert, buffer_response = True) + replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert_view = head_insert, buffer_response = True) # Create Jinja2 based html query view - html_view = views.J2QueryView('./ui/query.html') + html_view = views.J2HtmlCapturesView('./ui/query.html') # WB handler which uses the index reader, replayer, and html_view wb_handler = handlers.WBHandler(indexs, replayer, html_view) @@ -81,11 +81,21 @@ def pywb_config(config_file = None): routes = map(yaml_parse_route, config['routes']) + homepage = yaml_load_template(config, 'home_html_template', 'Home Page Template') + errorpage = yaml_load_template(config, 'error_html_template', 'Error Page Template') + hostpaths = config.get('hostpaths', ['http://localhost:8080/']) - return ArchivalRequestRouter(routes, hostpaths) + return ArchivalRequestRouter(routes, hostpaths, homepage = homepage, errorpage = errorpage) +def yaml_load_template(config, name, desc = None): + file = config.get(name) + if file: + logging.info('Adding {0}: {1}'.format(desc if desc else name, file)) + file = views.J2TemplateView(file) + return file + def yaml_parse_index_loader(config): @@ -113,17 +123,19 @@ def yaml_parse_index_loader(config): return indexreader.LocalCDXServer([uri]) + + def yaml_parse_head_insert(config): # First, try a template file head_insert_file = config.get('head_insert_html_template') if head_insert_file: logging.info('Adding Head-Insert Template: ' + head_insert_file) - return views.J2HeadInsertView(head_insert_file) + return views.J2TemplateView(head_insert_file) # Then, static head_insert text head_insert_text = config.get('head_insert_text', '') - logging.info('Adding Head-Insert Text: ' + head_insert_text) - return head_insert_text + logging.info('Adding Head-Insert Text: ' + head_insert_text) + return views.StaticTextView(head_insert_text) def yaml_parse_calendar_view(config): @@ -133,7 +145,7 @@ def yaml_parse_calendar_view(config): else: logging.info('No HTML Calendar View Present') - return views.J2QueryView(html_view_file) if html_view_file else None + return views.J2HtmlCapturesView(html_view_file) if html_view_file else None @@ -150,12 +162,14 @@ def yaml_parse_route(config): replayer = replay_views.RewritingReplayView(resolvers = archive_resolvers, archiveloader = archive_loader, - head_insert = head_insert, + head_insert_view = head_insert, buffer_response = config.get('buffer_response', False)) html_view = yaml_parse_calendar_view(config) - wb_handler = handlers.WBHandler(index_loader, replayer, html_view) + searchpage = yaml_load_template(config, 'search_html_template', 'Search Page Template') + + wb_handler = handlers.WBHandler(index_loader, replayer, html_view, searchpage = searchpage) return Route(name, wb_handler) diff --git a/pywb/replay_resolvers.py b/pywb/replay_resolvers.py index d87951e6..3f44ce33 100644 --- a/pywb/replay_resolvers.py +++ b/pywb/replay_resolvers.py @@ -16,6 +16,13 @@ class PrefixResolver: def __call__(self, filename): return [self.prefix + filename] if (self.contains in filename) else [] + def __repr__(self): + if self.contains: + return "PrefixResolver('{0}', contains = '{1}')".format(self.prefix, self.contains) + else: + return "PrefixResolver('{0}')".format(self.prefix) + + #====================================== class RedisResolver: def __init__(self, redis_url, key_prefix = 'w:'): @@ -31,9 +38,14 @@ class RedisResolver: print e return None + def __repr__(self): + return "RedisResolver('{0}')".format(self.redis_url) + + #====================================== class PathIndexResolver: def __init__(self, pathindex_file): + self.pathindex_file = pathindex_file self.reader = binsearch.FileReader(pathindex_file) def __call__(self, filename): @@ -47,27 +59,32 @@ class PathIndexResolver: return gen_list(result) + def __repr__(self): + return "PathIndexResolver('{0}')".format(self.pathindex_file) + #TODO: more options (remote files, contains param, etc..) # find best resolver given the path def make_best_resolver(path): """ # http path - >>> class_name(make_best_resolver('http://myhost.example.com/warcs/')) - 'PrefixResolver' + >>> make_best_resolver('http://myhost.example.com/warcs/') + PrefixResolver('http://myhost.example.com/warcs/') # redis path - >>> class_name(make_best_resolver('redis://myhost.example.com:1234/1')) - 'RedisResolver' + >>> make_best_resolver('redis://myhost.example.com:1234/1') + RedisResolver('redis://myhost.example.com:1234/1') # a file - >>> class_name(make_best_resolver('file://' + os.path.realpath(__file__))) - 'PathIndexResolver' + >>> make_best_resolver('file://' + os.path.dirname(os.path.realpath(__file__)) + '/replay_resolvers.py') + PathIndexResolver('/home/ilya/workspace/pywb/pywb/replay_resolvers.py') # a dir - >>> class_name(make_best_resolver('file://' + os.path.dirname(os.path.realpath(__file__)))) - 'PrefixResolver' + >>> make_best_resolver('file://' + os.path.dirname(os.path.realpath(__file__))) + PrefixResolver('/home/ilya/workspace/pywb/pywb') + """ + url_parts = urlparse.urlsplit(path) if url_parts.scheme == 'redis': @@ -90,9 +107,6 @@ import utils #================================================================= if __name__ == "__main__" or utils.enable_doctests(): - def class_name(obj): - return obj.__class__.__name__ - import doctest doctest.testmod() diff --git a/pywb/replay_views.py b/pywb/replay_views.py index 8904ccfc..45ea3b7b 100644 --- a/pywb/replay_views.py +++ b/pywb/replay_views.py @@ -210,12 +210,15 @@ class ReplayView: stream.close() + def __str__(self): + return 'find archive files from ' + str(self.resolvers) + #================================================================= class RewritingReplayView(ReplayView): - def __init__(self, resolvers, archiveloader, head_insert = None, header_rewriter = None, redir_to_exact = True, buffer_response = False): + def __init__(self, resolvers, archiveloader, head_insert_view = None, header_rewriter = None, redir_to_exact = True, buffer_response = False): ReplayView.__init__(self, resolvers, archiveloader) - self.head_insert = head_insert + self.head_insert_view = head_insert_view self.header_rewriter = header_rewriter if header_rewriter else HeaderRewriter() self.redir_to_exact = redir_to_exact @@ -300,12 +303,7 @@ class RewritingReplayView(ReplayView): status_headers = rewritten_headers.status_headers if text_type == 'html': - # Support head_insert func - if hasattr(self.head_insert, '__call__'): - head_insert_str = self.head_insert(wbrequest, response.cdx) - else: - head_insert_str = str(self.head_insert) - + head_insert_str = self.head_insert_view.render_to_string(wbrequest = wbrequest, cdx = response.cdx) if self.head_insert_view else None rewriter = html_rewriter.HTMLRewriter(urlrewriter, outstream = None, head_insert = head_insert_str) elif text_type == 'css': rewriter = regex_rewriters.CSSRewriter(urlrewriter) diff --git a/pywb/views.py b/pywb/views.py index 380e427f..a12753af 100644 --- a/pywb/views.py +++ b/pywb/views.py @@ -10,63 +10,72 @@ from jinja2 import Environment, FileSystemLoader #================================================================= -class TextQueryView: - def __call__(self, wbrequest, cdx_lines): +class StaticTextView: + def __init__(self, text): + self.text = text + + def render_to_string(self, **kwargs): + return self.text + + def render_response(self, **kwargs): + return wbrequestresponse.WbResponse.text_stream(self.text) + +#================================================================= +class J2TemplateView: + def __init__(self, filename): + template_dir, template_file = path.split(filename) + + self.template_file = template_file + + self.jinja_env = self.make_jinja_env(template_dir) + + + def make_jinja_env(self, template_dir): + jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True) + jinja_env.filters['format_ts'] = J2TemplateView.format_ts + return jinja_env + + def render_to_string(self, **kwargs): + template = self.jinja_env.get_template(self.template_file) + + template_result = template.render(**kwargs) + + return template_result + + def render_response(self, **kwargs): + template_result = self.render_to_string(**kwargs) + return wbrequestresponse.WbResponse.text_response(str(template_result), content_type = 'text/html; charset=utf-8') + + + # Filters + @staticmethod + def format_ts(value, format='%a, %b %d %Y %H:%M:%S'): + value = utils.timestamp_to_datetime(value) + return time.strftime(format, value) + + + + +# cdx index view + +#================================================================= +# html captures 'calendar' view +#================================================================= +class J2HtmlCapturesView(J2TemplateView): + def render_response(self, wbrequest, cdx_lines): + return J2TemplateView.render_response(self, + cdx_lines = list(cdx_lines), + url = wbrequest.wb_url.url, + prefix = wbrequest.wb_prefix) + + +#================================================================= +# stream raw cdx text +#================================================================= +class TextCapturesView: + def render_response(self, wbrequest, cdx_lines): cdx_lines = imap(lambda x: str(x) + '\n', cdx_lines) return wbrequestresponse.WbResponse.text_stream(cdx_lines) -#================================================================= -class J2QueryView: - def __init__(self, filename, buffer_index = True): - template_dir, template_file = path.split(filename) - - self.template_file = template_file - self.buffer_index = buffer_index - - self.jinja_env = make_jinja_env(template_dir) - def __call__(self, wbrequest, cdx_lines): - template = self.jinja_env.get_template(self.template_file) - - # buffer/convert to list so we have length available for template - if self.buffer_index: - cdx_lines = list(cdx_lines) - - response = template.render(cdx_lines = cdx_lines, - url = wbrequest.wb_url.url, - prefix = wbrequest.wb_prefix) - - return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html') - - -#================================================================= -# Render the head insert (eg. banner) -#================================================================= -class J2HeadInsertView: - def __init__(self, filename, buffer_index = True): - template_dir, template_file = path.split(filename) - self.template_file = template_file - - self.jinja_env = make_jinja_env(template_dir) - - - def __call__(self, wbrequest, cdx): - template = self.jinja_env.get_template(self.template_file) - - - return template.render(wbrequest = wbrequest,cdx = cdx) - - - -#================================================================= -# Jinja funcs -def make_jinja_env(template_dir): - jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True) - jinja_env.filters['format_ts'] = format_ts - return jinja_env - -# Filters -def format_ts(value, format='%H:%M / %d-%m-%Y'): - value = utils.timestamp_to_datetime(value) - return time.strftime(format, value) diff --git a/pywb/wbapp.py b/pywb/wbapp.py index 14857003..3dc5ecee 100644 --- a/pywb/wbapp.py +++ b/pywb/wbapp.py @@ -8,29 +8,6 @@ import importlib import logging - -## =========== -''' - -To declare Wayback with one collection, `mycoll` -and will be accessed by user at: - -`http://mywb.example.com:8080/mycoll/` - -and will load cdx from cdx server running at: - -`http://cdx.example.com/cdx` - -and look for warcs at paths: - -`http://warcs.example.com/servewarc/` and -`http://warcs.example.com/anotherpath/`, - -one could declare a `sample_wb_settings()` method as follows -''' - - - def create_wb_app(wb_router): # Top-level wsgi application @@ -52,14 +29,13 @@ def create_wb_app(wb_router): response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders)) except (wbexceptions.NotFoundException, wbexceptions.AccessException) as e: - logging.info(str(e)) - response = handle_exception(env, e) + response = handle_exception(env, wb_router.errorpage, e, False) + + except wbexceptions.WbException as wbe: + response = handle_exception(env, wb_router.errorpage, wbe, False) except Exception as e: - last_exc = e - import traceback - traceback.print_exc() - response = handle_exception(env, e) + response = handle_exception(env, wb_router.errorpage, e, True) return response(env, start_response) @@ -67,13 +43,25 @@ def create_wb_app(wb_router): return application -def handle_exception(env, exc): +def handle_exception(env, errorpage, exc, print_trace): if hasattr(exc, 'status'): status = exc.status() else: status = '400 Bad Request' - return WbResponse.text_response(status + ' Error: ' + str(exc), status = status) + if print_trace: + import traceback + err_details = traceback.format_exc(exc) + print err_details + else: + logging.info(str(exc)) + err_details = None + + if errorpage: + import traceback + return errorpage.render_response(err_msg = str(exc), err_details = err_details) + else: + return WbResponse.text_response(status + ' Error: ' + str(exc), status = status) #================================================================= diff --git a/pywb/wbexceptions.py b/pywb/wbexceptions.py index e1c9a5bf..06be06bf 100644 --- a/pywb/wbexceptions.py +++ b/pywb/wbexceptions.py @@ -1,26 +1,33 @@ -class RequestParseException(Exception): +class WbException(Exception): + pass + +class RequestParseException(WbException): + def __init__(self, string, to_parse): + WbException.__init__(self, string + to_parse) + self.to_parse = to_parse + def status(_): return '400 Bad Request' -class BadUrlException(Exception): +class BadUrlException(WbException): def status(_): return '400 Bad Request' -class AccessException(Exception): +class AccessException(WbException): def status(_): return '403 Forbidden' -class InvalidCDXException(Exception): +class InvalidCDXException(WbException): def status(_): return '500 Internal Server Error' -class NotFoundException(Exception): +class NotFoundException(WbException): def status(_): return '404 Not Found' # Exceptions that effect a specific capture and result in a retry -class CaptureException(Exception): +class CaptureException(WbException): def status(_): return '500 Internal Server Error' @@ -47,9 +54,9 @@ class ArchiveLoadFailed(CaptureException): def status(_): return '503 Service Unavailable' -class InternalRedirect(Exception): +class InternalRedirect(WbException): def __init__(self, location, status = '302 Internal Redirect'): - Exception.__init__(self, 'Redirecting -> ' + location) + WbException.__init__(self, 'Redirecting -> ' + location) self.status = status self.httpHeaders = [('Location', location)] diff --git a/pywb/wbrequestresponse.py b/pywb/wbrequestresponse.py index e19ae361..a09db184 100644 --- a/pywb/wbrequestresponse.py +++ b/pywb/wbrequestresponse.py @@ -68,7 +68,14 @@ class WbRequest: self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.make_abs_prefix(env, wb_prefix) - self.wb_url = archivalurl_class(wb_url) + # wb_url present and not root page + if wb_url != '/' and wb_url != '' and archivalurl_class: + self.wb_url_str = wb_url + self.wb_url = archivalurl_class(wb_url) + else: + # no wb_url, just store blank + self.wb_url_str = '/' + self.wb_url = None self.coll = coll diff --git a/pywb/wburl.py b/pywb/wburl.py index 431dc786..a980545f 100644 --- a/pywb/wburl.py +++ b/pywb/wburl.py @@ -82,10 +82,10 @@ class WbUrl: self.mod = '' if not any (f(url) for f in [self._init_query, self._init_replay]): - raise wbexceptions.RequestParseException('Invalid WB Request Url: ' + url) + raise wbexceptions.RequestParseException('Invalid WB Request Url: ', url) if len(self.url) == 0: - raise wbexceptions.RequestParseException('Invalid WB Request Url: ' + url) + raise wbexceptions.RequestParseException('Invalid WB Request Url: ', url) # protocol agnostic url -> http:// if self.url.startswith('//'): diff --git a/ui/error.html b/ui/error.html new file mode 100644 index 00000000..b3a8c478 --- /dev/null +++ b/ui/error.html @@ -0,0 +1,11 @@ +

Pywb Error

+{{ err_msg }} + +{% if err_details %} +

Error Details:

+

+

+{{ err_details }}
+
+

+{% endif %} diff --git a/ui/head_insert.html b/ui/head_insert.html index 671f6fc5..3af55ad5 100644 --- a/ui/head_insert.html +++ b/ui/head_insert.html @@ -1,7 +1,7 @@ diff --git a/ui/index.html b/ui/index.html new file mode 100644 index 00000000..22fd5637 --- /dev/null +++ b/ui/index.html @@ -0,0 +1,9 @@ +

pywb Sample Home Page

+ +The following archive collections are available: + + diff --git a/ui/query.html b/ui/query.html index 7d353436..11712fda 100644 --- a/ui/query.html +++ b/ui/query.html @@ -4,14 +4,20 @@ + + - {% for cdx in cdx_lines %} + {% for cdx in cdx_lines %} - + + + {% endfor %}
CaptureStatusOriginal Url Archive File
{{ cdx['timestamp'] | format_ts('%a, %b %d %Y %H:%M:%S') }}{{ cdx['timestamp'] | format_ts}} {{ cdx['filename'] }}{{ cdx['statuscode'] }}{{ cdx['originalurl'] }}
- * Unique captures are bold.
* Other captures are duplicates of a previous capture.
+

+ * Unique captures are bold. Other captures are duplicates of a previous capture. +

diff --git a/ui/search.html b/ui/search.html new file mode 100644 index 00000000..2e1e5b36 --- /dev/null +++ b/ui/search.html @@ -0,0 +1,6 @@ +

pywb Search Page

+Search Archived Content: +
+ + +