mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Support for new UI, as per #16
* Refactor views class to support more Jinja2 views (J2Template) * Add a home page, collection search page, and error pages, all optional * all exceptions appear on error page * wbrequest supports a request with an empty or / wb_url
This commit is contained in:
parent
57fe9515db
commit
304ddbec84
@ -27,6 +27,8 @@ Ex: The [Internet Archive Wayback Machine](https://archive.org/web/) has urls of
|
||||
|
||||
A listing of archived content, often in calendar form, is available when a `*` is used instead of timestamp.
|
||||
|
||||
The Wayback Machine uses an html parser to rewrite relative and absolute links, as well as absolute links found in javascript, css and some xml.
|
||||
|
||||
pywb uses this interface as a starting point.
|
||||
|
||||
|
||||
@ -36,7 +38,7 @@ pywb currently works best with 2.7.x
|
||||
It should run in a standard WSGI container, although it is currently
|
||||
tested primarily with uWSGI 1.9 and 2.0
|
||||
|
||||
Support for other versions of Python 3 is planned.
|
||||
Support for Python 3 is planned.
|
||||
|
||||
|
||||
### Installation
|
||||
|
10
config.yaml
10
config.yaml
@ -52,6 +52,9 @@ routes:
|
||||
# if omitted, the capture listing lists raw index
|
||||
calendar_html_template: ./ui/query.html
|
||||
|
||||
# ui: optional Jinja2 template to use for 'search' page
|
||||
# this page is displayed when no search url is entered
|
||||
search_html_template: ./ui/search.html
|
||||
|
||||
# list of host names that pywb will be running from to detect
|
||||
# 'fallthrough' requests based on referrer
|
||||
@ -63,6 +66,13 @@ routes:
|
||||
|
||||
hostpaths: ['http://localhost:8080/']
|
||||
|
||||
# ui: optional Jinja2 template for home page
|
||||
# if no other route is set to home page, this template will
|
||||
# be rendered at /, /index.htm and /index.html
|
||||
home_html_template: ./ui/index.html
|
||||
|
||||
|
||||
# ui: optional Jinja2 template for rendering any errors
|
||||
# the error page may print a detailed error message
|
||||
error_html_template: ./ui/error.html
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
import urlparse
|
||||
import re
|
||||
import wbexceptions
|
||||
|
||||
from wbrequestresponse import WbRequest, WbResponse
|
||||
from url_rewriter import UrlRewriter
|
||||
@ -9,25 +10,39 @@ from wburl import WbUrl
|
||||
# ArchivalRequestRouter -- route WB requests in archival mode
|
||||
#=================================================================
|
||||
class ArchivalRequestRouter:
|
||||
def __init__(self, handlers, hostpaths = None, abs_path = True, archivalurl_class = WbUrl):
|
||||
self.handlers = handlers
|
||||
def __init__(self, routes, hostpaths = None, abs_path = True, archivalurl_class = WbUrl, homepage = None, errorpage = None):
|
||||
self.routes = routes
|
||||
self.fallback = ReferRedirect(hostpaths)
|
||||
self.abs_path = abs_path
|
||||
self.archivalurl_class = archivalurl_class
|
||||
|
||||
self.homepage = homepage
|
||||
self.errorpage = errorpage
|
||||
|
||||
def __call__(self, env):
|
||||
for handler in self.handlers:
|
||||
result = handler(env, self.abs_path, self.archivalurl_class)
|
||||
for route in self.routes:
|
||||
result = route(env, self.abs_path, self.archivalurl_class)
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Home Page
|
||||
if env['REL_REQUEST_URI'] in ['/', '/index.html', '/index.htm']:
|
||||
return self.render_homepage()
|
||||
|
||||
if not self.fallback:
|
||||
return None
|
||||
|
||||
return self.fallback(WbRequest.from_uri(None, env))
|
||||
|
||||
|
||||
|
||||
def render_homepage(self):
|
||||
# render the homepage!
|
||||
if self.homepage:
|
||||
return self.homepage.render_response(routes = self.routes)
|
||||
else:
|
||||
# default home page template
|
||||
text = '\n'.join(map(str, self.routes))
|
||||
return WbResponse.text_response(text)
|
||||
|
||||
#=================================================================
|
||||
# Route by matching regex (or fixed prefix)
|
||||
@ -36,10 +51,11 @@ class ArchivalRequestRouter:
|
||||
class Route:
|
||||
|
||||
# match up to next slash
|
||||
SLASH_LOOKAHEAD ='(?=/)'
|
||||
SLASH_LOOKAHEAD ='(?=/|$)'
|
||||
|
||||
|
||||
def __init__(self, regex, handler, coll_group = 0, lookahead = SLASH_LOOKAHEAD):
|
||||
self.path = regex
|
||||
self.regex = re.compile(regex + lookahead)
|
||||
self.handler = handler
|
||||
# collection id from regex group (default 0)
|
||||
@ -83,6 +99,10 @@ class Route:
|
||||
def _handle_request(self, wbrequest):
|
||||
return self.handler(wbrequest)
|
||||
|
||||
def __str__(self):
|
||||
#return '* ' + self.regex_str + ' => ' + str(self.handler)
|
||||
return str(self.handler)
|
||||
|
||||
|
||||
#=================================================================
|
||||
# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings
|
||||
|
@ -114,6 +114,9 @@ def cdx_reverse(cdx_iter, limit):
|
||||
|
||||
>>> test_cdx('org,iana)/_js/2013.1/jquery.js', reverse = True, resolve_revisits = True, limit = 1)
|
||||
org,iana)/_js/2013.1/jquery.js 20140126201307 https://www.iana.org/_js/2013.1/jquery.js application/x-javascript 200 AAW2RS7JB7HTF666XNZDQYJFA6PDQBPO - - 543 778507 iana.warc.gz 33449 7311 iana.warc.gz
|
||||
|
||||
# no match, single result
|
||||
>>> test_cdx('org,iana)/dont_have_this', reverse = True, resolve_revisits = True, limit = 1)
|
||||
"""
|
||||
|
||||
# optimize for single last
|
||||
@ -123,7 +126,7 @@ def cdx_reverse(cdx_iter, limit):
|
||||
for cdx in cdx_iter:
|
||||
last = cdx
|
||||
|
||||
return [last]
|
||||
return [last] if last else []
|
||||
|
||||
reverse_cdxs = deque(maxlen = limit)
|
||||
|
||||
|
@ -2,34 +2,52 @@ import views
|
||||
import utils
|
||||
import urlparse
|
||||
|
||||
from wbrequestresponse import WbResponse
|
||||
|
||||
#=================================================================
|
||||
# Standard WB Handler
|
||||
#=================================================================
|
||||
class WBHandler:
|
||||
def __init__(self, cdx_reader, replay, html_view = None):
|
||||
def __init__(self, cdx_reader, replay, capturespage = None, searchpage = None):
|
||||
self.cdx_reader = cdx_reader
|
||||
self.replay = replay
|
||||
self.html_view = html_view
|
||||
self.text_view = views.TextQueryView()
|
||||
|
||||
self.text_view = views.TextCapturesView()
|
||||
self.html_view = capturespage
|
||||
self.searchpage = searchpage
|
||||
|
||||
|
||||
def __call__(self, wbrequest):
|
||||
|
||||
if wbrequest.wb_url_str == '/':
|
||||
return self.render_searchpage(wbrequest)
|
||||
|
||||
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
|
||||
cdx_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True)
|
||||
|
||||
# new special modifier to always show cdx index
|
||||
if wbrequest.wb_url.mod == 'cdx_':
|
||||
return self.text_view(wbrequest, cdx_lines)
|
||||
return self.text_view.render_response(wbrequest, cdx_lines)
|
||||
|
||||
if (wbrequest.wb_url.type == wbrequest.wb_url.QUERY) or (wbrequest.wb_url.type == wbrequest.wb_url.URL_QUERY):
|
||||
if not self.html_view:
|
||||
return self.text_view(wbrequest, cdx_lines)
|
||||
else:
|
||||
return self.html_view(wbrequest, cdx_lines)
|
||||
query_view = self.html_view if self.html_view else self.text_view
|
||||
return query_view.render_response(wbrequest, cdx_lines)
|
||||
|
||||
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
|
||||
return self.replay(wbrequest, cdx_lines, self.cdx_reader)
|
||||
|
||||
|
||||
def render_searchpage(self, wbrequest):
|
||||
if self.searchpage:
|
||||
return self.searchpage.render_response(wbrequest = wbrequest)
|
||||
else:
|
||||
return WbResponse.text_response('No Lookup Url Specified')
|
||||
|
||||
|
||||
def __str__(self):
|
||||
return 'WBHandler: ' + str(self.cdx_reader) + ', ' + str(self.replay)
|
||||
|
||||
|
||||
|
||||
#=================================================================
|
||||
# CDX-Server Handler -- pass all params to cdx server
|
||||
@ -37,7 +55,7 @@ class WBHandler:
|
||||
class CDXHandler:
|
||||
def __init__(self, cdx_reader, view = None):
|
||||
self.cdx_reader = cdx_reader
|
||||
self.view = view if view else views.TextQueryView()
|
||||
self.view = view if view else views.TextCapturesView()
|
||||
|
||||
def __call__(self, wbrequest):
|
||||
url = wbrequest.wb_url.url
|
||||
|
@ -83,7 +83,10 @@ class LocalCDXServer(IndexReader):
|
||||
|
||||
def load_cdx(self, url, params = {}, parsed_cdx = True, **kwvalues):
|
||||
# canonicalize to surt (canonicalization is part of surt conversion)
|
||||
key = surt.surt(url)
|
||||
try:
|
||||
key = surt.surt(url)
|
||||
except Exception as e:
|
||||
raise wbexceptions.BadUrlException('Bad Request Url: ' + url)
|
||||
|
||||
# if not surt, unsurt the surt to get canonicalized non-surt url
|
||||
if not self.surt_ordered:
|
||||
@ -123,6 +126,10 @@ class LocalCDXServer(IndexReader):
|
||||
}[wburl.type]
|
||||
|
||||
|
||||
def __str__(self):
|
||||
return 'load cdx indexes from ' + str(self.sources)
|
||||
|
||||
|
||||
|
||||
#=================================================================
|
||||
class RemoteCDXServer(IndexReader):
|
||||
@ -196,6 +203,10 @@ class RemoteCDXServer(IndexReader):
|
||||
}[wburl.type]
|
||||
|
||||
|
||||
def __str__(self):
|
||||
return 'server cdx from ' + self.server_url
|
||||
|
||||
|
||||
#=================================================================
|
||||
class CDXCaptureResult(OrderedDict):
|
||||
CDX_FORMATS = [
|
||||
|
@ -39,13 +39,13 @@ def pywb_config_manual():
|
||||
prefixes = [replay_resolvers.PrefixResolver(test_dir + 'warcs/')]
|
||||
|
||||
# Jinja2 head insert
|
||||
head_insert = views.J2HeadInsertView('./ui/head_insert.html')
|
||||
head_insert = views.J2TemplateView('./ui/head_insert.html')
|
||||
|
||||
# Create rewriting replay handler to rewrite records
|
||||
replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert = head_insert, buffer_response = True)
|
||||
replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert_view = head_insert, buffer_response = True)
|
||||
|
||||
# Create Jinja2 based html query view
|
||||
html_view = views.J2QueryView('./ui/query.html')
|
||||
html_view = views.J2HtmlCapturesView('./ui/query.html')
|
||||
|
||||
# WB handler which uses the index reader, replayer, and html_view
|
||||
wb_handler = handlers.WBHandler(indexs, replayer, html_view)
|
||||
@ -81,11 +81,21 @@ def pywb_config(config_file = None):
|
||||
|
||||
routes = map(yaml_parse_route, config['routes'])
|
||||
|
||||
homepage = yaml_load_template(config, 'home_html_template', 'Home Page Template')
|
||||
errorpage = yaml_load_template(config, 'error_html_template', 'Error Page Template')
|
||||
|
||||
hostpaths = config.get('hostpaths', ['http://localhost:8080/'])
|
||||
|
||||
return ArchivalRequestRouter(routes, hostpaths)
|
||||
return ArchivalRequestRouter(routes, hostpaths, homepage = homepage, errorpage = errorpage)
|
||||
|
||||
|
||||
def yaml_load_template(config, name, desc = None):
|
||||
file = config.get(name)
|
||||
if file:
|
||||
logging.info('Adding {0}: {1}'.format(desc if desc else name, file))
|
||||
file = views.J2TemplateView(file)
|
||||
return file
|
||||
|
||||
|
||||
|
||||
def yaml_parse_index_loader(config):
|
||||
@ -113,17 +123,19 @@ def yaml_parse_index_loader(config):
|
||||
return indexreader.LocalCDXServer([uri])
|
||||
|
||||
|
||||
|
||||
|
||||
def yaml_parse_head_insert(config):
|
||||
# First, try a template file
|
||||
head_insert_file = config.get('head_insert_html_template')
|
||||
if head_insert_file:
|
||||
logging.info('Adding Head-Insert Template: ' + head_insert_file)
|
||||
return views.J2HeadInsertView(head_insert_file)
|
||||
return views.J2TemplateView(head_insert_file)
|
||||
|
||||
# Then, static head_insert text
|
||||
head_insert_text = config.get('head_insert_text', '')
|
||||
logging.info('Adding Head-Insert Text: ' + head_insert_text)
|
||||
return head_insert_text
|
||||
logging.info('Adding Head-Insert Text: ' + head_insert_text)
|
||||
return views.StaticTextView(head_insert_text)
|
||||
|
||||
|
||||
def yaml_parse_calendar_view(config):
|
||||
@ -133,7 +145,7 @@ def yaml_parse_calendar_view(config):
|
||||
else:
|
||||
logging.info('No HTML Calendar View Present')
|
||||
|
||||
return views.J2QueryView(html_view_file) if html_view_file else None
|
||||
return views.J2HtmlCapturesView(html_view_file) if html_view_file else None
|
||||
|
||||
|
||||
|
||||
@ -150,12 +162,14 @@ def yaml_parse_route(config):
|
||||
|
||||
replayer = replay_views.RewritingReplayView(resolvers = archive_resolvers,
|
||||
archiveloader = archive_loader,
|
||||
head_insert = head_insert,
|
||||
head_insert_view = head_insert,
|
||||
buffer_response = config.get('buffer_response', False))
|
||||
|
||||
html_view = yaml_parse_calendar_view(config)
|
||||
|
||||
wb_handler = handlers.WBHandler(index_loader, replayer, html_view)
|
||||
searchpage = yaml_load_template(config, 'search_html_template', 'Search Page Template')
|
||||
|
||||
wb_handler = handlers.WBHandler(index_loader, replayer, html_view, searchpage = searchpage)
|
||||
|
||||
return Route(name, wb_handler)
|
||||
|
||||
|
@ -16,6 +16,13 @@ class PrefixResolver:
|
||||
def __call__(self, filename):
|
||||
return [self.prefix + filename] if (self.contains in filename) else []
|
||||
|
||||
def __repr__(self):
|
||||
if self.contains:
|
||||
return "PrefixResolver('{0}', contains = '{1}')".format(self.prefix, self.contains)
|
||||
else:
|
||||
return "PrefixResolver('{0}')".format(self.prefix)
|
||||
|
||||
|
||||
#======================================
|
||||
class RedisResolver:
|
||||
def __init__(self, redis_url, key_prefix = 'w:'):
|
||||
@ -31,9 +38,14 @@ class RedisResolver:
|
||||
print e
|
||||
return None
|
||||
|
||||
def __repr__(self):
|
||||
return "RedisResolver('{0}')".format(self.redis_url)
|
||||
|
||||
|
||||
#======================================
|
||||
class PathIndexResolver:
|
||||
def __init__(self, pathindex_file):
|
||||
self.pathindex_file = pathindex_file
|
||||
self.reader = binsearch.FileReader(pathindex_file)
|
||||
|
||||
def __call__(self, filename):
|
||||
@ -47,27 +59,32 @@ class PathIndexResolver:
|
||||
|
||||
return gen_list(result)
|
||||
|
||||
def __repr__(self):
|
||||
return "PathIndexResolver('{0}')".format(self.pathindex_file)
|
||||
|
||||
|
||||
#TODO: more options (remote files, contains param, etc..)
|
||||
# find best resolver given the path
|
||||
def make_best_resolver(path):
|
||||
"""
|
||||
# http path
|
||||
>>> class_name(make_best_resolver('http://myhost.example.com/warcs/'))
|
||||
'PrefixResolver'
|
||||
>>> make_best_resolver('http://myhost.example.com/warcs/')
|
||||
PrefixResolver('http://myhost.example.com/warcs/')
|
||||
|
||||
# redis path
|
||||
>>> class_name(make_best_resolver('redis://myhost.example.com:1234/1'))
|
||||
'RedisResolver'
|
||||
>>> make_best_resolver('redis://myhost.example.com:1234/1')
|
||||
RedisResolver('redis://myhost.example.com:1234/1')
|
||||
|
||||
# a file
|
||||
>>> class_name(make_best_resolver('file://' + os.path.realpath(__file__)))
|
||||
'PathIndexResolver'
|
||||
>>> make_best_resolver('file://' + os.path.dirname(os.path.realpath(__file__)) + '/replay_resolvers.py')
|
||||
PathIndexResolver('/home/ilya/workspace/pywb/pywb/replay_resolvers.py')
|
||||
|
||||
# a dir
|
||||
>>> class_name(make_best_resolver('file://' + os.path.dirname(os.path.realpath(__file__))))
|
||||
'PrefixResolver'
|
||||
>>> make_best_resolver('file://' + os.path.dirname(os.path.realpath(__file__)))
|
||||
PrefixResolver('/home/ilya/workspace/pywb/pywb')
|
||||
|
||||
"""
|
||||
|
||||
url_parts = urlparse.urlsplit(path)
|
||||
|
||||
if url_parts.scheme == 'redis':
|
||||
@ -90,9 +107,6 @@ import utils
|
||||
#=================================================================
|
||||
if __name__ == "__main__" or utils.enable_doctests():
|
||||
|
||||
def class_name(obj):
|
||||
return obj.__class__.__name__
|
||||
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
||||
|
@ -210,12 +210,15 @@ class ReplayView:
|
||||
stream.close()
|
||||
|
||||
|
||||
def __str__(self):
|
||||
return 'find archive files from ' + str(self.resolvers)
|
||||
|
||||
#=================================================================
|
||||
class RewritingReplayView(ReplayView):
|
||||
|
||||
def __init__(self, resolvers, archiveloader, head_insert = None, header_rewriter = None, redir_to_exact = True, buffer_response = False):
|
||||
def __init__(self, resolvers, archiveloader, head_insert_view = None, header_rewriter = None, redir_to_exact = True, buffer_response = False):
|
||||
ReplayView.__init__(self, resolvers, archiveloader)
|
||||
self.head_insert = head_insert
|
||||
self.head_insert_view = head_insert_view
|
||||
self.header_rewriter = header_rewriter if header_rewriter else HeaderRewriter()
|
||||
self.redir_to_exact = redir_to_exact
|
||||
|
||||
@ -300,12 +303,7 @@ class RewritingReplayView(ReplayView):
|
||||
status_headers = rewritten_headers.status_headers
|
||||
|
||||
if text_type == 'html':
|
||||
# Support head_insert func
|
||||
if hasattr(self.head_insert, '__call__'):
|
||||
head_insert_str = self.head_insert(wbrequest, response.cdx)
|
||||
else:
|
||||
head_insert_str = str(self.head_insert)
|
||||
|
||||
head_insert_str = self.head_insert_view.render_to_string(wbrequest = wbrequest, cdx = response.cdx) if self.head_insert_view else None
|
||||
rewriter = html_rewriter.HTMLRewriter(urlrewriter, outstream = None, head_insert = head_insert_str)
|
||||
elif text_type == 'css':
|
||||
rewriter = regex_rewriters.CSSRewriter(urlrewriter)
|
||||
|
119
pywb/views.py
119
pywb/views.py
@ -10,63 +10,72 @@ from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
|
||||
#=================================================================
|
||||
class TextQueryView:
|
||||
def __call__(self, wbrequest, cdx_lines):
|
||||
class StaticTextView:
|
||||
def __init__(self, text):
|
||||
self.text = text
|
||||
|
||||
def render_to_string(self, **kwargs):
|
||||
return self.text
|
||||
|
||||
def render_response(self, **kwargs):
|
||||
return wbrequestresponse.WbResponse.text_stream(self.text)
|
||||
|
||||
#=================================================================
|
||||
class J2TemplateView:
|
||||
def __init__(self, filename):
|
||||
template_dir, template_file = path.split(filename)
|
||||
|
||||
self.template_file = template_file
|
||||
|
||||
self.jinja_env = self.make_jinja_env(template_dir)
|
||||
|
||||
|
||||
def make_jinja_env(self, template_dir):
|
||||
jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True)
|
||||
jinja_env.filters['format_ts'] = J2TemplateView.format_ts
|
||||
return jinja_env
|
||||
|
||||
def render_to_string(self, **kwargs):
|
||||
template = self.jinja_env.get_template(self.template_file)
|
||||
|
||||
template_result = template.render(**kwargs)
|
||||
|
||||
return template_result
|
||||
|
||||
def render_response(self, **kwargs):
|
||||
template_result = self.render_to_string(**kwargs)
|
||||
return wbrequestresponse.WbResponse.text_response(str(template_result), content_type = 'text/html; charset=utf-8')
|
||||
|
||||
|
||||
# Filters
|
||||
@staticmethod
|
||||
def format_ts(value, format='%a, %b %d %Y %H:%M:%S'):
|
||||
value = utils.timestamp_to_datetime(value)
|
||||
return time.strftime(format, value)
|
||||
|
||||
|
||||
|
||||
|
||||
# cdx index view
|
||||
|
||||
#=================================================================
|
||||
# html captures 'calendar' view
|
||||
#=================================================================
|
||||
class J2HtmlCapturesView(J2TemplateView):
|
||||
def render_response(self, wbrequest, cdx_lines):
|
||||
return J2TemplateView.render_response(self,
|
||||
cdx_lines = list(cdx_lines),
|
||||
url = wbrequest.wb_url.url,
|
||||
prefix = wbrequest.wb_prefix)
|
||||
|
||||
|
||||
#=================================================================
|
||||
# stream raw cdx text
|
||||
#=================================================================
|
||||
class TextCapturesView:
|
||||
def render_response(self, wbrequest, cdx_lines):
|
||||
cdx_lines = imap(lambda x: str(x) + '\n', cdx_lines)
|
||||
return wbrequestresponse.WbResponse.text_stream(cdx_lines)
|
||||
|
||||
#=================================================================
|
||||
class J2QueryView:
|
||||
def __init__(self, filename, buffer_index = True):
|
||||
template_dir, template_file = path.split(filename)
|
||||
|
||||
self.template_file = template_file
|
||||
self.buffer_index = buffer_index
|
||||
|
||||
self.jinja_env = make_jinja_env(template_dir)
|
||||
|
||||
|
||||
def __call__(self, wbrequest, cdx_lines):
|
||||
template = self.jinja_env.get_template(self.template_file)
|
||||
|
||||
# buffer/convert to list so we have length available for template
|
||||
if self.buffer_index:
|
||||
cdx_lines = list(cdx_lines)
|
||||
|
||||
response = template.render(cdx_lines = cdx_lines,
|
||||
url = wbrequest.wb_url.url,
|
||||
prefix = wbrequest.wb_prefix)
|
||||
|
||||
return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html')
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Render the head insert (eg. banner)
|
||||
#=================================================================
|
||||
class J2HeadInsertView:
|
||||
def __init__(self, filename, buffer_index = True):
|
||||
template_dir, template_file = path.split(filename)
|
||||
self.template_file = template_file
|
||||
|
||||
self.jinja_env = make_jinja_env(template_dir)
|
||||
|
||||
|
||||
def __call__(self, wbrequest, cdx):
|
||||
template = self.jinja_env.get_template(self.template_file)
|
||||
|
||||
|
||||
return template.render(wbrequest = wbrequest,cdx = cdx)
|
||||
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Jinja funcs
|
||||
def make_jinja_env(template_dir):
|
||||
jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True)
|
||||
jinja_env.filters['format_ts'] = format_ts
|
||||
return jinja_env
|
||||
|
||||
# Filters
|
||||
def format_ts(value, format='%H:%M / %d-%m-%Y'):
|
||||
value = utils.timestamp_to_datetime(value)
|
||||
return time.strftime(format, value)
|
||||
|
@ -8,29 +8,6 @@ import importlib
|
||||
import logging
|
||||
|
||||
|
||||
|
||||
## ===========
|
||||
'''
|
||||
|
||||
To declare Wayback with one collection, `mycoll`
|
||||
and will be accessed by user at:
|
||||
|
||||
`http://mywb.example.com:8080/mycoll/`
|
||||
|
||||
and will load cdx from cdx server running at:
|
||||
|
||||
`http://cdx.example.com/cdx`
|
||||
|
||||
and look for warcs at paths:
|
||||
|
||||
`http://warcs.example.com/servewarc/` and
|
||||
`http://warcs.example.com/anotherpath/`,
|
||||
|
||||
one could declare a `sample_wb_settings()` method as follows
|
||||
'''
|
||||
|
||||
|
||||
|
||||
def create_wb_app(wb_router):
|
||||
|
||||
# Top-level wsgi application
|
||||
@ -52,14 +29,13 @@ def create_wb_app(wb_router):
|
||||
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
|
||||
|
||||
except (wbexceptions.NotFoundException, wbexceptions.AccessException) as e:
|
||||
logging.info(str(e))
|
||||
response = handle_exception(env, e)
|
||||
response = handle_exception(env, wb_router.errorpage, e, False)
|
||||
|
||||
except wbexceptions.WbException as wbe:
|
||||
response = handle_exception(env, wb_router.errorpage, wbe, False)
|
||||
|
||||
except Exception as e:
|
||||
last_exc = e
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
response = handle_exception(env, e)
|
||||
response = handle_exception(env, wb_router.errorpage, e, True)
|
||||
|
||||
return response(env, start_response)
|
||||
|
||||
@ -67,13 +43,25 @@ def create_wb_app(wb_router):
|
||||
return application
|
||||
|
||||
|
||||
def handle_exception(env, exc):
|
||||
def handle_exception(env, errorpage, exc, print_trace):
|
||||
if hasattr(exc, 'status'):
|
||||
status = exc.status()
|
||||
else:
|
||||
status = '400 Bad Request'
|
||||
|
||||
return WbResponse.text_response(status + ' Error: ' + str(exc), status = status)
|
||||
if print_trace:
|
||||
import traceback
|
||||
err_details = traceback.format_exc(exc)
|
||||
print err_details
|
||||
else:
|
||||
logging.info(str(exc))
|
||||
err_details = None
|
||||
|
||||
if errorpage:
|
||||
import traceback
|
||||
return errorpage.render_response(err_msg = str(exc), err_details = err_details)
|
||||
else:
|
||||
return WbResponse.text_response(status + ' Error: ' + str(exc), status = status)
|
||||
|
||||
|
||||
#=================================================================
|
||||
|
@ -1,26 +1,33 @@
|
||||
|
||||
class RequestParseException(Exception):
|
||||
class WbException(Exception):
|
||||
pass
|
||||
|
||||
class RequestParseException(WbException):
|
||||
def __init__(self, string, to_parse):
|
||||
WbException.__init__(self, string + to_parse)
|
||||
self.to_parse = to_parse
|
||||
|
||||
def status(_):
|
||||
return '400 Bad Request'
|
||||
|
||||
class BadUrlException(Exception):
|
||||
class BadUrlException(WbException):
|
||||
def status(_):
|
||||
return '400 Bad Request'
|
||||
|
||||
class AccessException(Exception):
|
||||
class AccessException(WbException):
|
||||
def status(_):
|
||||
return '403 Forbidden'
|
||||
|
||||
class InvalidCDXException(Exception):
|
||||
class InvalidCDXException(WbException):
|
||||
def status(_):
|
||||
return '500 Internal Server Error'
|
||||
|
||||
class NotFoundException(Exception):
|
||||
class NotFoundException(WbException):
|
||||
def status(_):
|
||||
return '404 Not Found'
|
||||
|
||||
# Exceptions that affect a specific capture and result in a retry
|
||||
class CaptureException(Exception):
|
||||
class CaptureException(WbException):
|
||||
def status(_):
|
||||
return '500 Internal Server Error'
|
||||
|
||||
@ -47,9 +54,9 @@ class ArchiveLoadFailed(CaptureException):
|
||||
def status(_):
|
||||
return '503 Service Unavailable'
|
||||
|
||||
class InternalRedirect(Exception):
|
||||
class InternalRedirect(WbException):
|
||||
def __init__(self, location, status = '302 Internal Redirect'):
|
||||
Exception.__init__(self, 'Redirecting -> ' + location)
|
||||
WbException.__init__(self, 'Redirecting -> ' + location)
|
||||
self.status = status
|
||||
self.httpHeaders = [('Location', location)]
|
||||
|
||||
|
@ -68,7 +68,14 @@ class WbRequest:
|
||||
|
||||
self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.make_abs_prefix(env, wb_prefix)
|
||||
|
||||
self.wb_url = archivalurl_class(wb_url)
|
||||
# wb_url present and not root page
|
||||
if wb_url != '/' and wb_url != '' and archivalurl_class:
|
||||
self.wb_url_str = wb_url
|
||||
self.wb_url = archivalurl_class(wb_url)
|
||||
else:
|
||||
# no wb_url, just store blank
|
||||
self.wb_url_str = '/'
|
||||
self.wb_url = None
|
||||
|
||||
self.coll = coll
|
||||
|
||||
|
@ -82,10 +82,10 @@ class WbUrl:
|
||||
self.mod = ''
|
||||
|
||||
if not any (f(url) for f in [self._init_query, self._init_replay]):
|
||||
raise wbexceptions.RequestParseException('Invalid WB Request Url: ' + url)
|
||||
raise wbexceptions.RequestParseException('Invalid WB Request Url: ', url)
|
||||
|
||||
if len(self.url) == 0:
|
||||
raise wbexceptions.RequestParseException('Invalid WB Request Url: ' + url)
|
||||
raise wbexceptions.RequestParseException('Invalid WB Request Url: ', url)
|
||||
|
||||
# protocol agnostic url -> http://
|
||||
if self.url.startswith('//'):
|
||||
|
11
ui/error.html
Normal file
11
ui/error.html
Normal file
@ -0,0 +1,11 @@
|
||||
<h2>Pywb Error</h2>
|
||||
<b>{{ err_msg }}</b>
|
||||
|
||||
{% if err_details %}
|
||||
<p>Error Details:</p>
|
||||
<p>
|
||||
<pre>
|
||||
{{ err_details }}
|
||||
</pre>
|
||||
</p>
|
||||
{% endif %}
|
@ -1,7 +1,7 @@
|
||||
<!-- WB Insert -->
|
||||
<script>
|
||||
wbinfo = {}
|
||||
wbinfo.capture_str = "{{ cdx['timestamp'] | format_ts('%a, %b %d %Y %H:%M:%S') }}";
|
||||
wbinfo.capture_str = "{{ cdx['timestamp'] | format_ts }}";
|
||||
</script>
|
||||
<script src='/static/wb.js'> </script>
|
||||
<link rel='stylesheet' href='/static/wb.css'/>
|
||||
|
9
ui/index.html
Normal file
9
ui/index.html
Normal file
@ -0,0 +1,9 @@
|
||||
<h2>pywb Sample Home Page</h2>
|
||||
|
||||
The following archive collections are available:
|
||||
|
||||
<ul>
|
||||
{% for route in routes %}
|
||||
<li><a href="{{ '/' + route.path }}">{{ '/' + route.path }}</a>: {{ route | string }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
@ -4,14 +4,20 @@
|
||||
<table id="captures" style="border-spacing: 10px;">
|
||||
<tr>
|
||||
<th>Capture</th>
|
||||
<th>Status</th>
|
||||
<th>Original Url</th>
|
||||
<th>Archive File</th>
|
||||
</tr>
|
||||
{% for cdx in cdx_lines %}
|
||||
{% for cdx in cdx_lines %}
|
||||
<tr style="{{ 'font-weight: bold' if cdx['mimetype'] != 'warc/revisit' else '' }}">
|
||||
<td><a href="{{ prefix }}{{ cdx.timestamp }}/{{ url }}">{{ cdx['timestamp'] | format_ts('%a, %b %d %Y %H:%M:%S') }}</a></td>
|
||||
<td><a href="{{ prefix }}{{ cdx.timestamp }}/{{ url }}">{{ cdx['timestamp'] | format_ts}}</a></td>
|
||||
<td>{{ cdx['filename'] }}</td>
|
||||
<td>{{ cdx['statuscode'] }}</td>
|
||||
<td>{{ cdx['originalurl'] }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
<i><b>* Unique captures are bold.</b><br/>* Other captures are duplicates of a previous capture.</i>
|
||||
<p>
|
||||
<i><b>* Unique captures are bold.</b> Other captures are duplicates of a previous capture.</i>
|
||||
</p>
|
||||
</body>
|
||||
|
6
ui/search.html
Normal file
6
ui/search.html
Normal file
@ -0,0 +1,6 @@
|
||||
<h2>pywb Search Page</h2>
|
||||
Search Archived Content:
|
||||
<form onsubmit="url = document.getElementById('search').value; if (url != '') { document.location.href = '{{ wbrequest.wb_prefix }}' + '*/' + url; } return false;">
|
||||
<input id="search" name="search" placeholder="Enter url to search"/>
|
||||
<button type="submit">Search</button>
|
||||
</form>
|
Loading…
x
Reference in New Issue
Block a user