diff --git a/pywb/indexreader.py b/pywb/indexreader.py index c87d011a..6a340022 100644 --- a/pywb/indexreader.py +++ b/pywb/indexreader.py @@ -2,6 +2,7 @@ import urllib import urllib2 import wbexceptions import itertools +from collections import OrderedDict from wbarchivalurl import ArchivalUrl @@ -78,7 +79,7 @@ class RemoteCDXServer: }[wburl.type] -class CDXCaptureResult(dict): +class CDXCaptureResult(OrderedDict): CDX_FORMATS = [ # Public CDX Format ["urlkey","timestamp","original","mimetype","statuscode","digest","length"], @@ -99,6 +100,8 @@ class CDXCaptureResult(dict): ] def __init__(self, cdxline): + OrderedDict.__init__(self) + cdxline = cdxline.rstrip() fields = cdxline.split(' ') diff --git a/pywb/query.py b/pywb/query.py index abfa3e0a..4a909141 100644 --- a/pywb/query.py +++ b/pywb/query.py @@ -3,6 +3,8 @@ import utils import wbrequestresponse import wbexceptions +from jinja2 import Environment, FileSystemLoader + class QueryHandler: def __init__(self, cdxserver = None): if not cdxserver: @@ -23,13 +25,50 @@ class QueryHandler: cdxlines = utils.peek_iter(cdxlines) - if cdxlines is not None: - return wbrequestresponse.WbResponse.text_stream(cdxlines) + if cdxlines is None: + raise wbexceptions.NotFoundException('WB Does Not Have Url: ' + wburl.url) + + cdxlines = self.filterCdx(wbrequest, cdxlines) + + # Output raw cdx stream + return wbrequestresponse.WbResponse.text_stream(cdxlines) + + def filterCdx(self, wbrequest, cdxlines): + # Subclasses may wrap cdxlines iterator in a filter + return cdxlines + + +class J2QueryRenderer: + def __init__(self, template_dir, template_file): + self.template_file = template_file + + self.jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True) + + def __call__(self, wbrequest, query_response): + cdxlines = query_response.body + + def parse_cdx(): + for cdx in cdxlines: + try: + cdx = indexreader.CDXCaptureResult(cdx) + yield cdx + + except wbexceptions.InvalidCDXException: + import traceback + traceback.print_exc() + pass + + + template = self.jinja_env.get_template(self.template_file) + response = template.render(cdxlines = parse_cdx(), + url = wbrequest.wb_url.url, + prefix = wbrequest.wb_prefix) + + return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html') - raise wbexceptions.NotFoundException('WB Does Not Have Url: ' + wburl.url) ## =========== -## Simple handlers for debuging +## Simple handlers for debugging class EchoEnv: def __call__(self, wbrequest): return wbrequestresponse.WbResponse.text_response(str(wbrequest.env)) diff --git a/pywb/replay.py b/pywb/replay.py index 31cf98ce..ca993535 100644 --- a/pywb/replay.py +++ b/pywb/replay.py @@ -17,16 +17,17 @@ import wbexceptions #================================================================= class WBHandler: - def __init__(self, query, replay): + def __init__(self, query, replay, htmlquery = None): self.query = query self.replay = replay + self.htmlquery = htmlquery def __call__(self, wbrequest): with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t: query_response = self.query(wbrequest) if (wbrequest.wb_url.type == ArchivalUrl.QUERY) or (wbrequest.wb_url.type == ArchivalUrl.URL_QUERY): - return query_response + return self.htmlquery(wbrequest, query_response) if self.htmlquery else query_response with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t: return self.replay(wbrequest, query_response) diff --git a/pywb/wbrequestresponse.py b/pywb/wbrequestresponse.py index 05990aa7..57863b2d 100644 --- a/pywb/wbrequestresponse.py +++ b/pywb/wbrequestresponse.py @@ -119,12 +119,12 @@ class WbResponse: self.body = value @staticmethod - def text_stream(text, status = '200 OK'): - return WbResponse(StatusAndHeaders(status, [('Content-Type', 'text/plain')]), value = text) + def text_stream(text, status = '200 OK', content_type = 'text/plain'): + return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = text) @staticmethod - def text_response(text, status = '200 OK'): - return WbResponse(StatusAndHeaders(status, [('Content-Type', 'text/plain')]), value = [text]) + def text_response(text, status = '200 OK', content_type = 'text/plain'): + return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = [text]) @staticmethod def redir_response(location, status = '302 Redirect'): diff --git a/setup.py b/setup.py index 7f91ba06..aecb3512 100755 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setuptools.setup(name='pywb', long_description=open('README.md').read(), license='GPL', packages=['pywb'], - install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis'], + install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis', 'jinja2'], # test_suite='?', # not sure how to run doctests here zip_safe=False) diff --git a/ui/query.html b/ui/query.html new file mode 100644 index 00000000..b73e0815 --- /dev/null +++ b/ui/query.html @@ -0,0 +1,14 @@ + +  Captures of {{ url }} + + {% for cdx in cdxlines %} + + + + + {% endfor %} +
{{ cdx.timestamp }}{{ cdx['filename'] }}
+ +