mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
first pass on html rendering via jinja, support for query (cdx) rendering
This commit is contained in:
parent
bcc9588c00
commit
7ce6d0d22b
@ -2,6 +2,7 @@ import urllib
|
|||||||
import urllib2
|
import urllib2
|
||||||
import wbexceptions
|
import wbexceptions
|
||||||
import itertools
|
import itertools
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
from wbarchivalurl import ArchivalUrl
|
from wbarchivalurl import ArchivalUrl
|
||||||
|
|
||||||
@ -78,7 +79,7 @@ class RemoteCDXServer:
|
|||||||
}[wburl.type]
|
}[wburl.type]
|
||||||
|
|
||||||
|
|
||||||
class CDXCaptureResult(dict):
|
class CDXCaptureResult(OrderedDict):
|
||||||
CDX_FORMATS = [
|
CDX_FORMATS = [
|
||||||
# Public CDX Format
|
# Public CDX Format
|
||||||
["urlkey","timestamp","original","mimetype","statuscode","digest","length"],
|
["urlkey","timestamp","original","mimetype","statuscode","digest","length"],
|
||||||
@ -99,6 +100,8 @@ class CDXCaptureResult(dict):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, cdxline):
|
def __init__(self, cdxline):
|
||||||
|
OrderedDict.__init__(self)
|
||||||
|
|
||||||
cdxline = cdxline.rstrip()
|
cdxline = cdxline.rstrip()
|
||||||
fields = cdxline.split(' ')
|
fields = cdxline.split(' ')
|
||||||
|
|
||||||
|
@ -3,6 +3,8 @@ import utils
|
|||||||
import wbrequestresponse
|
import wbrequestresponse
|
||||||
import wbexceptions
|
import wbexceptions
|
||||||
|
|
||||||
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
|
||||||
class QueryHandler:
|
class QueryHandler:
|
||||||
def __init__(self, cdxserver = None):
|
def __init__(self, cdxserver = None):
|
||||||
if not cdxserver:
|
if not cdxserver:
|
||||||
@ -23,13 +25,50 @@ class QueryHandler:
|
|||||||
|
|
||||||
cdxlines = utils.peek_iter(cdxlines)
|
cdxlines = utils.peek_iter(cdxlines)
|
||||||
|
|
||||||
if cdxlines is not None:
|
if cdxlines is None:
|
||||||
return wbrequestresponse.WbResponse.text_stream(cdxlines)
|
raise wbexceptions.NotFoundException('WB Does Not Have Url: ' + wburl.url)
|
||||||
|
|
||||||
|
cdxlines = self.filterCdx(wbrequest, cdxlines)
|
||||||
|
|
||||||
|
# Output raw cdx stream
|
||||||
|
return wbrequestresponse.WbResponse.text_stream(cdxlines)
|
||||||
|
|
||||||
|
def filterCdx(self, wbrequest, cdxlines):
|
||||||
|
# Subclasses may wrap cdxlines iterator in a filter
|
||||||
|
return cdxlines
|
||||||
|
|
||||||
|
|
||||||
|
class J2QueryRenderer:
|
||||||
|
def __init__(self, template_dir, template_file):
|
||||||
|
self.template_file = template_file
|
||||||
|
|
||||||
|
self.jinja_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks = True)
|
||||||
|
|
||||||
|
def __call__(self, wbrequest, query_response):
|
||||||
|
cdxlines = query_response.body
|
||||||
|
|
||||||
|
def parse_cdx():
|
||||||
|
for cdx in cdxlines:
|
||||||
|
try:
|
||||||
|
cdx = indexreader.CDXCaptureResult(cdx)
|
||||||
|
yield cdx
|
||||||
|
|
||||||
|
except wbexceptions.InvalidCDXException:
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
template = self.jinja_env.get_template(self.template_file)
|
||||||
|
response = template.render(cdxlines = parse_cdx(),
|
||||||
|
url = wbrequest.wb_url.url,
|
||||||
|
prefix = wbrequest.wb_prefix)
|
||||||
|
|
||||||
|
return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html')
|
||||||
|
|
||||||
raise wbexceptions.NotFoundException('WB Does Not Have Url: ' + wburl.url)
|
|
||||||
|
|
||||||
## ===========
|
## ===========
|
||||||
## Simple handlers for debuging
|
## Simple handlers for debugging
|
||||||
class EchoEnv:
|
class EchoEnv:
|
||||||
def __call__(self, wbrequest):
|
def __call__(self, wbrequest):
|
||||||
return wbrequestresponse.WbResponse.text_response(str(wbrequest.env))
|
return wbrequestresponse.WbResponse.text_response(str(wbrequest.env))
|
||||||
|
@ -17,16 +17,17 @@ import wbexceptions
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class WBHandler:
|
class WBHandler:
|
||||||
def __init__(self, query, replay):
|
def __init__(self, query, replay, htmlquery = None):
|
||||||
self.query = query
|
self.query = query
|
||||||
self.replay = replay
|
self.replay = replay
|
||||||
|
self.htmlquery = htmlquery
|
||||||
|
|
||||||
def __call__(self, wbrequest):
|
def __call__(self, wbrequest):
|
||||||
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
|
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
|
||||||
query_response = self.query(wbrequest)
|
query_response = self.query(wbrequest)
|
||||||
|
|
||||||
if (wbrequest.wb_url.type == ArchivalUrl.QUERY) or (wbrequest.wb_url.type == ArchivalUrl.URL_QUERY):
|
if (wbrequest.wb_url.type == ArchivalUrl.QUERY) or (wbrequest.wb_url.type == ArchivalUrl.URL_QUERY):
|
||||||
return query_response
|
return self.htmlquery(wbrequest, query_response) if self.htmlquery else query_response
|
||||||
|
|
||||||
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
|
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
|
||||||
return self.replay(wbrequest, query_response)
|
return self.replay(wbrequest, query_response)
|
||||||
|
@ -119,12 +119,12 @@ class WbResponse:
|
|||||||
self.body = value
|
self.body = value
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def text_stream(text, status = '200 OK'):
|
def text_stream(text, status = '200 OK', content_type = 'text/plain'):
|
||||||
return WbResponse(StatusAndHeaders(status, [('Content-Type', 'text/plain')]), value = text)
|
return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = text)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def text_response(text, status = '200 OK'):
|
def text_response(text, status = '200 OK', content_type = 'text/plain'):
|
||||||
return WbResponse(StatusAndHeaders(status, [('Content-Type', 'text/plain')]), value = [text])
|
return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = [text])
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def redir_response(location, status = '302 Redirect'):
|
def redir_response(location, status = '302 Redirect'):
|
||||||
|
2
setup.py
2
setup.py
@ -11,7 +11,7 @@ setuptools.setup(name='pywb',
|
|||||||
long_description=open('README.md').read(),
|
long_description=open('README.md').read(),
|
||||||
license='GPL',
|
license='GPL',
|
||||||
packages=['pywb'],
|
packages=['pywb'],
|
||||||
install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis'],
|
install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis', 'jinja2'],
|
||||||
# test_suite='?', # not sure how to run doctests here
|
# test_suite='?', # not sure how to run doctests here
|
||||||
zip_safe=False)
|
zip_safe=False)
|
||||||
|
|
||||||
|
14
ui/query.html
Normal file
14
ui/query.html
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<body>
|
||||||
|
<b><span id="count"></span> Captures of {{ url }}</b>
|
||||||
|
<table id="captures">
|
||||||
|
{% for cdx in cdxlines %}
|
||||||
|
<tr>
|
||||||
|
<td><a href="{{ prefix}}{{ cdx.timestamp }}/{{ url }}">{{ cdx.timestamp }}</a></td>
|
||||||
|
<td><a href="https://archive.org/details/{{ cdx['filename'] }}">{{ cdx['filename'] }}</a></td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</table>
|
||||||
|
<script>
|
||||||
|
document.getElementById("count").innerHTML = document.getElementById("captures").getElementsByTagName("tr").length
|
||||||
|
</script>
|
||||||
|
</body>
|
Loading…
x
Reference in New Issue
Block a user