1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

experiment: support pages.yaml and lists.yaml, full list of bookmarks and filtered by list

This commit is contained in:
Ilya Kreymer 2018-01-16 22:06:27 -08:00
parent ba747a5caa
commit 2204c3dc42
4 changed files with 161 additions and 33 deletions

View File

@ -13,7 +13,7 @@ from wsgiprox.wsgiprox import WSGIProxMiddleware
from pywb.recorder.multifilewarcwriter import MultiFileWARCWriter
from pywb.recorder.recorderapp import RecorderApp
from pywb.utils.loaders import load_yaml_config
from pywb.utils.loaders import load_yaml_config, load
from pywb.utils.geventserver import GeventServer
from pywb.utils.io import StreamIter
@ -29,6 +29,7 @@ import os
import traceback
import requests
import logging
import yaml
# ============================================================================
@ -79,7 +80,11 @@ class FrontEndApp(object):
self.static_dir = config.get('static_dir', 'static')
metadata_templ = os.path.join(self.warcserver.root_dir, '{coll}', 'metadata.yaml')
pages_templ = os.path.join(self.warcserver.root_dir, '{coll}', 'pages.yaml')
lists_templ = os.path.join(self.warcserver.root_dir, '{coll}', 'lists.yaml')
self.metadata_cache = MetadataCache(metadata_templ)
self.pages_cache = PagesCache(pages_templ)
self.lists_cache = ListsCache(lists_templ)
def _init_routes(self):
self.url_map = Map()
@ -188,9 +193,6 @@ class FrontEndApp(object):
self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath))
def get_metadata(self, coll):
#if coll == self.all_coll:
# coll = '*'
metadata = {'coll': coll,
'type': 'replay'}
@ -209,6 +211,9 @@ class FrontEndApp(object):
metadata = self.get_metadata(coll)
pages = self.pages_cache.load(coll)
lists = self.lists_cache.load(coll)
view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')
wb_prefix = environ.get('SCRIPT_NAME')
@ -218,6 +223,8 @@ class FrontEndApp(object):
content = view.render_to_string(environ,
wb_prefix=wb_prefix,
metadata=metadata,
pages=pages,
lists=lists,
coll=coll)
return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
@ -431,8 +438,11 @@ class MetadataCache(object):
return self.store_new(coll, path, mtime)
def init_obj(self, coll, path):
    """Build the object that gets cached for a collection.

    This base implementation parses the file at ``path`` with
    ``load_yaml_config`` and returns the result unchanged; ``coll`` is
    accepted for the benefit of overriding subclasses and is not used here.
    """
    loaded = load_yaml_config(path)
    return loaded
def store_new(self, coll, path, mtime):
    """Load the object for ``coll`` from ``path`` and remember it.

    Parsing is delegated entirely to ``init_obj`` so that subclasses decide
    how the file is read. The file is parsed exactly once: a direct
    ``load_yaml_config`` call here would be wasted work and would apply the
    wrong loader to files a subclass parses differently.

    :param coll: collection name, used as the cache key
    :param path: path of the file to load
    :param mtime: modification time stored alongside the object
    :return: the freshly loaded object
    """
    obj = self.init_obj(coll, path)
    # Cache entries are (mtime, object) pairs keyed by collection name.
    self.cache[coll] = (mtime, obj)
    return obj
@ -442,6 +452,58 @@ class MetadataCache(object):
return {name: value[1] for name, value in iteritems(self.cache)}
def _get_id(url, config):
timestamp = config.get('timestamp')
url = config.get('url')
if timestamp:
return str(timestamp) + '/' + url
else:
return url
# ============================================================================
class PagesCache(MetadataCache):
    """Cache whose per-collection object is a map of page id -> page entry."""

    def init_obj(self, coll, path):
        """Parse a multi-document YAML file into a page map.

        Each YAML document that is a mapping containing a 'url' key becomes
        one entry, keyed by ``self._get_id(document)``. Documents that are
        empty, are not mappings, or have no 'url' are skipped.

        :param coll: collection name (unused here)
        :param path: location of the YAML file, opened via ``load``
        :return: dict mapping page id to the page's mapping
        """
        configdata = None
        page_map = {}

        try:
            configdata = load(path)

            # NOTE(review): yaml.load_all without an explicit Loader can
            # construct arbitrary Python objects and is rejected by newer
            # PyYAML releases; consider yaml.safe_load_all if these files
            # only ever contain plain data.
            for config in yaml.load_all(configdata):
                # An empty document loads as None and a scalar document as a
                # str/number; only mappings can describe a page.
                if not isinstance(config, dict) or 'url' not in config:
                    continue

                page_map[self._get_id(config)] = config
        finally:
            # Close the stream even if parsing fails part-way through.
            if configdata:
                configdata.close()

        return page_map
# ============================================================================
class ListsCache(MetadataCache):
    """Cache whose per-collection object is a map of list name -> list entry."""

    def init_obj(self, coll, path):
        """Parse a multi-document YAML file into a map of named page lists.

        Every YAML document is expected to be a mapping of list name to a
        list description. Only descriptions that are mappings with a
        non-empty 'pages' value are kept; empty documents and malformed
        entries are skipped.

        :param coll: collection name (unused here)
        :param path: location of the YAML file, opened via ``load``
        :return: dict mapping list name to the list's mapping
        """
        configdata = None
        page_lists = {}

        try:
            configdata = load(path)

            # NOTE(review): yaml.load_all without an explicit Loader can
            # construct arbitrary Python objects and is rejected by newer
            # PyYAML releases; consider yaml.safe_load_all if these files
            # only ever contain plain data.
            for config in yaml.load_all(configdata):
                # An empty document loads as None; only mappings hold lists.
                if not isinstance(config, dict):
                    continue

                for name, listobj in iteritems(config):
                    # Skip entries that are not mappings or list no pages.
                    if not isinstance(listobj, dict) or not listobj.get('pages'):
                        continue

                    page_lists[name] = listobj
        finally:
            # Close the stream even if parsing fails part-way through.
            if configdata:
                configdata.close()

        return page_lists
# ============================================================================
if __name__ == "__main__":

View File

@ -0,0 +1,21 @@
<!DOCTYPE html>
<!-- Base page layout: loads the shared stylesheets and scripts and exposes the "title", "head" and "body" blocks for extending templates to fill in. -->
<html lang="en">
<head>
<title>{% block title %}{% endblock %}</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- jquery and bootstrap dependencies query view -->
<link rel="stylesheet" href="{{ host_prefix }}/{{ static_path }}/css/query.css">
<link rel="stylesheet" href="{{ host_prefix }}/{{ static_path }}/css/bootstrap.min.css">
<link rel="stylesheet" href="{{ host_prefix }}/{{ static_path }}/css/font-awesome.min.css">
<script src="{{ host_prefix }}/{{ static_path }}/js/jquery-latest.min.js"></script>
<script src="{{ host_prefix }}/{{ static_path }}/js/bootstrap.min.js"></script>
<!-- "head" block: empty by default; extending templates can add page-specific head content here -->
{% block head %}
{% endblock %}
</head>
<body>
<!-- "body" block: empty by default; extending templates supply the page content here -->
{% block body %}
{% endblock %}
</body>
</html>

View File

@ -1,17 +1,12 @@
<html>
<head>
<!-- jquery and bootstrap dependencies query view -->
<link rel="stylesheet" href="{{ host_prefix }}/{{ static_path }}/css/query.css">
<link rel="stylesheet" href="{{ host_prefix }}/{{ static_path }}/css/bootstrap.min.css">
<link rel="stylesheet" href="{{ host_prefix }}/{{ static_path }}/css/font-awesome.min.css">
<script src="{{ host_prefix }}/{{ static_path }}/js/jquery-latest.min.js"></script>
<script src="{{ host_prefix }}/{{ static_path }}/js/bootstrap.min.js"></script>
{% extends "base_bootstrap.html" %}
{% block head %}
<script src="{{ host_prefix }}/{{ static_path }}/query.js"></script>
</head>
<body>
{% endblock %}
{% block body %}
<h2 class="text-center">pywb Query Results</h2>
<h3 class="text-center"><b id="count"></b> of <b>{{ url }}</b></h3>
<div id="captureYears"></div>
<script>new RenderCalendar("{{ prefix }}", "{{ url }}");</script>
</body>
</html>
{% endblock %}

View File

@ -1,21 +1,71 @@
{% if metadata %}
{% extends "base_bootstrap.html" %}
<h2>{{ metadata.title if metadata.title else coll }} Search Page</h2>
<div>
<table style="text-align: left">
{% for key, val in metadata.items() %}
<tr><th>{{ key }}:</th><td>{{ val }}</td>
{% endfor %}
</table>
{% block title %}
{{ metadata.title | default(coll) }}
{% endblock %}
{% block body %}
<h2 class="text-center">{{ metadata.title | default(coll) }} Collection</h2>
<h3>Bookmarks</h3>
<div class="row">
<div class="col-md-2">
<ul class="nav nav-pills nav-stacked" role="tablist">
<!-- All Bookmarks-->
<li role="presentation">
<a class="nav-link" data-toggle="pill" role="tab" aria-controls="all" href="#all">All Bookmarks</a>
</li>
{% for name, plist in lists | dictsort %}
<li role="presentation">
<a class="nav-link" data-toggle="pill" role="tab" aria-controls="{{ name }}" href="#{{ name }}">{{ plist.title | default(name) }}</a>
</li>
{% endfor %}
</ul>
</div>
<div class="col-md-10">
<div class="tab-content">
<!-- All Bookmarks -->
<div role="tabpanel" class="tab-pane" id="all">
<b>All Bookmarks</b>
<ul>
{% for key, page in pages | dictsort %}
<li>
<h4><a href="{{ wb_prefix + key }}">{{ page.title | default(page.url)}}</a></h4>
</li>
{% endfor %}
</ul>
</div>
{% for name, plist in lists | dictsort %}
<div role="tabpanel" class="tab-pane" id="{{ name }}">
<b>{{ plist.description }}</b>
<ul>
{% for key in plist.pages %}
{% if key in pages %}
<li>
<h4><a href="{{ wb_prefix + key }}">{{ pages[key].title | default(pages[key].url)}}</a></h4>
</li>
{% endif %}
{% endfor %}
</ul>
</div>
{% endfor %}
</div>
</div>
</div>
{% endif %}
<div class="row">
<h3>Url Search</h3>
<div class="row"><p>Search this collection by url:</p>
<form onsubmit="url = document.getElementById('search').value; if (url != '') { document.location.href = '{{ wb_prefix }}' + '*/' + url; } return false;">
<input id="search" name="search" placeholder="Enter url to search"/>
<button type="submit">Search</button>
</form>
</div>
</div>
{% endblock %}
<p>
Search this collection by url:
<form onsubmit="url = document.getElementById('search').value; if (url != '') { document.location.href = '{{ wb_prefix }}' + '*/' + url; } return false;">
<input id="search" name="search" placeholder="Enter url to search"/>
<button type="submit">Search</button>
</form>
</p>