mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Merge branch 'develop' for 0.7.6
This commit is contained in:
commit
8b5a6be956
18
CHANGES.rst
18
CHANGES.rst
@ -1,13 +1,26 @@
|
||||
pywb 0.7.6 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* new not found Jinja2 template: Add per-collection-overridable ``not_found.html`` template, specified via ``not_found_html`` option. For missing resources, the ``not_found_html`` template is now used instead of the generic ``error_html``
|
||||
|
||||
* client-side rewrite: improved wombat rewrite of postMessage events, unrewrite target on receive, improved Vine replay
|
||||
|
||||
* packaging: allow adding multiple packages for Jinja2 template resolving
|
||||
|
||||
pywb 0.7.5 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Cross platform fixes to support Windows -- all tests pass on Linux, OS X and Windows now. Improved cross-platform support includes:
|
||||
|
||||
- read all files as binary to avoid line ending issues
|
||||
- properly convert url <-> file
|
||||
- avoid platform dependent apis
|
||||
- properly convert between platform dependent file paths and urls
|
||||
- add .gitattributes to ensure line endings on *.warc*, *.arc*, *.cdx* files are unaltered
|
||||
- avoid platform-dependent APIs (e.g. %s for strftime)
|
||||
|
||||
* Change any unhandled exceptions to result in a 500 error, instead of 400.
|
||||
|
||||
* Setup: switch to ``zip_safe=True`` to allow for embedding pywb egg in one-file app with `pyinstaller <https://github.com/pyinstaller/pyinstaller>`_
|
||||
|
||||
* More comprehensive client-side ``src`` attribute rewriting (via wombat.js), additional server-side HTML tag rewriting.
|
||||
|
||||
|
||||
@ -39,6 +52,7 @@ pywb 0.7.1 changelist
|
||||
* Use `youtube-dl <http://rg3.github.io/youtube-dl/>`_ to find actual video streams from page urls, record video info.
|
||||
|
||||
* New, improved wombat 2.1 -- improved rewriting of dynamic content, including:
|
||||
|
||||
- setAttribute override
|
||||
- Date override sets date to replay timestamp
|
||||
- Image() object override
|
||||
|
@ -1,4 +1,4 @@
|
||||
PyWb 0.7.5
|
||||
PyWb 0.7.6
|
||||
==========
|
||||
|
||||
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
|
||||
|
@ -126,7 +126,15 @@ rules:
|
||||
- videoFileId
|
||||
- signature
|
||||
|
||||
# vine
|
||||
- url_prefix: 'co,vine,cdn,'
|
||||
|
||||
rewrite:
|
||||
js_rewrite_location: urls
|
||||
js_regexs:
|
||||
- match: 'window.location'
|
||||
replace: 'WB_wombat_location'
|
||||
|
||||
# youtube rules
|
||||
#=================================================================
|
||||
|
||||
|
@ -18,7 +18,7 @@ This file is part of pywb, https://github.com/ikreymer/pywb
|
||||
*/
|
||||
|
||||
//============================================
|
||||
// Wombat JS-Rewriting Library v2.1
|
||||
// Wombat JS-Rewriting Library v2.2
|
||||
//============================================
|
||||
_WBWombat = (function() {
|
||||
|
||||
@ -68,6 +68,16 @@ _WBWombat = (function() {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
//============================================
|
||||
function equals_any(string, arr) {
|
||||
for (var i = 0; i < arr.length; i++) {
|
||||
if (string === arr[i]) {
|
||||
return arr[i];
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
//============================================
|
||||
function ends_with(str, suffix) {
|
||||
if (str.indexOf(suffix, str.length - suffix.length) !== -1) {
|
||||
@ -107,6 +117,8 @@ _WBWombat = (function() {
|
||||
|
||||
var SRC_TAGS = ["IMG", "SCRIPT", "VIDEO", "AUDIO", "SOURCE", "EMBED", "INPUT"];
|
||||
|
||||
var REWRITE_ATTRS = ["src", "href", "poster"];
|
||||
|
||||
//============================================
|
||||
function rewrite_url_(url) {
|
||||
// If undefined, just return it
|
||||
@ -158,12 +170,19 @@ _WBWombat = (function() {
|
||||
return wb_replay_date_prefix + wb_orig_host + url;
|
||||
}
|
||||
|
||||
// If full url starting with http://, add prefix
|
||||
|
||||
// If full url starting with http://, https:// or //
|
||||
// add rewrite prefix
|
||||
var prefix = starts_with(url, VALID_PREFIXES);
|
||||
|
||||
if (prefix) {
|
||||
// if already rewriting url, must still check scheme
|
||||
if (starts_with(url, prefix + window.location.host + '/')) {
|
||||
var curr_scheme = window.location.protocol + '//';
|
||||
|
||||
// replace scheme to ensure using the correct server scheme
|
||||
if (starts_with(url, wb_orig_scheme) && (wb_orig_scheme != curr_scheme)) {
|
||||
url = curr_scheme + url.substring(wb_orig_scheme.length);
|
||||
}
|
||||
return url;
|
||||
}
|
||||
return wb_replay_date_prefix + url;
|
||||
@ -254,7 +273,11 @@ _WBWombat = (function() {
|
||||
});
|
||||
return true;
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
var info = "Can't redefine prop " + prop;
|
||||
if (obj && obj.tagName) {
|
||||
info += " on " + obj.tagName;
|
||||
}
|
||||
console.log(info);
|
||||
obj[prop] = value;
|
||||
return false;
|
||||
}
|
||||
@ -480,13 +503,6 @@ _WBWombat = (function() {
|
||||
async = true;
|
||||
}
|
||||
|
||||
// extra check for correct scheme here.. maybe move to rewrite_url..
|
||||
var curr_scheme = window.location.protocol + '//';
|
||||
|
||||
if (starts_with(url, wb_orig_scheme) && (wb_orig_scheme != curr_scheme)) {
|
||||
url = curr_scheme + url.substring(wb_orig_scheme.length);
|
||||
}
|
||||
|
||||
return orig.call(this, method, url, async, user, password);
|
||||
}
|
||||
|
||||
@ -507,7 +523,7 @@ _WBWombat = (function() {
|
||||
Element.prototype.setAttribute = function(name, value) {
|
||||
if (name) {
|
||||
var lowername = name.toLowerCase();
|
||||
if (lowername == "src" || lowername == "href") {
|
||||
if (equals_any(lowername, REWRITE_ATTRS)) {
|
||||
if (!this._no_rewrite) {
|
||||
value = rewrite_url(value);
|
||||
}
|
||||
@ -518,6 +534,21 @@ _WBWombat = (function() {
|
||||
};
|
||||
}
|
||||
|
||||
//============================================
|
||||
function init_createElementNS_fix()
|
||||
{
|
||||
if (!document.createElementNS) {
|
||||
return;
|
||||
}
|
||||
|
||||
document._orig_createElementNS = document.createElementNS;
|
||||
document.createElementNS = function(namespaceURI, qualifiedName)
|
||||
{
|
||||
namespaceURI = extract_orig(namespaceURI);
|
||||
return document._orig_createElementNS(namespaceURI, qualifiedName);
|
||||
}
|
||||
}
|
||||
|
||||
//============================================
|
||||
function init_image_override() {
|
||||
window.__Image = window.Image;
|
||||
@ -612,8 +643,6 @@ _WBWombat = (function() {
|
||||
//============================================
|
||||
function rewrite_style(value)
|
||||
{
|
||||
//console.log("style rewrite: " + value);
|
||||
|
||||
STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/g;
|
||||
|
||||
function style_replacer(match, n1, n2, n3, offset, string) {
|
||||
@ -694,7 +723,7 @@ _WBWombat = (function() {
|
||||
}
|
||||
|
||||
override_attr(created, "src");
|
||||
} else if (created.tagName && starts_with(created.tagName, SRC_TAGS)) {
|
||||
} else if (created.tagName && equals_any(created.tagName, SRC_TAGS)) {
|
||||
override_attr(created, "src");
|
||||
}
|
||||
|
||||
@ -710,22 +739,28 @@ _WBWombat = (function() {
|
||||
//============================================
|
||||
function init_postmessage_override()
|
||||
{
|
||||
if (!Window.prototype.postMessage) {
|
||||
if (!window.postMessage) {
|
||||
return;
|
||||
}
|
||||
|
||||
var orig = Window.prototype.postMessage;
|
||||
var orig = window.postMessage;
|
||||
|
||||
var postmessage_rewritten = function(message, targetOrigin, transfer) {
|
||||
message = {"origin": targetOrigin, "message": message};
|
||||
|
||||
if (targetOrigin && targetOrigin != "*") {
|
||||
targetOrigin = window.location.origin;
|
||||
}
|
||||
|
||||
|
||||
return orig.call(this, message, targetOrigin, transfer);
|
||||
}
|
||||
|
||||
window.postMessage = postmessage_rewritten;
|
||||
window.Window.prototype.postMessage = postmessage_rewritten;
|
||||
|
||||
if (Window.prototype.postMessage) {
|
||||
window.Window.prototype.postMessage = postmessage_rewritten;
|
||||
}
|
||||
|
||||
for (var i = 0; i < window.frames.length; i++) {
|
||||
try {
|
||||
@ -734,6 +769,30 @@ _WBWombat = (function() {
|
||||
console.log(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
window._orig_addEventListener = window.addEventListener;
|
||||
|
||||
window.addEventListener = function(type, listener, useCapture) {
|
||||
if (type == "message") {
|
||||
var orig_listener = listener;
|
||||
listener = function(event) {
|
||||
|
||||
var ne = new MessageEvent("message",
|
||||
{"bubbles": event.bubbles,
|
||||
"cancelable": event.cancelable,
|
||||
"data": event.data.message,
|
||||
"origin": event.data.origin,
|
||||
"lastEventId": event.lastEventId,
|
||||
"source": event.source,
|
||||
"ports": event.ports});
|
||||
|
||||
return orig_listener(ne);
|
||||
}
|
||||
}
|
||||
|
||||
return window._orig_addEventListener(type, listener, useCapture);
|
||||
}
|
||||
}
|
||||
|
||||
//============================================
|
||||
@ -882,6 +941,7 @@ _WBWombat = (function() {
|
||||
document.WB_wombat_domain = orig_host;
|
||||
document.WB_wombat_referrer = extract_orig(document.referrer);
|
||||
|
||||
|
||||
// History
|
||||
copy_history_func(window.history, 'pushState');
|
||||
copy_history_func(window.history, 'replaceState');
|
||||
@ -902,6 +962,9 @@ _WBWombat = (function() {
|
||||
// setAttribute
|
||||
init_setAttribute_override();
|
||||
|
||||
// ensure namespace urls are NOT rewritten
|
||||
init_createElementNS_fix();
|
||||
|
||||
// Image
|
||||
init_image_override();
|
||||
|
||||
|
@ -9,10 +9,3 @@
|
||||
</pre>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
{% if env.pywb_proxy_magic and err_url and status == '404 Not Found' %}
|
||||
<p>
|
||||
<a href="//select.{{ env.pywb_proxy_magic }}/{{ err_url }}">Try Different Collection</a>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
|
10
pywb/ui/not_found.html
Normal file
10
pywb/ui/not_found.html
Normal file
@ -0,0 +1,10 @@
|
||||
<h2>Url Not Found</h2>
|
||||
|
||||
The url <b>{{ url }}</b> could not be found in this collection.
|
||||
|
||||
{% if env.pywb_proxy_magic and url %}
|
||||
<p>
|
||||
<a href="//select.{{ env.pywb_proxy_magic }}/{{ url }}">Try Different Collection</a>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
@ -24,7 +24,8 @@ function ts_to_date(ts, is_gmt)
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<h2>pywb Sample Calendar Results</h2>
|
||||
<h2>pywb Query Results</h2>
|
||||
{% if cdx_lines | length > 0 %}
|
||||
<b>{{ cdx_lines | length }}</b> captures of <b>{{ url }}</b>
|
||||
<table id="captures" style="border-spacing: 10px;">
|
||||
<tr>
|
||||
@ -47,5 +48,8 @@ function ts_to_date(ts, is_gmt)
|
||||
<p>
|
||||
<i><b>* Unique captures are bold.</b> Other captures are duplicates of a previous capture.</i>
|
||||
</p>
|
||||
{% else %}
|
||||
<i>No captures found for <b>{{ url }}</b></i>
|
||||
{% endif %}
|
||||
</body>
|
||||
</html>
|
||||
|
@ -49,12 +49,9 @@ class SearchPageWbUrlHandler(WbUrlHandler):
|
||||
self.banner_html = None
|
||||
|
||||
def render_search_page(self, wbrequest, **kwargs):
|
||||
if self.search_view:
|
||||
return self.search_view.render_response(wbrequest=wbrequest,
|
||||
prefix=wbrequest.wb_prefix,
|
||||
**kwargs)
|
||||
else:
|
||||
return WbResponse.text_response('No Lookup Url Specified')
|
||||
return self.search_view.render_response(wbrequest=wbrequest,
|
||||
prefix=wbrequest.wb_prefix,
|
||||
**kwargs)
|
||||
|
||||
def __call__(self, wbrequest):
|
||||
# root search page
|
||||
@ -110,6 +107,9 @@ class WBHandler(SearchPageWbUrlHandler):
|
||||
super(WBHandler, self).__init__(config)
|
||||
|
||||
self.index_reader = query_handler
|
||||
self.not_found_view = (J2TemplateView.
|
||||
create_template(config.get('not_found_html'),
|
||||
'Not Found Error'))
|
||||
|
||||
cookie_maker = config.get('cookie_maker')
|
||||
record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker)
|
||||
@ -152,12 +152,19 @@ class WBHandler(SearchPageWbUrlHandler):
|
||||
cdx_callback)
|
||||
|
||||
def handle_not_found(self, wbrequest, nfe):
|
||||
if (not self.fallback_handler or
|
||||
wbrequest.wb_url.is_query() or
|
||||
wbrequest.wb_url.is_identity):
|
||||
raise
|
||||
# check fallback: only for replay queries and not for identity
|
||||
if (self.fallback_handler and
|
||||
not wbrequest.wb_url.is_query() and
|
||||
not wbrequest.wb_url.is_identity):
|
||||
return self.fallback_handler(wbrequest)
|
||||
|
||||
return self.fallback_handler(wbrequest)
|
||||
# if capture query, just return capture page
|
||||
if wbrequest.wb_url.is_query():
|
||||
return self.index_reader.make_cdx_response(wbrequest, [], 'html')
|
||||
else:
|
||||
return self.not_found_view.render_response(status='404 Not Found',
|
||||
env=wbrequest.env,
|
||||
url=wbrequest.wb_url.url)
|
||||
|
||||
def __str__(self):
|
||||
return 'Web Archive Replay Handler'
|
||||
|
@ -34,6 +34,7 @@ DEFAULTS = {
|
||||
'search_html': 'ui/search.html',
|
||||
'home_html': 'ui/index.html',
|
||||
'error_html': 'ui/error.html',
|
||||
'not_found_html': 'ui/not_found.html',
|
||||
|
||||
'proxy_select_html': 'ui/proxy_select.html',
|
||||
'proxy_cert_download_html': 'ui/proxy_cert_download.html',
|
||||
|
@ -61,7 +61,7 @@ def is_wb_handler(obj):
|
||||
#=================================================================
|
||||
class J2TemplateView(object):
|
||||
env_globals = {'static_path': 'static/default',
|
||||
'package': 'pywb'}
|
||||
'packages': ['pywb']}
|
||||
|
||||
def __init__(self, filename):
|
||||
template_dir, template_file = path.split(filename)
|
||||
@ -84,8 +84,11 @@ class J2TemplateView(object):
|
||||
# add relative and absolute path loaders for banner support
|
||||
loaders.append(FileSystemLoader('.'))
|
||||
loaders.append(FileSystemLoader('/'))
|
||||
loaders.append(PackageLoader(self.env_globals['package'],
|
||||
template_dir))
|
||||
|
||||
# add loaders for all specified packages
|
||||
for package in self.env_globals['packages']:
|
||||
loaders.append(PackageLoader(package,
|
||||
template_dir))
|
||||
return loaders
|
||||
|
||||
def render_to_string(self, **kwargs):
|
||||
|
2
setup.py
2
setup.py
@ -34,7 +34,7 @@ class PyTest(TestCommand):
|
||||
|
||||
setup(
|
||||
name='pywb',
|
||||
version='0.7.5',
|
||||
version='0.7.6',
|
||||
url='https://github.com/ikreymer/pywb',
|
||||
author='Ilya Kreymer',
|
||||
author_email='ikreymer@gmail.com',
|
||||
|
@ -86,6 +86,10 @@ home_html: ui/index.html
|
||||
# if omitted, a text response is returned
|
||||
error_html: ui/error.html
|
||||
|
||||
|
||||
# template for 404 not found error, may be customized per collection
|
||||
not_found_html: ui/not_found.html
|
||||
|
||||
# ==== Other Paths ====
|
||||
|
||||
# Rewrite urls with absolute paths instead of relative
|
||||
|
@ -77,6 +77,13 @@ class TestWb:
|
||||
# 17 Captures + header
|
||||
assert len(resp.html.find_all('tr')) == 18
|
||||
|
||||
def test_calendar_not_found(self):
|
||||
# query with no results
|
||||
resp = self.testapp.get('/pywb/*/http://not-exist.example.com')
|
||||
self._assert_basic_html(resp)
|
||||
assert 'No captures found' in resp.body, resp.body
|
||||
assert len(resp.html.find_all('tr')) == 0
|
||||
|
||||
def test_cdx_query(self):
|
||||
resp = self.testapp.get('/pywb/cdx_/*/http://www.iana.org/')
|
||||
self._assert_basic_text(resp)
|
||||
@ -374,6 +381,11 @@ class TestWb:
|
||||
assert resp.status_int == 403
|
||||
assert 'Excluded' in resp.body
|
||||
|
||||
def test_replay_not_found(self):
|
||||
resp = self.testapp.head('/pywb/http://not-exist.example.com', status=404)
|
||||
assert resp.content_type == 'text/html'
|
||||
assert resp.status_int == 404
|
||||
|
||||
def test_static_content(self):
|
||||
resp = self.testapp.get('/static/test/route/wb.css')
|
||||
assert resp.status_int == 200
|
||||
|
Loading…
x
Reference in New Issue
Block a user