From 71d9e58d7c9dc12de11e6de9702f285e25f2db62 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Mon, 12 Jan 2015 00:38:51 -0800
Subject: [PATCH 1/7] fixup changes for 0.7.5
---
CHANGES.rst | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/CHANGES.rst b/CHANGES.rst
index 7d7e6d7c..d5a9d93d 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -2,12 +2,16 @@ pywb 0.7.5 changelist
~~~~~~~~~~~~~~~~~~~~~
* Cross platform fixes to support Windows -- all tests pass on Linux, OS X and Windows now. Improved cross-platform support includes:
+
- read all files as binary to avoid line ending issues
- - properly convert url <-> file
- - avoid platform dependent apis
+ - properly convert between platform dependent file paths and urls
+ - add .gitattributes to ensure line endings on *.warc*, *.arc*, *.cdx* files are unaltered
+ - avoid platform dependent apis (eg. %s for strftime)
* Change any unhandled exceptions to result in a 500 error, instead of 400.
+* Setup: switch to ``zip_safe=True`` to allow for embedding pywb egg in one-file app with `pyinstaller `_
+
* More compresensive client side ``src`` attribute rewriting (via wombat.js), additional server-side HTML tag rewriting.
@@ -39,6 +43,7 @@ pywb 0.7.1 changelist
* Use `youtube-dl `_ to find actual video streams from page urls, record video info.
* New, improved wombat 2.1 -- improved rewriting of dynamic content, including:
+
- setAttribute override
- Date override sets date to replay timestamp
- Image() object override
From 48b7751f80dc78af1799762a39f742bab8feb90a Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Mon, 19 Jan 2015 21:54:11 -0500
Subject: [PATCH 2/7] bump version to 0.7.6; jinja2: allow adding multiple
packages to search path
---
CHANGES.rst | 5 +++++
README.rst | 2 +-
pywb/webapp/views.py | 9 ++++++---
setup.py | 2 +-
4 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/CHANGES.rst b/CHANGES.rst
index d5a9d93d..0d509409 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,3 +1,8 @@
+pywb 0.7.6 changelist
+~~~~~~~~~~~~~~~~~~~~~
+
+* packaging: allow adding multiple packages for Jinja2 template resolving
+
pywb 0.7.5 changelist
~~~~~~~~~~~~~~~~~~~~~
diff --git a/README.rst b/README.rst
index 17162d5a..a445f11f 100644
--- a/README.rst
+++ b/README.rst
@@ -1,4 +1,4 @@
-PyWb 0.7.5
+PyWb 0.7.6
==========
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop
diff --git a/pywb/webapp/views.py b/pywb/webapp/views.py
index 23c528e2..9f2dd1e7 100644
--- a/pywb/webapp/views.py
+++ b/pywb/webapp/views.py
@@ -61,7 +61,7 @@ def is_wb_handler(obj):
#=================================================================
class J2TemplateView(object):
env_globals = {'static_path': 'static/default',
- 'package': 'pywb'}
+ 'packages': ['pywb']}
def __init__(self, filename):
template_dir, template_file = path.split(filename)
@@ -84,8 +84,11 @@ class J2TemplateView(object):
# add relative and absolute path loaders for banner support
loaders.append(FileSystemLoader('.'))
loaders.append(FileSystemLoader('/'))
- loaders.append(PackageLoader(self.env_globals['package'],
- template_dir))
+
+ # add loaders for all specified packages
+ for package in self.env_globals['packages']:
+ loaders.append(PackageLoader(package,
+ template_dir))
return loaders
def render_to_string(self, **kwargs):
diff --git a/setup.py b/setup.py
index e53c340e..2efb448b 100755
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@ class PyTest(TestCommand):
setup(
name='pywb',
- version='0.7.5',
+ version='0.7.6',
url='https://github.com/ikreymer/pywb',
author='Ilya Kreymer',
author_email='ikreymer@gmail.com',
From c9b2e3e69e710dac4433f9dd87d938687f32b5ab Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Thu, 22 Jan 2015 16:42:52 -0500
Subject: [PATCH 3/7] wombat 2.2 improvements: * for postMessage, add receive
message override which uses the original origin, to fix message passing tests
that check for origin
* for createElementNS, ensure that the namespace url is not rewritten
* add equals_any() method, add "poster" attr to attr rewriting list
(solves several issues for vine replay, #62)
---
pywb/static/wombat.js | 99 +++++++++++++++++++++++++++++++++++--------
1 file changed, 81 insertions(+), 18 deletions(-)
diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js
index dc9b2205..f0582b43 100644
--- a/pywb/static/wombat.js
+++ b/pywb/static/wombat.js
@@ -18,7 +18,7 @@ This file is part of pywb, https://github.com/ikreymer/pywb
*/
//============================================
-// Wombat JS-Rewriting Library v2.1
+// Wombat JS-Rewriting Library v2.2
//============================================
_WBWombat = (function() {
@@ -68,6 +68,16 @@ _WBWombat = (function() {
return undefined;
}
+ //============================================
+ function equals_any(string, arr) { // return the entry of arr that is === string, else undefined
+ for (var i = 0; i < arr.length; i++) {
+ if (string === arr[i]) { // exact match only, unlike the prefix test in starts_with()
+ return arr[i];
+ }
+ }
+ return undefined; // no entry matched exactly
+ }
+
//============================================
function ends_with(str, suffix) {
if (str.indexOf(suffix, str.length - suffix.length) !== -1) {
@@ -107,6 +117,8 @@ _WBWombat = (function() {
var SRC_TAGS = ["IMG", "SCRIPT", "VIDEO", "AUDIO", "SOURCE", "EMBED", "INPUT"];
+ var REWRITE_ATTRS = ["src", "href", "poster"];
+
//============================================
function rewrite_url_(url) {
// If undefined, just return it
@@ -158,12 +170,19 @@ _WBWombat = (function() {
return wb_replay_date_prefix + wb_orig_host + url;
}
- // If full url starting with http://, add prefix
-
+ // If full url starting with http://, https:// or //
+ // add rewrite prefix
var prefix = starts_with(url, VALID_PREFIXES);
if (prefix) {
+ // if already rewriting url, must still check scheme
if (starts_with(url, prefix + window.location.host + '/')) {
+ var curr_scheme = window.location.protocol + '//';
+
+ // replace scheme to ensure using the correct server scheme
+ if (starts_with(url, wb_orig_scheme) && (wb_orig_scheme != curr_scheme)) {
+ url = curr_scheme + url.substring(wb_orig_scheme.length);
+ }
return url;
}
return wb_replay_date_prefix + url;
@@ -254,7 +273,11 @@ _WBWombat = (function() {
});
return true;
} catch (e) {
- console.log(e);
+ var info = "Can't redefine prop " + prop;
+ if (obj && obj.tagName) {
+ info += " on " + obj.tagName;
+ }
+ console.log(info);
obj[prop] = value;
return false;
}
@@ -480,13 +503,6 @@ _WBWombat = (function() {
async = true;
}
- // extra check for correct scheme here.. maybe move to rewrite_url..
- var curr_scheme = window.location.protocol + '//';
-
- if (starts_with(url, wb_orig_scheme) && (wb_orig_scheme != curr_scheme)) {
- url = curr_scheme + url.substring(wb_orig_scheme.length);
- }
-
return orig.call(this, method, url, async, user, password);
}
@@ -507,7 +523,7 @@ _WBWombat = (function() {
Element.prototype.setAttribute = function(name, value) {
if (name) {
var lowername = name.toLowerCase();
- if (lowername == "src" || lowername == "href") {
+ if (equals_any(lowername, REWRITE_ATTRS)) {
if (!this._no_rewrite) {
value = rewrite_url(value);
}
@@ -518,6 +534,21 @@ _WBWombat = (function() {
};
}
+ //============================================
+ function init_createElementNS_fix() // wrap document.createElementNS so namespaceURI goes through extract_orig() first
+ {
+ if (!document.createElementNS) {
+ return; // nothing to wrap when createElementNS is unavailable
+ }
+
+ document._orig_createElementNS = document.createElementNS; // keep the unwrapped method reachable on document
+ document.createElementNS = function(namespaceURI, qualifiedName)
+ {
+ namespaceURI = extract_orig(namespaceURI); // presumably undoes replay-prefix rewriting; extract_orig is defined outside this hunk -- confirm
+ return document._orig_createElementNS(namespaceURI, qualifiedName); // invoked on document so `this` stays the document
+ }
+ }
+
//============================================
function init_image_override() {
window.__Image = window.Image;
@@ -612,8 +643,6 @@ _WBWombat = (function() {
//============================================
function rewrite_style(value)
{
- //console.log("style rewrite: " + value);
-
STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/g;
function style_replacer(match, n1, n2, n3, offset, string) {
@@ -694,7 +723,7 @@ _WBWombat = (function() {
}
override_attr(created, "src");
- } else if (created.tagName && starts_with(created.tagName, SRC_TAGS)) {
+ } else if (created.tagName && equals_any(created.tagName, SRC_TAGS)) {
override_attr(created, "src");
}
@@ -710,22 +739,28 @@ _WBWombat = (function() {
//============================================
function init_postmessage_override()
{
- if (!Window.prototype.postMessage) {
+ if (!window.postMessage) {
return;
}
- var orig = Window.prototype.postMessage;
+ var orig = window.postMessage;
var postmessage_rewritten = function(message, targetOrigin, transfer) {
+ message = {"origin": targetOrigin, "message": message};
+
if (targetOrigin && targetOrigin != "*") {
targetOrigin = window.location.origin;
}
+
return orig.call(this, message, targetOrigin, transfer);
}
window.postMessage = postmessage_rewritten;
- window.Window.prototype.postMessage = postmessage_rewritten;
+
+ if (Window.prototype.postMessage) {
+ window.Window.prototype.postMessage = postmessage_rewritten;
+ }
for (var i = 0; i < window.frames.length; i++) {
try {
@@ -734,6 +769,30 @@ _WBWombat = (function() {
console.log(e);
}
}
+
+
+ window._orig_addEventListener = window.addEventListener;
+
+ window.addEventListener = function(type, listener, useCapture) {
+ if (type == "message") {
+ var orig_listener = listener;
+ listener = function(event) {
+
+ var ne = new MessageEvent("message",
+ {"bubbles": event.bubbles,
+ "cancelable": event.cancelable,
+ "data": event.data.message,
+ "origin": event.data.origin,
+ "lastEventId": event.lastEventId,
+ "source": event.source,
+ "ports": event.ports});
+
+ return orig_listener(ne);
+ }
+ }
+
+ return window._orig_addEventListener(type, listener, useCapture);
+ }
}
//============================================
@@ -882,6 +941,7 @@ _WBWombat = (function() {
document.WB_wombat_domain = orig_host;
document.WB_wombat_referrer = extract_orig(document.referrer);
+
// History
copy_history_func(window.history, 'pushState');
copy_history_func(window.history, 'replaceState');
@@ -902,6 +962,9 @@ _WBWombat = (function() {
// setAttribute
init_setAttribute_override();
+ // ensure namespace urls are NOT rewritten
+ init_createElementNS_fix();
+
// Image
init_image_override();
From 80fd47ba3e21e02fe58151afa2cf57a83e6c3a7f Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Thu, 22 Jan 2015 16:45:09 -0500
Subject: [PATCH 4/7] add rules for vine (#62)
---
pywb/rules.yaml | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/pywb/rules.yaml b/pywb/rules.yaml
index cb87a843..325bbd9d 100644
--- a/pywb/rules.yaml
+++ b/pywb/rules.yaml
@@ -126,7 +126,15 @@ rules:
- videoFileId
- signature
+ # vine
+ - url_prefix: 'co,vine,cdn,'
+ rewrite:
+ js_rewrite_location: urls
+ js_regexs:
+ - match: 'window.location'
+ replace: 'WB_wombat_location'
+
# youtube rules
#=================================================================
From 38e3bbbaefc4c7bcc13623822b92411661088bc9 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Sat, 24 Jan 2015 12:32:50 -0800
Subject: [PATCH 5/7] templates: add new 'not_found.html' template, which will
be called for any missing replay request instead of default error.html;
'not_found_html' is settable in the config per collection, as per #65. For a
not found index query, still use query.html but add a condition to check for 0
results. Add more query and replay not-found tests. Remove unused conditional
(for search_view -- always exists).
---
pywb/ui/error.html | 7 -------
pywb/ui/not_found.html | 10 ++++++++++
pywb/ui/query.html | 6 +++++-
pywb/webapp/handlers.py | 29 ++++++++++++++++++-----------
pywb/webapp/pywb_init.py | 1 +
tests/test_config.yaml | 4 ++++
tests/test_integration.py | 12 ++++++++++++
7 files changed, 50 insertions(+), 19 deletions(-)
create mode 100644 pywb/ui/not_found.html
diff --git a/pywb/ui/error.html b/pywb/ui/error.html
index b122fc38..b3a8c478 100644
--- a/pywb/ui/error.html
+++ b/pywb/ui/error.html
@@ -9,10 +9,3 @@
{% endif %}
-
-{% if env.pywb_proxy_magic and err_url and status == '404 Not Found' %}
-
-Try Different Collection
-
-{% endif %}
-
diff --git a/pywb/ui/not_found.html b/pywb/ui/not_found.html
new file mode 100644
index 00000000..39faa3b3
--- /dev/null
+++ b/pywb/ui/not_found.html
@@ -0,0 +1,10 @@
+Url Not Found
+
+The url {{ url }} could not be found in this collection.
+
+{% if env.pywb_proxy_magic and url %}
+
+Try Different Collection
+
+{% endif %}
+
diff --git a/pywb/ui/query.html b/pywb/ui/query.html
index 2d1f5c86..3e54534b 100644
--- a/pywb/ui/query.html
+++ b/pywb/ui/query.html
@@ -24,7 +24,8 @@ function ts_to_date(ts, is_gmt)
- pywb Sample Calendar Results
+ pywb Query Results
+ {% if cdx_lines | length > 0 %}
{{ cdx_lines | length }} captures of {{ url }}
@@ -47,5 +48,8 @@ function ts_to_date(ts, is_gmt)
* Unique captures are bold. Other captures are duplicates of a previous capture.
+ {% else %}
+ No captures found for {{ url }}
+ {% endif %}