diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py
index 36601e98..a855cda2 100644
--- a/pywb/rewrite/html_rewriter.py
+++ b/pywb/rewrite/html_rewriter.py
@@ -45,6 +45,7 @@ class HTMLRewriterMixin(object):
'q': {'cite': defmod},
'ref': {'href': 'oe_'},
'script': {'src': 'js_'},
+ 'source': {'src': 'oe_'},
'div': {'data-src': defmod,
'data-uri': defmod},
'li': {'data-src': defmod,
diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py
index 5f429339..0fffc7c2 100644
--- a/pywb/rewrite/regex_rewriters.py
+++ b/pywb/rewrite/regex_rewriters.py
@@ -126,6 +126,8 @@ class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
rules = rules + [
(r'(? ' + rewritten);
} else {
- //console.log('NOT REWRITTEN ' + url);
+ console.log('NOT REWRITTEN ' + url);
}
return rewritten;
}
+
+ //============================================
+ var HTTP_PREFIX = "http://";
+ var HTTPS_PREFIX = "https://";
+ var REL_PREFIX = "//";
+
+ var VALID_PREFIXES = [HTTP_PREFIX, HTTPS_PREFIX, REL_PREFIX];
+ var IGNORE_PREFIXES = ["#", "about:", "data:", "mailto:", "javascript:"];
+
+ //============================================
function rewrite_url_(url) {
- var http_prefix = "http://";
- var https_prefix = "https://";
- var rel_prefix = "//";
-
// If not dealing with a string, just return it
if (!url || (typeof url) != "string") {
return url;
}
- // ignore anchors
- if (starts_with(url, "#")) {
+ // just in case wombat reference made it into url!
+ url = url.replace("WB_wombat_", "");
+
+ // ignore anchors, about, data
+ if (starts_with(url, IGNORE_PREFIXES)) {
return url;
}
// If starts with prefix, no rewriting needed
// Only check replay prefix (no date) as date may be different for each
// capture
- if (starts_with(url, wb_replay_prefix)) {
+ if (starts_with(url, wb_replay_prefix) || starts_with(url, window.location.origin + wb_replay_prefix)) {
return url;
}
// If server relative url, add prefix and original host
- if (url.charAt(0) == "/" && !starts_with(url, rel_prefix)) {
+ if (url.charAt(0) == "/" && !starts_with(url, REL_PREFIX)) {
// Already a relative url, don't make any changes!
- if (url.indexOf(wb_capture_date_part) >= 0) {
+ if (wb_capture_date_part && url.indexOf(wb_capture_date_part) >= 0) {
return url;
}
@@ -116,9 +133,7 @@ WB_wombat_init = (function() {
// If full url starting with http://, add prefix
- var prefix = starts_with(url, http_prefix) ||
- starts_with(url, https_prefix) ||
- starts_with(url, rel_prefix);
+ var prefix = starts_with(url, VALID_PREFIXES);
if (prefix) {
if (starts_with(url, prefix + window.location.host + '/')) {
@@ -130,25 +145,12 @@ WB_wombat_init = (function() {
// May or may not be a hostname, call function to determine
// If it is, add the prefix and make sure port is removed
if (is_host_url(url) && !starts_with(url, window.location.host + '/')) {
- return wb_replay_date_prefix + http_prefix + url;
+ return wb_replay_date_prefix + HTTP_PREFIX + url;
}
return url;
}
- //============================================
- function copy_object_fields(obj) {
- var new_obj = {};
-
- for (prop in obj) {
- if ((typeof obj[prop]) != "function") {
- new_obj[prop] = obj[prop];
- }
- }
-
- return new_obj;
- }
-
//============================================
function extract_orig(href) {
if (!href) {
@@ -162,6 +164,20 @@ WB_wombat_init = (function() {
// extract original url from wburl
if (index > 0) {
href = href.substr(index + 1);
+ } else {
+ index = href.indexOf(wb_replay_prefix);
+ if (index >= 0) {
+ href = href.substr(index + wb_replay_prefix.length);
+ }
+ if ((href.length > 4) &&
+ (href.charAt(2) == "_") &&
+ (href.charAt(3) == "/")) {
+ href = href.substr(4);
+ }
+
+ if (!starts_with(href, "http")) {
+ href = HTTP_PREFIX + href;
+ }
}
// remove trailing slash
@@ -171,55 +187,142 @@ WB_wombat_init = (function() {
return href;
}
-
+
//============================================
- function copy_location_obj(loc) {
- var new_loc = copy_object_fields(loc);
-
- new_loc._orig_loc = loc;
- new_loc._orig_href = loc.href;
+ // Define custom property
+ function defProp(obj, prop, value, set_func, get_func) {
+ var key = "_" + prop;
+ obj[key] = value;
+
+ try {
+ Object.defineProperty(obj, prop, {
+ configurable: false,
+ enumerable: true,
+ set: function(newval) {
+ var result = set_func.call(obj, newval);
+ if (result != undefined) {
+ obj[key] = result;
+ }
+ },
+ get: function() {
+ if (get_func) {
+ return get_func.call(obj, obj[key]);
+ } else {
+ return obj[key];
+ }
+ }
+ });
+ return true;
+ } catch (e) {
+ console.log(e);
+ obj[prop] = value;
+ return false;
+ }
+ }
+
+ //============================================
+ //Define WombatLocation
+
+ function WombatLocation(loc) {
+ this._orig_loc = loc;
+ this._orig_href = loc.href;
// Rewrite replace and assign functions
- new_loc.replace = function(url) {
- this._orig_loc.replace(rewrite_url(url));
+ this.replace = function(url) {
+ return this._orig_loc.replace(rewrite_url(url));
}
- new_loc.assign = function(url) {
- this._orig_loc.assign(rewrite_url(url));
+ this.assign = function(url) {
+ return this._orig_loc.assign(rewrite_url(url));
}
- new_loc.reload = loc.reload;
-
+ this.reload = loc.reload;
+
// Adapted from:
// https://gist.github.com/jlong/2428561
var parser = document.createElement('a');
- parser.href = extract_orig(new_loc._orig_href);
+ var href = extract_orig(this._orig_href);
+ parser.href = href;
+
+ //console.log(this._orig_href + " -> " + tmp_href);
+ this._autooverride = false;
+
+ var _set_hash = function(hash) {
+ this._orig_loc.hash = hash;
+ return this._orig_loc.hash;
+ }
+
+ var _get_hash = function() {
+ return this._orig_loc.hash;
+ }
+
+ var _get_url_with_hash = function(url) {
+ return url + this._orig_loc.hash;
+ }
+
+ href = parser.href;
+ var hash = parser.hash;
+
+ if (hash) {
+ var hidx = href.lastIndexOf("#");
+ if (hidx > 0) {
+ href = href.substring(0, hidx);
+ }
+ }
+
+ if (Object.defineProperty) {
+ var res1 = defProp(this, "href", href,
+ this.assign,
+ _get_url_with_hash);
+
+ var res2 = defProp(this, "hash", parser.hash,
+ _set_hash,
+ _get_hash);
+
+ this._autooverride = res1 && res2;
+ }
+
+ this.host = parser.host;
+ this.hostname = parser.hostname;
- new_loc.hash = parser.hash;
- new_loc.host = parser.host;
- new_loc.hostname = parser.hostname;
- new_loc.href = parser.href;
-
- if (new_loc.origin) {
- new_loc.origin = parser.origin;
+ if (parser.origin) {
+ this.origin = parser.origin;
}
- new_loc.pathname = parser.pathname;
- new_loc.port = parser.port
- new_loc.protocol = parser.protocol;
- new_loc.search = parser.search;
+ this.pathname = parser.pathname;
+ this.port = parser.port
+ this.protocol = parser.protocol;
+ this.search = parser.search;
- new_loc.toString = function() {
+ this.toString = function() {
return this.href;
}
-
- return new_loc;
+
+ // Copy any remaining properties
+ for (prop in loc) {
+ if (this.hasOwnProperty(prop)) {
+ continue;
+ }
+
+ if ((typeof loc[prop]) != "function") {
+ this[prop] = loc[prop];
+ }
+ }
}
//============================================
- function update_location(req_href, orig_href, actual_location) {
- if (!req_href || req_href == orig_href) {
+ function update_location(req_href, orig_href, actual_location, wombat_loc) {
+ if (!req_href) {
return;
}
+ if (req_href == orig_href) {
+ // Reset wombat loc to the unrewritten version
+ //if (wombat_loc) {
+ // wombat_loc.href = extract_orig(orig_href);
+ //}
+ return;
+ }
+
+
ext_orig = extract_orig(orig_href);
ext_req = extract_orig(req_href);
@@ -235,19 +338,19 @@ WB_wombat_init = (function() {
}
//============================================
- function check_location_change(loc, is_top) {
- var locType = (typeof loc);
+ function check_location_change(wombat_loc, is_top) {
+ var locType = (typeof wombat_loc);
var actual_location = (is_top ? window.top.location : window.location);
- //console.log(loc.href);
-
// String has been assigned to location, so assign it
if (locType == "string") {
- update_location(loc, actual_location.href, actual_location)
-
+ update_location(wombat_loc, actual_location.href, actual_location);
+
} else if (locType == "object") {
- update_location(loc.href, loc._orig_href, actual_location);
+ update_location(wombat_loc.href,
+ wombat_loc._orig_href,
+ actual_location);
}
}
@@ -261,10 +364,21 @@ WB_wombat_init = (function() {
check_location_change(window.WB_wombat_location, false);
- if (window.self.location != window.top.location) {
+ // Only check top if its a different window
+ if (window.self.WB_wombat_location != window.top.WB_wombat_location) {
check_location_change(window.top.WB_wombat_location, true);
}
+// lochash = window.WB_wombat_location.hash;
+//
+// if (lochash) {
+// window.location.hash = lochash;
+//
+// //if (window.top.update_wb_url) {
+// // window.top.location.hash = lochash;
+// //}
+// }
+
wb_wombat_updating = false;
}
@@ -328,6 +442,7 @@ WB_wombat_init = (function() {
window.XMLHttpRequest.prototype.open = open_rewritten;
}
+ //============================================
function init_worker_override() {
if (!window.Worker) {
return;
@@ -338,6 +453,7 @@ WB_wombat_init = (function() {
window.Worker = undefined;
}
+ //============================================
function rewrite_attr(elem, name) {
if (!elem || !elem.getAttribute) {
return;
@@ -359,6 +475,7 @@ WB_wombat_init = (function() {
elem.setAttribute(name, value);
}
+ //============================================
function init_dom_override() {
if (!Node || !Node.prototype) {
return;
@@ -376,9 +493,9 @@ WB_wombat_init = (function() {
var desc;
if (child instanceof DocumentFragment) {
- desc = child.querySelectorAll("*[href],*[src]");
+ //desc = child.querySelectorAll("*[href],*[src]");
} else if (child.getElementsByTagName) {
- desc = child.getElementsByTagName("*");
+ //desc = child.getElementsByTagName("*");
}
if (desc) {
@@ -401,19 +518,55 @@ WB_wombat_init = (function() {
//============================================
function wombat_init(replay_prefix, capture_date, orig_host, timestamp) {
wb_replay_prefix = replay_prefix;
- wb_replay_date_prefix = replay_prefix + capture_date + "/";
- wb_capture_date_part = "/" + capture_date + "/";
- wb_orig_host = "http://" + orig_host;
+ wb_replay_date_prefix = replay_prefix + capture_date + "em_/";
+
+ if (capture_date.length > 0) {
+ wb_capture_date_part = "/" + capture_date + "/";
+ } else {
+ wb_capture_date_part = "";
+ }
+
+ wb_orig_host = HTTP_PREFIX + orig_host;
// Location
- window.WB_wombat_location = copy_location_obj(window.self.location);
- document.WB_wombat_location = window.WB_wombat_location;
+ var wombat_location = new WombatLocation(window.self.location);
+
+ if (wombat_location._autooverride) {
+
+ var setter = function(val) {
+ if (typeof(val) == "string") {
+ if (starts_with(val, "about:")) {
+ return undefined;
+ }
+ this._WB_wombat_location.href = val;
+ }
+ }
+
+ defProp(window, "WB_wombat_location", wombat_location, setter);
+ defProp(document, "WB_wombat_location", wombat_location, setter);
+ } else {
+ // Check quickly after page load
+ setTimeout(check_all_locations, 500);
+
+ // Check periodically every few seconds
+ setInterval(check_all_locations, 500);
+ }
+
+ var is_framed = (window.top.update_wb_url != undefined);
- //if (window.self.location != window.top.location) {
- // window.top.WB_wombat_location = copy_location_obj(window.top.location);
- //}
- window.top.WB_wombat_location = window.WB_wombat_location;
+ if (window.self.location != window.top.location) {
+ if (is_framed) {
+ window.top.WB_wombat_location = window.WB_wombat_location;
+ window.WB_wombat_top = window.self;
+ } else {
+ window.top.WB_wombat_location = new WombatLocation(window.top.location);
+
+ window.WB_wombat_top = window.top;
+ }
+ } else {
+ window.WB_wombat_top = window.top;
+ }
//if (window.opener) {
// window.opener.WB_wombat_location = copy_location_obj(window.opener.location);
@@ -421,6 +574,7 @@ WB_wombat_init = (function() {
// Domain
document.WB_wombat_domain = orig_host;
+ document.WB_wombat_referrer = extract_orig(document.referrer);
// History
copy_history_func(window.history, 'pushState');
@@ -434,15 +588,9 @@ WB_wombat_init = (function() {
init_dom_override();
// Random
- init_seeded_random(timestamp);
+ init_seeded_random(timestamp);
}
- // Check quickly after page load
- setTimeout(check_all_locations, 100);
-
- // Check periodically every few seconds
- setInterval(check_all_locations, 500);
-
return wombat_init;
})(this);