mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
video: add video rewriting, using the vidrw script on the client side and youtube-dl on the server
add vi_ modifier:
- on record: gets video_info from youtube-dl and sends it to the proxy, if any, via PUTMETA to create a metadata record
- on playback: fetches the special metadata record with the video info and returns it to the client as JSON
- vidrw script: fetches the video info, if any, and attempts to replace iframe and embed tags (so far) which are videos
wombat: export the extract_orig function, fix spaces and use object instance semantics
This commit is contained in:
parent
a3b931b45e
commit
5b9dcba15f
@ -138,11 +138,7 @@ class HttpsUrlRewriter(UrlRewriter):
|
|||||||
HTTPS = 'https://'
|
HTTPS = 'https://'
|
||||||
|
|
||||||
def rewrite(self, url, mod=None):
|
def rewrite(self, url, mod=None):
|
||||||
if url.startswith(self.HTTPS):
|
return self.remove_https(url)
|
||||||
result = self.HTTP + url[len(self.HTTPS):]
|
|
||||||
return result
|
|
||||||
else:
|
|
||||||
return url
|
|
||||||
|
|
||||||
def get_new_url(self, **kwargs):
|
def get_new_url(self, **kwargs):
|
||||||
return kwargs.get('url')
|
return kwargs.get('url')
|
||||||
@ -155,3 +151,12 @@ class HttpsUrlRewriter(UrlRewriter):
|
|||||||
|
|
||||||
def deprefix_url(self):
|
def deprefix_url(self):
|
||||||
return self.wburl.url
|
return self.wburl.url
|
||||||
|
|
||||||
|
@staticmethod
def remove_https(url):
    """Downgrade an https:// url to its http:// equivalent.

    If ``url`` starts with ``HttpsUrlRewriter.HTTPS``, that prefix is
    swapped for ``HttpsUrlRewriter.HTTP`` and the rest of the url is kept
    as-is. Any other url is returned unchanged.
    """
    https_prefix = HttpsUrlRewriter.HTTPS

    # Guard clause: nothing to do unless the url is https
    if not url.startswith(https_prefix):
        return url

    remainder = url[len(https_prefix):]
    return HttpsUrlRewriter.HTTP + remainder
|
||||||
|
@ -115,7 +115,22 @@ rules:
|
|||||||
- ownerId
|
- ownerId
|
||||||
- videoFileId
|
- videoFileId
|
||||||
- signature
|
- signature
|
||||||
|
|
||||||
|
|
||||||
|
# youtube rules
|
||||||
|
#=================================================================
|
||||||
|
|
||||||
|
- url_prefix: 'com,youtube)/get_video_info'
|
||||||
|
|
||||||
|
fuzzy_lookup:
|
||||||
|
- video_id
|
||||||
|
- html5
|
||||||
|
|
||||||
|
|
||||||
|
- url_prefix: 'com,googlevideo,'
|
||||||
|
|
||||||
|
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]+).*(range=[^&]+)'
|
||||||
|
|
||||||
|
|
||||||
# testing rules -- not for valid domain
|
# testing rules -- not for valid domain
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
110
pywb/static/vidrw.js
Normal file
110
pywb/static/vidrw.js
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
/*
|
||||||
|
Copyright(c) 2013-2014 Ilya Kreymer. Released under the GNU General Public License.
|
||||||
|
|
||||||
|
This file is part of pywb, https://github.com/ikreymer/pywb
|
||||||
|
|
||||||
|
pywb is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
pywb is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with pywb. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// VidRw 1.0 -- video rewriting
|
||||||
|
|
||||||
|
// Declared with `var` so that loading the script in strict mode does not
// throw on an implicit global assignment.
var __wbvidrw = (function() {

    // Set once a scan has found at least one candidate element;
    // later calls to check_videos() are then no-ops.
    var already_checked = false;

    // Copy an array-like (e.g. a live HTMLCollection) into a plain array.
    function to_array(list) {
        return Array.prototype.slice.call(list);
    }

    // Copy attribute `name` from one element to another, only if it is set.
    // (getAttribute returns null for a missing attribute, which setAttribute
    // would otherwise stringify to "null".)
    function copy_attr(from_elem, to_elem, name) {
        var value = from_elem.getAttribute(name);

        if (value !== null && value !== "") {
            to_elem.setAttribute(name, value);
        }
    }

    /**
     * Scan the document for <iframe> and <embed> elements and look up
     * video info for each one that has a "src" attribute.
     */
    function check_videos() {
        if (already_checked) {
            return;
        }

        // Snapshot the live collections up front: a replacement removes
        // elements from the DOM, which would otherwise shift the indices
        // of the collection being iterated. Iframes are handled first,
        // then embeds.
        var candidates = to_array(document.getElementsByTagName("iframe"))
                .concat(to_array(document.getElementsByTagName("embed")));

        if (candidates.length > 0) {
            already_checked = true;
        }

        for (var i = 0; i < candidates.length; i++) {
            check_replacement(candidates[i], candidates[i].getAttribute("src"));
        }
    }

    /**
     * Request the video info for `src` through the "vi_" modifier and,
     * on a 200 response with valid JSON, replace `elem` with a video.
     *
     * @param elem  the iframe/embed element that may be replaced
     * @param src   value of the element's "src" attribute (may be empty)
     */
    function check_replacement(elem, src) {
        if (!src) {
            return;
        }

        src = _wb_wombat.extract_orig(src);

        var xhr = new XMLHttpRequest();

        // The url below is already fully prefixed; this flag tells wombat's
        // XMLHttpRequest.open override not to rewrite it again.
        xhr._no_rewrite = true;
        xhr.open('GET', wbinfo.prefix + 'vi_/' + src, true);

        xhr.onload = function() {
            if (xhr.status != 200) {
                return;
            }

            var video_info;

            try {
                video_info = JSON.parse(xhr.responseText);
            } catch (e) {
                // Not valid JSON: leave the original element in place
                console.log("vidrw: invalid video info for " + src);
                return;
            }

            do_replace_video(elem, video_info);
        };

        xhr.send();
    }

    /**
     * Replace `elem` (or, for an <embed> nested in an <object>, the
     * enclosing <object>) with an HTML5 <video> pointing at the
     * replay-prefixed video url.
     *
     * @param elem        the iframe/embed element to replace
     * @param video_info  parsed video info; reads .url and optional .thumbnail
     */
    function do_replace_video(elem, video_info) {
        if (!video_info || !video_info.url) {
            return;
        }

        var tag_name = elem.tagName.toLowerCase();

        if (tag_name != "iframe" && tag_name != "embed") {
            return;
        }

        var target = elem;
        var parent = elem.parentNode;

        // An <embed> wrapped in an <object> is replaced together with its wrapper
        if (tag_name == "embed" && parent && parent.tagName &&
            parent.tagName.toLowerCase() == "object") {
            target = parent;
        }

        // The element may have been detached while the request was in flight
        if (!target.parentNode) {
            return;
        }

        // TODO: select based on size?
        var video_url = wbinfo.prefix + video_info.url;

        console.log("REPLACING: " + video_url);

        // Try HTML5 Video
        var htmlvideo = document.createElement("video");

        htmlvideo.setAttribute("src", video_url);
        copy_attr(elem, htmlvideo, "width");
        copy_attr(elem, htmlvideo, "height");
        htmlvideo.setAttribute("controls", "1");
        htmlvideo.style.backgroundColor = "#000";

        if (video_info.thumbnail) {
            // "poster" is the <video> attribute for a preview image
            htmlvideo.setAttribute("poster", wbinfo.prefix + video_info.thumbnail);
        }

        htmlvideo.addEventListener("error", function() {
            console.log("html5 video error");
        });

        htmlvideo.addEventListener("loadstart", function() {
            console.log("html5 video success");
        });

        target.parentNode.replaceChild(htmlvideo, target);
    }

    // Only hook into the page when a DOM is available
    if (typeof document !== "undefined") {
        if (document.readyState && document.readyState != "loading") {
            // DOMContentLoaded has already fired: scan right away
            check_videos();
        } else {
            document.addEventListener("DOMContentLoaded", check_videos);
        }
    }

    // Expose the scan so that it can also be triggered explicitly
    return {"check_videos": check_videos};
})();
|
@ -20,7 +20,7 @@ This file is part of pywb, https://github.com/ikreymer/pywb
|
|||||||
//============================================
|
//============================================
|
||||||
// Wombat JS-Rewriting Library v2.0
|
// Wombat JS-Rewriting Library v2.0
|
||||||
//============================================
|
//============================================
|
||||||
WB_wombat_init = (function() {
|
var _WBWombat = (function() {
|
||||||
|
|
||||||
// Globals
|
// Globals
|
||||||
var wb_replay_prefix;
|
var wb_replay_prefix;
|
||||||
@ -64,7 +64,7 @@ WB_wombat_init = (function() {
|
|||||||
} else if (string.indexOf(arr_or_prefix) == 0) {
|
} else if (string.indexOf(arr_or_prefix) == 0) {
|
||||||
return arr_or_prefix;
|
return arr_or_prefix;
|
||||||
}
|
}
|
||||||
|
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -89,31 +89,31 @@ WB_wombat_init = (function() {
|
|||||||
}
|
}
|
||||||
return rewritten;
|
return rewritten;
|
||||||
}
|
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
var HTTP_PREFIX = "http://";
|
var HTTP_PREFIX = "http://";
|
||||||
var HTTPS_PREFIX = "https://";
|
var HTTPS_PREFIX = "https://";
|
||||||
var REL_PREFIX = "//";
|
var REL_PREFIX = "//";
|
||||||
|
|
||||||
var VALID_PREFIXES = [HTTP_PREFIX, HTTPS_PREFIX, REL_PREFIX];
|
var VALID_PREFIXES = [HTTP_PREFIX, HTTPS_PREFIX, REL_PREFIX];
|
||||||
var IGNORE_PREFIXES = ["#", "about:", "data:", "mailto:", "javascript:"];
|
var IGNORE_PREFIXES = ["#", "about:", "data:", "mailto:", "javascript:"];
|
||||||
|
|
||||||
var BAD_PREFIXES;
|
var BAD_PREFIXES;
|
||||||
|
|
||||||
function init_bad_prefixes(prefix) {
|
function init_bad_prefixes(prefix) {
|
||||||
BAD_PREFIXES = ["http:" + prefix, "https:" + prefix,
|
BAD_PREFIXES = ["http:" + prefix, "https:" + prefix,
|
||||||
"http:/" + prefix, "https:/" + prefix];
|
"http:/" + prefix, "https:/" + prefix];
|
||||||
}
|
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function rewrite_url_(url) {
|
function rewrite_url_(url) {
|
||||||
// If undefined, just return it
|
// If undefined, just return it
|
||||||
if (!url) {
|
if (!url) {
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
|
||||||
var urltype_ = (typeof url);
|
var urltype_ = (typeof url);
|
||||||
|
|
||||||
// If object, use toString
|
// If object, use toString
|
||||||
if (urltype_ == "object") {
|
if (urltype_ == "object") {
|
||||||
url = url.toString();
|
url = url.toString();
|
||||||
@ -129,7 +129,7 @@ WB_wombat_init = (function() {
|
|||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// just in case wombat reference made it into url!
|
// just in case wombat reference made it into url!
|
||||||
url = url.replace("WB_wombat_", "");
|
url = url.replace("WB_wombat_", "");
|
||||||
|
|
||||||
@ -166,10 +166,10 @@ WB_wombat_init = (function() {
|
|||||||
}
|
}
|
||||||
return wb_replay_date_prefix + url;
|
return wb_replay_date_prefix + url;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for common bad prefixes and remove them
|
// Check for common bad prefixes and remove them
|
||||||
prefix = starts_with(url, BAD_PREFIXES);
|
prefix = starts_with(url, BAD_PREFIXES);
|
||||||
|
|
||||||
if (prefix) {
|
if (prefix) {
|
||||||
url = extract_orig(url);
|
url = extract_orig(url);
|
||||||
return wb_replay_date_prefix + url;
|
return wb_replay_date_prefix + url;
|
||||||
@ -189,16 +189,16 @@ WB_wombat_init = (function() {
|
|||||||
if (!href) {
|
if (!href) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
// proxy mode: no extraction needed
|
// proxy mode: no extraction needed
|
||||||
if (!wb_replay_prefix) {
|
if (!wb_replay_prefix) {
|
||||||
return href;
|
return href;
|
||||||
}
|
}
|
||||||
|
|
||||||
href = href.toString();
|
href = href.toString();
|
||||||
|
|
||||||
var index = href.indexOf("/http", 1);
|
var index = href.indexOf("/http", 1);
|
||||||
|
|
||||||
// extract original url from wburl
|
// extract original url from wburl
|
||||||
if (index > 0) {
|
if (index > 0) {
|
||||||
href = href.substr(index + 1);
|
href = href.substr(index + 1);
|
||||||
@ -207,12 +207,12 @@ WB_wombat_init = (function() {
|
|||||||
if (index >= 0) {
|
if (index >= 0) {
|
||||||
href = href.substr(index + wb_replay_prefix.length);
|
href = href.substr(index + wb_replay_prefix.length);
|
||||||
}
|
}
|
||||||
if ((href.length > 4) &&
|
if ((href.length > 4) &&
|
||||||
(href.charAt(2) == "_") &&
|
(href.charAt(2) == "_") &&
|
||||||
(href.charAt(3) == "/")) {
|
(href.charAt(3) == "/")) {
|
||||||
href = href.substr(4);
|
href = href.substr(4);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!starts_with(href, "http")) {
|
if (!starts_with(href, "http")) {
|
||||||
href = HTTP_PREFIX + href;
|
href = HTTP_PREFIX + href;
|
||||||
}
|
}
|
||||||
@ -225,18 +225,18 @@ WB_wombat_init = (function() {
|
|||||||
|
|
||||||
return href;
|
return href;
|
||||||
}
|
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
// Define custom property
|
// Define custom property
|
||||||
function def_prop(obj, prop, value, set_func, get_func) {
|
function def_prop(obj, prop, value, set_func, get_func) {
|
||||||
var key = "_" + prop;
|
var key = "_" + prop;
|
||||||
obj[key] = value;
|
obj[key] = value;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Object.defineProperty(obj, prop, {
|
Object.defineProperty(obj, prop, {
|
||||||
configurable: false,
|
configurable: false,
|
||||||
enumerable: true,
|
enumerable: true,
|
||||||
set: function(newval) {
|
set: function(newval) {
|
||||||
var result = set_func.call(obj, newval);
|
var result = set_func.call(obj, newval);
|
||||||
if (result != undefined) {
|
if (result != undefined) {
|
||||||
obj[key] = result;
|
obj[key] = result;
|
||||||
@ -256,12 +256,12 @@ WB_wombat_init = (function() {
|
|||||||
obj[prop] = value;
|
obj[prop] = value;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
//Define WombatLocation
|
//Define WombatLocation
|
||||||
|
|
||||||
function WombatLocation(loc) {
|
function WombatLocation(loc) {
|
||||||
this._orig_loc = loc;
|
this._orig_loc = loc;
|
||||||
this._orig_href = loc.href;
|
this._orig_href = loc.href;
|
||||||
|
|
||||||
@ -273,53 +273,53 @@ WB_wombat_init = (function() {
|
|||||||
return this._orig_loc.assign(rewrite_url(url));
|
return this._orig_loc.assign(rewrite_url(url));
|
||||||
}
|
}
|
||||||
this.reload = loc.reload;
|
this.reload = loc.reload;
|
||||||
|
|
||||||
// Adapted from:
|
// Adapted from:
|
||||||
// https://gist.github.com/jlong/2428561
|
// https://gist.github.com/jlong/2428561
|
||||||
var parser = document.createElement('a');
|
var parser = document.createElement('a');
|
||||||
var href = extract_orig(this._orig_href);
|
var href = extract_orig(this._orig_href);
|
||||||
parser.href = href;
|
parser.href = href;
|
||||||
|
|
||||||
this._autooverride = false;
|
this._autooverride = false;
|
||||||
|
|
||||||
var _set_hash = function(hash) {
|
var _set_hash = function(hash) {
|
||||||
this._orig_loc.hash = hash;
|
this._orig_loc.hash = hash;
|
||||||
return this._orig_loc.hash;
|
return this._orig_loc.hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
var _get_hash = function() {
|
var _get_hash = function() {
|
||||||
return this._orig_loc.hash;
|
return this._orig_loc.hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
var _get_url_with_hash = function(url) {
|
var _get_url_with_hash = function(url) {
|
||||||
return url + this._orig_loc.hash;
|
return url + this._orig_loc.hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
href = parser.href;
|
href = parser.href;
|
||||||
var hash = parser.hash;
|
var hash = parser.hash;
|
||||||
|
|
||||||
if (hash) {
|
if (hash) {
|
||||||
var hidx = href.lastIndexOf("#");
|
var hidx = href.lastIndexOf("#");
|
||||||
if (hidx > 0) {
|
if (hidx > 0) {
|
||||||
href = href.substring(0, hidx);
|
href = href.substring(0, hidx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Object.defineProperty) {
|
if (Object.defineProperty) {
|
||||||
var res1 = def_prop(this, "href", href,
|
var res1 = def_prop(this, "href", href,
|
||||||
this.assign,
|
this.assign,
|
||||||
_get_url_with_hash);
|
_get_url_with_hash);
|
||||||
|
|
||||||
var res2 = def_prop(this, "hash", parser.hash,
|
var res2 = def_prop(this, "hash", parser.hash,
|
||||||
_set_hash,
|
_set_hash,
|
||||||
_get_hash);
|
_get_hash);
|
||||||
|
|
||||||
this._autooverride = res1 && res2;
|
this._autooverride = res1 && res2;
|
||||||
} else {
|
} else {
|
||||||
this.href = href;
|
this.href = href;
|
||||||
this.hash = parser.hash;
|
this.hash = parser.hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.host = parser.host;
|
this.host = parser.host;
|
||||||
this.hostname = parser.hostname;
|
this.hostname = parser.hostname;
|
||||||
|
|
||||||
@ -335,17 +335,17 @@ WB_wombat_init = (function() {
|
|||||||
this.toString = function() {
|
this.toString = function() {
|
||||||
return this.href;
|
return this.href;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy any remaining properties
|
// Copy any remaining properties
|
||||||
for (prop in loc) {
|
for (prop in loc) {
|
||||||
if (this.hasOwnProperty(prop)) {
|
if (this.hasOwnProperty(prop)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((typeof loc[prop]) != "function") {
|
if ((typeof loc[prop]) != "function") {
|
||||||
this[prop] = loc[prop];
|
this[prop] = loc[prop];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
@ -460,7 +460,7 @@ WB_wombat_init = (function() {
|
|||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function init_ajax_rewrite() {
|
function init_ajax_rewrite() {
|
||||||
if (!window.XMLHttpRequest ||
|
if (!window.XMLHttpRequest ||
|
||||||
!window.XMLHttpRequest.prototype ||
|
!window.XMLHttpRequest.prototype ||
|
||||||
!window.XMLHttpRequest.prototype.open) {
|
!window.XMLHttpRequest.prototype.open) {
|
||||||
return;
|
return;
|
||||||
@ -469,7 +469,9 @@ WB_wombat_init = (function() {
|
|||||||
var orig = window.XMLHttpRequest.prototype.open;
|
var orig = window.XMLHttpRequest.prototype.open;
|
||||||
|
|
||||||
function open_rewritten(method, url, async, user, password) {
|
function open_rewritten(method, url, async, user, password) {
|
||||||
url = rewrite_url(url);
|
if (!this._no_rewrite) {
|
||||||
|
url = rewrite_url(url);
|
||||||
|
}
|
||||||
|
|
||||||
// defaults to true
|
// defaults to true
|
||||||
if (async != false) {
|
if (async != false) {
|
||||||
@ -534,7 +536,7 @@ WB_wombat_init = (function() {
|
|||||||
rewrite_attr(elem, "src", rewrite_url);
|
rewrite_attr(elem, "src", rewrite_url);
|
||||||
rewrite_attr(elem, "href", rewrite_url);
|
rewrite_attr(elem, "href", rewrite_url);
|
||||||
rewrite_attr(elem, "style", rewrite_style);
|
rewrite_attr(elem, "style", rewrite_style);
|
||||||
|
|
||||||
if (elem && elem.getAttribute && elem.getAttribute("crossorigin")) {
|
if (elem && elem.getAttribute && elem.getAttribute("crossorigin")) {
|
||||||
elem.removeAttribute("crossorigin");
|
elem.removeAttribute("crossorigin");
|
||||||
}
|
}
|
||||||
@ -545,7 +547,7 @@ WB_wombat_init = (function() {
|
|||||||
if (!Node || !Node.prototype) {
|
if (!Node || !Node.prototype) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
function override_attr(obj, attr) {
|
function override_attr(obj, attr) {
|
||||||
var setter = function(orig) {
|
var setter = function(orig) {
|
||||||
var val = rewrite_url(orig);
|
var val = rewrite_url(orig);
|
||||||
@ -553,15 +555,15 @@ WB_wombat_init = (function() {
|
|||||||
this.setAttribute(attr, val);
|
this.setAttribute(attr, val);
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
var getter = function(val) {
|
var getter = function(val) {
|
||||||
var res = this.getAttribute(attr);
|
var res = this.getAttribute(attr);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
var curr_src = obj.getAttribute(attr);
|
var curr_src = obj.getAttribute(attr);
|
||||||
|
|
||||||
def_prop(obj, attr, curr_src, setter, getter);
|
def_prop(obj, attr, curr_src, setter, getter);
|
||||||
}
|
}
|
||||||
|
|
||||||
function replace_dom_func(funcname) {
|
function replace_dom_func(funcname) {
|
||||||
@ -569,7 +571,7 @@ WB_wombat_init = (function() {
|
|||||||
|
|
||||||
Node.prototype[funcname] = function() {
|
Node.prototype[funcname] = function() {
|
||||||
var child = arguments[0];
|
var child = arguments[0];
|
||||||
|
|
||||||
rewrite_elem(child);
|
rewrite_elem(child);
|
||||||
|
|
||||||
var desc;
|
var desc;
|
||||||
@ -587,19 +589,19 @@ WB_wombat_init = (function() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var created = orig.apply(this, arguments);
|
var created = orig.apply(this, arguments);
|
||||||
|
|
||||||
if (created.tagName == "IFRAME") {
|
if (created.tagName == "IFRAME") {
|
||||||
if (created.contentWindow) {
|
if (created.contentWindow) {
|
||||||
created.contentWindow.window.WB_wombat_location = created.contentWindow.window.location;
|
created.contentWindow.window.WB_wombat_location = created.contentWindow.window.location;
|
||||||
}
|
}
|
||||||
|
|
||||||
override_attr(created, "src");
|
override_attr(created, "src");
|
||||||
}
|
}
|
||||||
|
|
||||||
// } else if (created.tagName == "A") {
|
// } else if (created.tagName == "A") {
|
||||||
// override_attr(created, "href");
|
// override_attr(created, "href");
|
||||||
// }
|
// }
|
||||||
|
|
||||||
return created;
|
return created;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -608,29 +610,29 @@ WB_wombat_init = (function() {
|
|||||||
replace_dom_func("insertBefore");
|
replace_dom_func("insertBefore");
|
||||||
replace_dom_func("replaceChild");
|
replace_dom_func("replaceChild");
|
||||||
}
|
}
|
||||||
|
|
||||||
var postmessage_rewritten;
|
var postmessage_rewritten;
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function init_postmessage_override()
|
function init_postmessage_override()
|
||||||
{
|
{
|
||||||
if (!Window.prototype.postMessage) {
|
if (!Window.prototype.postMessage) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var orig = Window.prototype.postMessage;
|
var orig = Window.prototype.postMessage;
|
||||||
|
|
||||||
postmessage_rewritten = function(message, targetOrigin, transfer) {
|
postmessage_rewritten = function(message, targetOrigin, transfer) {
|
||||||
if (targetOrigin && targetOrigin != "*") {
|
if (targetOrigin && targetOrigin != "*") {
|
||||||
targetOrigin = window.location.origin;
|
targetOrigin = window.location.origin;
|
||||||
}
|
}
|
||||||
|
|
||||||
return orig.call(this, message, targetOrigin, transfer);
|
return orig.call(this, message, targetOrigin, transfer);
|
||||||
}
|
}
|
||||||
|
|
||||||
window.postMessage = postmessage_rewritten;
|
window.postMessage = postmessage_rewritten;
|
||||||
window.Window.prototype.postMessage = postmessage_rewritten;
|
window.Window.prototype.postMessage = postmessage_rewritten;
|
||||||
|
|
||||||
for (var i = 0; i < window.frames.length; i++) {
|
for (var i = 0; i < window.frames.length; i++) {
|
||||||
try {
|
try {
|
||||||
window.frames[i].postMessage = postmessage_rewritten;
|
window.frames[i].postMessage = postmessage_rewritten;
|
||||||
@ -639,24 +641,24 @@ WB_wombat_init = (function() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function init_open_override()
|
function init_open_override()
|
||||||
{
|
{
|
||||||
if (!Window.prototype.open) {
|
if (!Window.prototype.open) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var orig = Window.prototype.open;
|
var orig = Window.prototype.open;
|
||||||
|
|
||||||
var open_rewritten = function(strUrl, strWindowName, strWindowFeatures) {
|
var open_rewritten = function(strUrl, strWindowName, strWindowFeatures) {
|
||||||
strUrl = rewrite_url(strUrl);
|
strUrl = rewrite_url(strUrl);
|
||||||
return orig.call(this, strUrl, strWindowName, strWindowFeatures);
|
return orig.call(this, strUrl, strWindowName, strWindowFeatures);
|
||||||
}
|
}
|
||||||
|
|
||||||
window.open = open_rewritten;
|
window.open = open_rewritten;
|
||||||
window.Window.prototype.open = open_rewritten;
|
window.Window.prototype.open = open_rewritten;
|
||||||
|
|
||||||
for (var i = 0; i < window.frames.length; i++) {
|
for (var i = 0; i < window.frames.length; i++) {
|
||||||
try {
|
try {
|
||||||
window.frames[i].open = open_rewritten;
|
window.frames[i].open = open_rewritten;
|
||||||
@ -665,41 +667,41 @@ WB_wombat_init = (function() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function init_cookies_override()
|
function init_cookies_override()
|
||||||
{
|
{
|
||||||
var cookie_path_regex = /\bPath=\'?\"?([^;'"\s]+)/i;
|
var cookie_path_regex = /\bPath=\'?\"?([^;'"\s]+)/i;
|
||||||
|
|
||||||
var get_cookie = function() {
|
var get_cookie = function() {
|
||||||
return document.cookie;
|
return document.cookie;
|
||||||
}
|
}
|
||||||
|
|
||||||
var set_cookie = function(value) {
|
var set_cookie = function(value) {
|
||||||
var matched = value.match(cookie_path_regex);
|
var matched = value.match(cookie_path_regex);
|
||||||
|
|
||||||
// if has cookie path, rewrite and replace
|
// if has cookie path, rewrite and replace
|
||||||
if (matched) {
|
if (matched) {
|
||||||
var rewritten = rewrite_url(matched[1]);
|
var rewritten = rewrite_url(matched[1]);
|
||||||
value = value.replace(matched[1], rewritten);
|
value = value.replace(matched[1], rewritten);
|
||||||
}
|
}
|
||||||
|
|
||||||
document.cookie = value;
|
document.cookie = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
def_prop(document, "WB_wombat_cookie", document.cookie,
|
def_prop(document, "WB_wombat_cookie", document.cookie,
|
||||||
set_cookie,
|
set_cookie,
|
||||||
get_cookie);
|
get_cookie);
|
||||||
}
|
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function init_write_override()
|
function init_write_override()
|
||||||
{
|
{
|
||||||
document.write = function(string) {
|
document.write = function(string) {
|
||||||
var doc = new DOMParser().parseFromString(string, "text/html");
|
var doc = new DOMParser().parseFromString(string, "text/html");
|
||||||
|
|
||||||
if (doc) {
|
if (doc) {
|
||||||
var children = doc.body.children;
|
var children = doc.body.children;
|
||||||
|
|
||||||
for (var i = 0; i < children.length; i++) {
|
for (var i = 0; i < children.length; i++) {
|
||||||
document.body.appendChild(children[i]);
|
document.body.appendChild(children[i]);
|
||||||
}
|
}
|
||||||
@ -710,52 +712,52 @@ WB_wombat_init = (function() {
|
|||||||
//============================================
|
//============================================
|
||||||
function wombat_init(replay_prefix, capture_date, orig_scheme, orig_host, timestamp, mod) {
|
function wombat_init(replay_prefix, capture_date, orig_scheme, orig_host, timestamp, mod) {
|
||||||
wb_replay_prefix = replay_prefix;
|
wb_replay_prefix = replay_prefix;
|
||||||
|
|
||||||
if (wb_replay_prefix) {
|
if (wb_replay_prefix) {
|
||||||
wb_replay_date_prefix = replay_prefix + capture_date + mod + "/";
|
wb_replay_date_prefix = replay_prefix + capture_date + mod + "/";
|
||||||
|
|
||||||
if (capture_date.length > 0) {
|
if (capture_date.length > 0) {
|
||||||
wb_capture_date_part = "/" + capture_date + "/";
|
wb_capture_date_part = "/" + capture_date + "/";
|
||||||
} else {
|
} else {
|
||||||
wb_capture_date_part = "";
|
wb_capture_date_part = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
wb_orig_scheme = orig_scheme + '://';
|
wb_orig_scheme = orig_scheme + '://';
|
||||||
|
|
||||||
wb_orig_host = wb_orig_scheme + orig_host;
|
wb_orig_host = wb_orig_scheme + orig_host;
|
||||||
|
|
||||||
init_bad_prefixes(replay_prefix);
|
init_bad_prefixes(replay_prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Location
|
// Location
|
||||||
var wombat_location = new WombatLocation(window.self.location);
|
var wombat_location = new WombatLocation(window.self.location);
|
||||||
|
|
||||||
if (wombat_location._autooverride) {
|
if (wombat_location._autooverride) {
|
||||||
|
|
||||||
var setter = function(val) {
|
var setter = function(val) {
|
||||||
if (typeof(val) == "string") {
|
if (typeof(val) == "string") {
|
||||||
if (starts_with(val, "about:")) {
|
if (starts_with(val, "about:")) {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
this._WB_wombat_location.href = val;
|
this._WB_wombat_location.href = val;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def_prop(window, "WB_wombat_location", wombat_location, setter);
|
def_prop(window, "WB_wombat_location", wombat_location, setter);
|
||||||
def_prop(document, "WB_wombat_location", wombat_location, setter);
|
def_prop(document, "WB_wombat_location", wombat_location, setter);
|
||||||
} else {
|
} else {
|
||||||
window.WB_wombat_location = wombat_location;
|
window.WB_wombat_location = wombat_location;
|
||||||
document.WB_wombat_location = wombat_location;
|
document.WB_wombat_location = wombat_location;
|
||||||
|
|
||||||
// Check quickly after page load
|
// Check quickly after page load
|
||||||
setTimeout(check_all_locations, 500);
|
setTimeout(check_all_locations, 500);
|
||||||
|
|
||||||
// Check periodically every few seconds
|
// Check periodically every few seconds
|
||||||
setInterval(check_all_locations, 500);
|
setInterval(check_all_locations, 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
var is_framed = (window.top.wbinfo && window.top.wbinfo.is_frame);
|
var is_framed = (window.top.wbinfo && window.top.wbinfo.is_frame);
|
||||||
|
|
||||||
function find_next_top(win) {
|
function find_next_top(win) {
|
||||||
while ((win.parent != win) && (win.parent != win.top)) {
|
while ((win.parent != win) && (win.parent != win.top)) {
|
||||||
win = win.parent;
|
win = win.parent;
|
||||||
@ -766,9 +768,9 @@ WB_wombat_init = (function() {
|
|||||||
if (window.self.location != window.top.location) {
|
if (window.self.location != window.top.location) {
|
||||||
if (is_framed) {
|
if (is_framed) {
|
||||||
window.top.WB_wombat_location = window.WB_wombat_location;
|
window.top.WB_wombat_location = window.WB_wombat_location;
|
||||||
|
|
||||||
window.WB_wombat_top = find_next_top(window.self);
|
window.WB_wombat_top = find_next_top(window.self);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
window.top.WB_wombat_location = new WombatLocation(window.top.location);
|
window.top.WB_wombat_location = new WombatLocation(window.top.location);
|
||||||
window.WB_wombat_top = window.top;
|
window.WB_wombat_top = window.top;
|
||||||
@ -788,20 +790,20 @@ WB_wombat_init = (function() {
|
|||||||
// History
|
// History
|
||||||
copy_history_func(window.history, 'pushState');
|
copy_history_func(window.history, 'pushState');
|
||||||
copy_history_func(window.history, 'replaceState');
|
copy_history_func(window.history, 'replaceState');
|
||||||
|
|
||||||
// open
|
// open
|
||||||
init_open_override();
|
init_open_override();
|
||||||
|
|
||||||
// postMessage
|
// postMessage
|
||||||
init_postmessage_override();
|
init_postmessage_override();
|
||||||
|
|
||||||
// write
|
// write
|
||||||
init_write_override();
|
init_write_override();
|
||||||
|
|
||||||
// Ajax
|
// Ajax
|
||||||
init_ajax_rewrite();
|
init_ajax_rewrite();
|
||||||
init_worker_override();
|
init_worker_override();
|
||||||
|
|
||||||
// Cookies
|
// Cookies
|
||||||
init_cookies_override();
|
init_cookies_override();
|
||||||
|
|
||||||
@ -810,6 +812,9 @@ WB_wombat_init = (function() {
|
|||||||
|
|
||||||
// Random
|
// Random
|
||||||
init_seeded_random(timestamp);
|
init_seeded_random(timestamp);
|
||||||
|
|
||||||
|
// expose functions
|
||||||
|
this.extract_orig = extract_orig;
|
||||||
}
|
}
|
||||||
|
|
||||||
return wombat_init;
|
return wombat_init;
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
||||||
<script>
|
<script>
|
||||||
{% set urlsplit = cdx.original | urlsplit %}
|
{% set urlsplit = cdx.original | urlsplit %}
|
||||||
WB_wombat_init("{{ wbrequest.wb_prefix}}",
|
var _wb_wombat = new _WBWombat("{{ wbrequest.wb_prefix}}",
|
||||||
"{{ cdx['timestamp'] if include_ts else ''}}",
|
"{{ cdx['timestamp'] if include_ts else ''}}",
|
||||||
"{{ urlsplit.scheme }}",
|
"{{ urlsplit.scheme }}",
|
||||||
"{{ urlsplit.netloc }}",
|
"{{ urlsplit.netloc }}",
|
||||||
@ -24,6 +24,7 @@
|
|||||||
</script>
|
</script>
|
||||||
|
|
||||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
|
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
|
||||||
|
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/vidrw.js'> </script>
|
||||||
|
|
||||||
{% include banner_html ignore missing %}
|
{% include banner_html ignore missing %}
|
||||||
|
|
||||||
|
@ -4,12 +4,17 @@ from pywb.framework.archivalrouter import ArchivalRouter, Route
|
|||||||
|
|
||||||
from pywb.rewrite.rewrite_live import LiveRewriter
|
from pywb.rewrite.rewrite_live import LiveRewriter
|
||||||
from pywb.rewrite.wburl import WbUrl
|
from pywb.rewrite.wburl import WbUrl
|
||||||
|
from pywb.rewrite.url_rewriter import HttpsUrlRewriter
|
||||||
|
|
||||||
from handlers import StaticHandler, SearchPageWbUrlHandler
|
from handlers import StaticHandler, SearchPageWbUrlHandler
|
||||||
from views import HeadInsertView
|
from views import HeadInsertView
|
||||||
|
|
||||||
from pywb.utils.wbexception import WbException
|
from pywb.utils.wbexception import WbException
|
||||||
|
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
from youtube_dl import YoutubeDL
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class LiveResourceException(WbException):
|
class LiveResourceException(WbException):
|
||||||
@ -25,14 +30,16 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super(RewriteHandler, self).__init__(config)
|
super(RewriteHandler, self).__init__(config)
|
||||||
|
|
||||||
default_proxy = config.get('proxyhostport')
|
self.default_proxy = config.get('proxyhostport')
|
||||||
self.rewriter = LiveRewriter(is_framed_replay=self.is_frame_mode,
|
self.rewriter = LiveRewriter(is_framed_replay=self.is_frame_mode,
|
||||||
default_proxy=default_proxy)
|
default_proxy=self.default_proxy)
|
||||||
|
|
||||||
self.head_insert_view = HeadInsertView.init_from_config(config)
|
self.head_insert_view = HeadInsertView.init_from_config(config)
|
||||||
|
|
||||||
self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)
|
self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)
|
||||||
|
|
||||||
|
self.ydl = None
|
||||||
|
|
||||||
def handle_request(self, wbrequest):
|
def handle_request(self, wbrequest):
|
||||||
try:
|
try:
|
||||||
return self.render_content(wbrequest)
|
return self.render_content(wbrequest)
|
||||||
@ -50,6 +57,9 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
def render_content(self, wbrequest):
|
def render_content(self, wbrequest):
|
||||||
|
if wbrequest.wb_url.mod == 'vi_':
|
||||||
|
return self.get_video_info(wbrequest)
|
||||||
|
|
||||||
head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
|
head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
|
||||||
req_headers = self._live_request_headers(wbrequest)
|
req_headers = self._live_request_headers(wbrequest)
|
||||||
|
|
||||||
@ -76,6 +86,34 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
|
|
||||||
return WbResponse(status_headers, gen)
|
return WbResponse(status_headers, gen)
|
||||||
|
|
||||||
|
|
||||||
|
def get_video_info(self, wbrequest):
|
||||||
|
if not self.ydl:
|
||||||
|
self.ydl = YoutubeDL(dict(simulate=True,
|
||||||
|
youtube_include_dash_manifest=False))
|
||||||
|
|
||||||
|
self.ydl.add_default_info_extractors()
|
||||||
|
|
||||||
|
info = self.ydl.extract_info(wbrequest.wb_url.url)
|
||||||
|
content_type = 'application/vnd.youtube-dl_formats+json'
|
||||||
|
metadata = json.dumps(info)
|
||||||
|
|
||||||
|
if self.default_proxy:
|
||||||
|
proxies = {'http': self.default_proxy}
|
||||||
|
|
||||||
|
headers = {'Content-Type': content_type}
|
||||||
|
|
||||||
|
url = HttpsUrlRewriter.remove_https(wbrequest.wb_url.url)
|
||||||
|
|
||||||
|
response = requests.request(method='PUTMETA',
|
||||||
|
url=url,
|
||||||
|
data=metadata,
|
||||||
|
headers=headers,
|
||||||
|
proxies=proxies,
|
||||||
|
verify=False)
|
||||||
|
|
||||||
|
return WbResponse.text_response(metadata, content_type=content_type)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'Live Web Rewrite Handler'
|
return 'Live Web Rewrite Handler'
|
||||||
|
|
||||||
|
@ -68,6 +68,14 @@ class QueryHandler(object):
|
|||||||
params['url'] = wb_url.url
|
params['url'] = wb_url.url
|
||||||
params['output'] = output
|
params['output'] = output
|
||||||
|
|
||||||
|
params['filter'].append('!mimetype:-')
|
||||||
|
|
||||||
|
# get metadata
|
||||||
|
if wb_url.mod == 'vi_':
|
||||||
|
# matching metadata explicitly with special scheme
|
||||||
|
params['url'] = wb_url.url.replace('http:/', 'metadata:/')
|
||||||
|
params['filter'].append('~original:metadata://')
|
||||||
|
|
||||||
cdx_iter = self.load_cdx(wbrequest, params)
|
cdx_iter = self.load_cdx(wbrequest, params)
|
||||||
return cdx_iter, output
|
return cdx_iter, output
|
||||||
|
|
||||||
@ -132,6 +140,7 @@ class QueryHandler(object):
|
|||||||
'limit': limit,
|
'limit': limit,
|
||||||
'fl': ('urlkey,original,timestamp,' +
|
'fl': ('urlkey,original,timestamp,' +
|
||||||
'endtimestamp,groupcount,uniqcount'),
|
'endtimestamp,groupcount,uniqcount'),
|
||||||
|
'filter':[],
|
||||||
},
|
},
|
||||||
|
|
||||||
wburl.REPLAY:
|
wburl.REPLAY:
|
||||||
@ -147,6 +156,7 @@ class QueryHandler(object):
|
|||||||
# Not appropriate as default
|
# Not appropriate as default
|
||||||
# Should be an option to configure status code filtering in general
|
# Should be an option to configure status code filtering in general
|
||||||
# 'filter': ['statuscode:[23]..|-'],
|
# 'filter': ['statuscode:[23]..|-'],
|
||||||
|
'filter': [],
|
||||||
'limit': '1',
|
'limit': '1',
|
||||||
'resolveRevisits': True,
|
'resolveRevisits': True,
|
||||||
}
|
}
|
||||||
|
@ -98,7 +98,7 @@ class TestWb:
|
|||||||
|
|
||||||
assert '"20140127171238"' in resp.body
|
assert '"20140127171238"' in resp.body
|
||||||
assert 'wb.js' in resp.body
|
assert 'wb.js' in resp.body
|
||||||
assert 'WB_wombat_init' in resp.body
|
assert 'new _WBWombat' in resp.body, resp.body
|
||||||
assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body
|
assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body
|
||||||
|
|
||||||
def test_replay_non_frame_content(self):
|
def test_replay_non_frame_content(self):
|
||||||
@ -149,7 +149,7 @@ class TestWb:
|
|||||||
assert 'wb.js' in resp.body
|
assert 'wb.js' in resp.body
|
||||||
|
|
||||||
# no wombat present
|
# no wombat present
|
||||||
assert 'WB_wombat_init' not in resp.body
|
assert '_WBWombat' not in resp.body
|
||||||
|
|
||||||
# url not rewritten
|
# url not rewritten
|
||||||
#assert '"http://www.iana.org/domains/example"' in resp.body
|
#assert '"http://www.iana.org/domains/example"' in resp.body
|
||||||
|
Loading…
x
Reference in New Issue
Block a user