1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

video: add video rewriting, using vidrw on the client side and youtube-dl on the server

add vi_ modifier:
-on record, gets video_info from youtube-dl, sends to proxy,
if any, via PUTMETA to create metadata record
-on playback, fetches special metadata record with video info and
returns to client as json
-vidrw script: fetches video info, if any, and attempts to replace
iframe and embed tags (so far) which are videos
wombat: export extract_url function, fix spaces and use object instance
semantics
This commit is contained in:
Ilya Kreymer 2014-10-28 10:36:48 -07:00
parent 61ce53a0e0
commit fb85570974
8 changed files with 291 additions and 107 deletions

View File

@ -138,11 +138,7 @@ class HttpsUrlRewriter(UrlRewriter):
HTTPS = 'https://' HTTPS = 'https://'
def rewrite(self, url, mod=None): def rewrite(self, url, mod=None):
if url.startswith(self.HTTPS): return self.remove_https(url)
result = self.HTTP + url[len(self.HTTPS):]
return result
else:
return url
def get_new_url(self, **kwargs): def get_new_url(self, **kwargs):
return kwargs.get('url') return kwargs.get('url')
@ -155,3 +151,12 @@ class HttpsUrlRewriter(UrlRewriter):
def deprefix_url(self): def deprefix_url(self):
return self.wburl.url return self.wburl.url
@staticmethod
def remove_https(url):
    """Return ``url`` with a leading https:// scheme swapped for http://.

    Urls that do not start with ``HttpsUrlRewriter.HTTPS`` are returned
    unchanged.
    """
    cls = HttpsUrlRewriter

    # Guard clause: nothing to strip unless the url starts with the https prefix
    if not url.startswith(cls.HTTPS):
        return url

    # Replace only the scheme prefix, keeping the rest of the url as-is
    return cls.HTTP + url[len(cls.HTTPS):]

View File

@ -114,7 +114,22 @@ rules:
- ownerId - ownerId
- videoFileId - videoFileId
- signature - signature
# youtube rules
#=================================================================
- url_prefix: 'com,youtube)/get_video_info'
fuzzy_lookup:
- video_id
- html5
- url_prefix: 'com,googlevideo,'
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]+).*(range=[^&]+)'
# testing rules -- not for valid domain # testing rules -- not for valid domain
#================================================================= #=================================================================

110
pywb/static/vidrw.js Normal file
View File

@ -0,0 +1,110 @@
/*
Copyright(c) 2013-2014 Ilya Kreymer. Released under the GNU General Public License.
This file is part of pywb, https://github.com/ikreymer/pywb
pywb is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
pywb is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with pywb. If not, see <http://www.gnu.org/licenses/>.
*/
// VidRw 1.0 -- video rewriting
var __wbvidrw = (function() {

    // Becomes true once a scan has found at least one candidate element,
    // so that a repeated call does not issue duplicate lookups.
    var already_checked = false;

    // Scan the document for <iframe> and <embed> elements and request
    // video info for each one (iframes first, then embeds).
    function check_videos() {
        if (already_checked) {
            return;
        }

        var tag_names = ["iframe", "embed"];

        for (var t = 0; t < tag_names.length; t++) {
            var elems = document.getElementsByTagName(tag_names[t]);

            for (var i = 0; i < elems.length; i++) {
                already_checked = true;
                check_replacement(elems[i], elems[i].getAttribute("src"));
            }
        }
    }

    // Ask the server (via the 'vi_' modifier) for video info on the
    // original url behind src; on a 200 response, replace elem.
    function check_replacement(elem, src) {
        if (!src) {
            return;
        }

        // Both globals are set up by the page's head insert. Without them
        // the lookup url cannot be built, so skip quietly instead of
        // throwing a ReferenceError from the event handler.
        if (typeof _wb_wombat === "undefined" ||
            typeof _wb_wombat.extract_orig !== "function" ||
            typeof wbinfo === "undefined") {
            return;
        }

        src = _wb_wombat.extract_orig(src);

        var xhr = new XMLHttpRequest();

        // Tells wombat's XMLHttpRequest.open override to leave this url
        // alone: it is already built from the replay prefix.
        xhr._no_rewrite = true;

        xhr.open('GET', wbinfo.prefix + 'vi_/' + src, true);

        xhr.onload = function() {
            if (xhr.status !== 200) {
                return;
            }

            var video_info;

            try {
                video_info = JSON.parse(xhr.responseText);
            } catch (e) {
                // A malformed body must not abort the handler with an
                // uncaught exception; just leave the element untouched.
                console.log("vidrw: could not parse video info for " + src);
                return;
            }

            do_replace_video(elem, video_info);
        };

        xhr.send();
    }

    // Build an HTML5 <video> from video_info and swap it in for elem.
    // For an <embed> nested in an <object>, the <object> is replaced.
    function do_replace_video(elem, video_info) {
        if (!video_info || !video_info.url) {
            return;
        }

        // TODO: select based on size?
        var video_url = wbinfo.prefix + video_info.url;

        console.log("REPLACING: " + video_url);
        console.log(video_info.ext);

        // Try HTML5 Video
        var htmlvideo = document.createElement("video");
        htmlvideo.setAttribute("src", video_url);

        // Copy dimensions only when the original element declares them;
        // getAttribute returns null otherwise, which would be written
        // out as the literal string "null".
        var width = elem.getAttribute("width");
        var height = elem.getAttribute("height");

        if (width) {
            htmlvideo.setAttribute("width", width);
        }

        if (height) {
            htmlvideo.setAttribute("height", height);
        }

        htmlvideo.setAttribute("controls", "1");
        htmlvideo.style.backgroundColor = "#000";

        if (video_info.thumbnail) {
            // "poster" is the <video> attribute for a preview image;
            // there is no "thumbnail" attribute.
            htmlvideo.setAttribute("poster", wbinfo.prefix + video_info.thumbnail);
        }

        htmlvideo.addEventListener("error", function() {
            console.log("html5 video error");
        });

        htmlvideo.addEventListener("loadstart", function() {
            console.log("html5 video success");
        });

        console.log(elem.tagName);

        var tag = elem.tagName.toLowerCase();

        if (tag !== "iframe" && tag !== "embed") {
            return;
        }

        var target = elem;

        if (tag === "embed") {
            var parent = elem.parentElement;

            if (parent && parent.tagName.toLowerCase() === "object") {
                target = parent;
            }
        }

        // The element may have been removed from the page while the
        // request was in flight; there is nothing to replace then.
        if (!target.parentNode) {
            return;
        }

        target.parentNode.replaceChild(htmlvideo, target);
    }

    // Run once the DOM is parsed; if this script is loaded after parsing
    // has finished, DOMContentLoaded will not fire again, so run now.
    if (document.readyState === "loading") {
        document.addEventListener("DOMContentLoaded", check_videos);
    } else {
        check_videos();
    }

})();

View File

@ -20,7 +20,7 @@ This file is part of pywb, https://github.com/ikreymer/pywb
//============================================ //============================================
// Wombat JS-Rewriting Library v2.0 // Wombat JS-Rewriting Library v2.0
//============================================ //============================================
WB_wombat_init = (function() { var _WBWombat = (function() {
// Globals // Globals
var wb_replay_prefix; var wb_replay_prefix;
@ -64,7 +64,7 @@ WB_wombat_init = (function() {
} else if (string.indexOf(arr_or_prefix) == 0) { } else if (string.indexOf(arr_or_prefix) == 0) {
return arr_or_prefix; return arr_or_prefix;
} }
return undefined; return undefined;
} }
@ -89,31 +89,31 @@ WB_wombat_init = (function() {
} }
return rewritten; return rewritten;
} }
//============================================ //============================================
var HTTP_PREFIX = "http://"; var HTTP_PREFIX = "http://";
var HTTPS_PREFIX = "https://"; var HTTPS_PREFIX = "https://";
var REL_PREFIX = "//"; var REL_PREFIX = "//";
var VALID_PREFIXES = [HTTP_PREFIX, HTTPS_PREFIX, REL_PREFIX]; var VALID_PREFIXES = [HTTP_PREFIX, HTTPS_PREFIX, REL_PREFIX];
var IGNORE_PREFIXES = ["#", "about:", "data:", "mailto:", "javascript:"]; var IGNORE_PREFIXES = ["#", "about:", "data:", "mailto:", "javascript:"];
var BAD_PREFIXES; var BAD_PREFIXES;
function init_bad_prefixes(prefix) { function init_bad_prefixes(prefix) {
BAD_PREFIXES = ["http:" + prefix, "https:" + prefix, BAD_PREFIXES = ["http:" + prefix, "https:" + prefix,
"http:/" + prefix, "https:/" + prefix]; "http:/" + prefix, "https:/" + prefix];
} }
//============================================ //============================================
function rewrite_url_(url) { function rewrite_url_(url) {
// If undefined, just return it // If undefined, just return it
if (!url) { if (!url) {
return url; return url;
} }
var urltype_ = (typeof url); var urltype_ = (typeof url);
// If object, use toString // If object, use toString
if (urltype_ == "object") { if (urltype_ == "object") {
url = url.toString(); url = url.toString();
@ -129,7 +129,7 @@ WB_wombat_init = (function() {
return url; return url;
} }
} }
// just in case wombat reference made it into url! // just in case wombat reference made it into url!
url = url.replace("WB_wombat_", ""); url = url.replace("WB_wombat_", "");
@ -166,10 +166,10 @@ WB_wombat_init = (function() {
} }
return wb_replay_date_prefix + url; return wb_replay_date_prefix + url;
} }
// Check for common bad prefixes and remove them // Check for common bad prefixes and remove them
prefix = starts_with(url, BAD_PREFIXES); prefix = starts_with(url, BAD_PREFIXES);
if (prefix) { if (prefix) {
url = extract_orig(url); url = extract_orig(url);
return wb_replay_date_prefix + url; return wb_replay_date_prefix + url;
@ -189,16 +189,16 @@ WB_wombat_init = (function() {
if (!href) { if (!href) {
return ""; return "";
} }
// proxy mode: no extraction needed // proxy mode: no extraction needed
if (!wb_replay_prefix) { if (!wb_replay_prefix) {
return href; return href;
} }
href = href.toString(); href = href.toString();
var index = href.indexOf("/http", 1); var index = href.indexOf("/http", 1);
// extract original url from wburl // extract original url from wburl
if (index > 0) { if (index > 0) {
href = href.substr(index + 1); href = href.substr(index + 1);
@ -207,12 +207,12 @@ WB_wombat_init = (function() {
if (index >= 0) { if (index >= 0) {
href = href.substr(index + wb_replay_prefix.length); href = href.substr(index + wb_replay_prefix.length);
} }
if ((href.length > 4) && if ((href.length > 4) &&
(href.charAt(2) == "_") && (href.charAt(2) == "_") &&
(href.charAt(3) == "/")) { (href.charAt(3) == "/")) {
href = href.substr(4); href = href.substr(4);
} }
if (!starts_with(href, "http")) { if (!starts_with(href, "http")) {
href = HTTP_PREFIX + href; href = HTTP_PREFIX + href;
} }
@ -225,18 +225,18 @@ WB_wombat_init = (function() {
return href; return href;
} }
//============================================ //============================================
// Define custom property // Define custom property
function def_prop(obj, prop, value, set_func, get_func) { function def_prop(obj, prop, value, set_func, get_func) {
var key = "_" + prop; var key = "_" + prop;
obj[key] = value; obj[key] = value;
try { try {
Object.defineProperty(obj, prop, { Object.defineProperty(obj, prop, {
configurable: false, configurable: false,
enumerable: true, enumerable: true,
set: function(newval) { set: function(newval) {
var result = set_func.call(obj, newval); var result = set_func.call(obj, newval);
if (result != undefined) { if (result != undefined) {
obj[key] = result; obj[key] = result;
@ -256,12 +256,12 @@ WB_wombat_init = (function() {
obj[prop] = value; obj[prop] = value;
return false; return false;
} }
} }
//============================================ //============================================
//Define WombatLocation //Define WombatLocation
function WombatLocation(loc) { function WombatLocation(loc) {
this._orig_loc = loc; this._orig_loc = loc;
this._orig_href = loc.href; this._orig_href = loc.href;
@ -273,53 +273,53 @@ WB_wombat_init = (function() {
return this._orig_loc.assign(rewrite_url(url)); return this._orig_loc.assign(rewrite_url(url));
} }
this.reload = loc.reload; this.reload = loc.reload;
// Adapted from: // Adapted from:
// https://gist.github.com/jlong/2428561 // https://gist.github.com/jlong/2428561
var parser = document.createElement('a'); var parser = document.createElement('a');
var href = extract_orig(this._orig_href); var href = extract_orig(this._orig_href);
parser.href = href; parser.href = href;
this._autooverride = false; this._autooverride = false;
var _set_hash = function(hash) { var _set_hash = function(hash) {
this._orig_loc.hash = hash; this._orig_loc.hash = hash;
return this._orig_loc.hash; return this._orig_loc.hash;
} }
var _get_hash = function() { var _get_hash = function() {
return this._orig_loc.hash; return this._orig_loc.hash;
} }
var _get_url_with_hash = function(url) { var _get_url_with_hash = function(url) {
return url + this._orig_loc.hash; return url + this._orig_loc.hash;
} }
href = parser.href; href = parser.href;
var hash = parser.hash; var hash = parser.hash;
if (hash) { if (hash) {
var hidx = href.lastIndexOf("#"); var hidx = href.lastIndexOf("#");
if (hidx > 0) { if (hidx > 0) {
href = href.substring(0, hidx); href = href.substring(0, hidx);
} }
} }
if (Object.defineProperty) { if (Object.defineProperty) {
var res1 = def_prop(this, "href", href, var res1 = def_prop(this, "href", href,
this.assign, this.assign,
_get_url_with_hash); _get_url_with_hash);
var res2 = def_prop(this, "hash", parser.hash, var res2 = def_prop(this, "hash", parser.hash,
_set_hash, _set_hash,
_get_hash); _get_hash);
this._autooverride = res1 && res2; this._autooverride = res1 && res2;
} else { } else {
this.href = href; this.href = href;
this.hash = parser.hash; this.hash = parser.hash;
} }
this.host = parser.host; this.host = parser.host;
this.hostname = parser.hostname; this.hostname = parser.hostname;
@ -335,17 +335,17 @@ WB_wombat_init = (function() {
this.toString = function() { this.toString = function() {
return this.href; return this.href;
} }
// Copy any remaining properties // Copy any remaining properties
for (prop in loc) { for (prop in loc) {
if (this.hasOwnProperty(prop)) { if (this.hasOwnProperty(prop)) {
continue; continue;
} }
if ((typeof loc[prop]) != "function") { if ((typeof loc[prop]) != "function") {
this[prop] = loc[prop]; this[prop] = loc[prop];
} }
} }
} }
//============================================ //============================================
@ -460,7 +460,7 @@ WB_wombat_init = (function() {
//============================================ //============================================
function init_ajax_rewrite() { function init_ajax_rewrite() {
if (!window.XMLHttpRequest || if (!window.XMLHttpRequest ||
!window.XMLHttpRequest.prototype || !window.XMLHttpRequest.prototype ||
!window.XMLHttpRequest.prototype.open) { !window.XMLHttpRequest.prototype.open) {
return; return;
@ -469,7 +469,9 @@ WB_wombat_init = (function() {
var orig = window.XMLHttpRequest.prototype.open; var orig = window.XMLHttpRequest.prototype.open;
function open_rewritten(method, url, async, user, password) { function open_rewritten(method, url, async, user, password) {
url = rewrite_url(url); if (!this._no_rewrite) {
url = rewrite_url(url);
}
// defaults to true // defaults to true
if (async != false) { if (async != false) {
@ -534,7 +536,7 @@ WB_wombat_init = (function() {
rewrite_attr(elem, "src", rewrite_url); rewrite_attr(elem, "src", rewrite_url);
rewrite_attr(elem, "href", rewrite_url); rewrite_attr(elem, "href", rewrite_url);
rewrite_attr(elem, "style", rewrite_style); rewrite_attr(elem, "style", rewrite_style);
if (elem && elem.getAttribute && elem.getAttribute("crossorigin")) { if (elem && elem.getAttribute && elem.getAttribute("crossorigin")) {
elem.removeAttribute("crossorigin"); elem.removeAttribute("crossorigin");
} }
@ -545,7 +547,7 @@ WB_wombat_init = (function() {
if (!Node || !Node.prototype) { if (!Node || !Node.prototype) {
return; return;
} }
function override_attr(obj, attr) { function override_attr(obj, attr) {
var setter = function(orig) { var setter = function(orig) {
var val = rewrite_url(orig); var val = rewrite_url(orig);
@ -553,15 +555,15 @@ WB_wombat_init = (function() {
this.setAttribute(attr, val); this.setAttribute(attr, val);
return val; return val;
} }
var getter = function(val) { var getter = function(val) {
var res = this.getAttribute(attr); var res = this.getAttribute(attr);
return res; return res;
} }
var curr_src = obj.getAttribute(attr); var curr_src = obj.getAttribute(attr);
def_prop(obj, attr, curr_src, setter, getter); def_prop(obj, attr, curr_src, setter, getter);
} }
function replace_dom_func(funcname) { function replace_dom_func(funcname) {
@ -569,7 +571,7 @@ WB_wombat_init = (function() {
Node.prototype[funcname] = function() { Node.prototype[funcname] = function() {
var child = arguments[0]; var child = arguments[0];
rewrite_elem(child); rewrite_elem(child);
var desc; var desc;
@ -587,19 +589,19 @@ WB_wombat_init = (function() {
} }
var created = orig.apply(this, arguments); var created = orig.apply(this, arguments);
if (created.tagName == "IFRAME") { if (created.tagName == "IFRAME") {
if (created.contentWindow) { if (created.contentWindow) {
created.contentWindow.window.WB_wombat_location = created.contentWindow.window.location; created.contentWindow.window.WB_wombat_location = created.contentWindow.window.location;
} }
override_attr(created, "src"); override_attr(created, "src");
} }
// } else if (created.tagName == "A") { // } else if (created.tagName == "A") {
// override_attr(created, "href"); // override_attr(created, "href");
// } // }
return created; return created;
} }
} }
@ -608,29 +610,29 @@ WB_wombat_init = (function() {
replace_dom_func("insertBefore"); replace_dom_func("insertBefore");
replace_dom_func("replaceChild"); replace_dom_func("replaceChild");
} }
var postmessage_rewritten; var postmessage_rewritten;
//============================================ //============================================
function init_postmessage_override() function init_postmessage_override()
{ {
if (!Window.prototype.postMessage) { if (!Window.prototype.postMessage) {
return; return;
} }
var orig = Window.prototype.postMessage; var orig = Window.prototype.postMessage;
postmessage_rewritten = function(message, targetOrigin, transfer) { postmessage_rewritten = function(message, targetOrigin, transfer) {
if (targetOrigin && targetOrigin != "*") { if (targetOrigin && targetOrigin != "*") {
targetOrigin = window.location.origin; targetOrigin = window.location.origin;
} }
return orig.call(this, message, targetOrigin, transfer); return orig.call(this, message, targetOrigin, transfer);
} }
window.postMessage = postmessage_rewritten; window.postMessage = postmessage_rewritten;
window.Window.prototype.postMessage = postmessage_rewritten; window.Window.prototype.postMessage = postmessage_rewritten;
for (var i = 0; i < window.frames.length; i++) { for (var i = 0; i < window.frames.length; i++) {
try { try {
window.frames[i].postMessage = postmessage_rewritten; window.frames[i].postMessage = postmessage_rewritten;
@ -639,24 +641,24 @@ WB_wombat_init = (function() {
} }
} }
} }
//============================================ //============================================
function init_open_override() function init_open_override()
{ {
if (!Window.prototype.open) { if (!Window.prototype.open) {
return; return;
} }
var orig = Window.prototype.open; var orig = Window.prototype.open;
var open_rewritten = function(strUrl, strWindowName, strWindowFeatures) { var open_rewritten = function(strUrl, strWindowName, strWindowFeatures) {
strUrl = rewrite_url(strUrl); strUrl = rewrite_url(strUrl);
return orig.call(this, strUrl, strWindowName, strWindowFeatures); return orig.call(this, strUrl, strWindowName, strWindowFeatures);
} }
window.open = open_rewritten; window.open = open_rewritten;
window.Window.prototype.open = open_rewritten; window.Window.prototype.open = open_rewritten;
for (var i = 0; i < window.frames.length; i++) { for (var i = 0; i < window.frames.length; i++) {
try { try {
window.frames[i].open = open_rewritten; window.frames[i].open = open_rewritten;
@ -665,41 +667,41 @@ WB_wombat_init = (function() {
} }
} }
} }
function init_cookies_override() function init_cookies_override()
{ {
var cookie_path_regex = /\bPath=\'?\"?([^;'"\s]+)/i; var cookie_path_regex = /\bPath=\'?\"?([^;'"\s]+)/i;
var get_cookie = function() { var get_cookie = function() {
return document.cookie; return document.cookie;
} }
var set_cookie = function(value) { var set_cookie = function(value) {
var matched = value.match(cookie_path_regex); var matched = value.match(cookie_path_regex);
// if has cookie path, rewrite and replace // if has cookie path, rewrite and replace
if (matched) { if (matched) {
var rewritten = rewrite_url(matched[1]); var rewritten = rewrite_url(matched[1]);
value = value.replace(matched[1], rewritten); value = value.replace(matched[1], rewritten);
} }
document.cookie = value; document.cookie = value;
} }
def_prop(document, "WB_wombat_cookie", document.cookie, def_prop(document, "WB_wombat_cookie", document.cookie,
set_cookie, set_cookie,
get_cookie); get_cookie);
} }
//============================================ //============================================
function init_write_override() function init_write_override()
{ {
document.write = function(string) { document.write = function(string) {
var doc = new DOMParser().parseFromString(string, "text/html"); var doc = new DOMParser().parseFromString(string, "text/html");
if (doc) { if (doc) {
var children = doc.body.children; var children = doc.body.children;
for (var i = 0; i < children.length; i++) { for (var i = 0; i < children.length; i++) {
document.body.appendChild(children[i]); document.body.appendChild(children[i]);
} }
@ -710,52 +712,52 @@ WB_wombat_init = (function() {
//============================================ //============================================
function wombat_init(replay_prefix, capture_date, orig_scheme, orig_host, timestamp, mod) { function wombat_init(replay_prefix, capture_date, orig_scheme, orig_host, timestamp, mod) {
wb_replay_prefix = replay_prefix; wb_replay_prefix = replay_prefix;
if (wb_replay_prefix) { if (wb_replay_prefix) {
wb_replay_date_prefix = replay_prefix + capture_date + mod + "/"; wb_replay_date_prefix = replay_prefix + capture_date + mod + "/";
if (capture_date.length > 0) { if (capture_date.length > 0) {
wb_capture_date_part = "/" + capture_date + "/"; wb_capture_date_part = "/" + capture_date + "/";
} else { } else {
wb_capture_date_part = ""; wb_capture_date_part = "";
} }
wb_orig_scheme = orig_scheme + '://'; wb_orig_scheme = orig_scheme + '://';
wb_orig_host = wb_orig_scheme + orig_host; wb_orig_host = wb_orig_scheme + orig_host;
init_bad_prefixes(replay_prefix); init_bad_prefixes(replay_prefix);
} }
// Location // Location
var wombat_location = new WombatLocation(window.self.location); var wombat_location = new WombatLocation(window.self.location);
if (wombat_location._autooverride) { if (wombat_location._autooverride) {
var setter = function(val) { var setter = function(val) {
if (typeof(val) == "string") { if (typeof(val) == "string") {
if (starts_with(val, "about:")) { if (starts_with(val, "about:")) {
return undefined; return undefined;
} }
this._WB_wombat_location.href = val; this._WB_wombat_location.href = val;
} }
} }
def_prop(window, "WB_wombat_location", wombat_location, setter); def_prop(window, "WB_wombat_location", wombat_location, setter);
def_prop(document, "WB_wombat_location", wombat_location, setter); def_prop(document, "WB_wombat_location", wombat_location, setter);
} else { } else {
window.WB_wombat_location = wombat_location; window.WB_wombat_location = wombat_location;
document.WB_wombat_location = wombat_location; document.WB_wombat_location = wombat_location;
// Check quickly after page load // Check quickly after page load
setTimeout(check_all_locations, 500); setTimeout(check_all_locations, 500);
// Check periodically every few seconds // Check periodically every few seconds
setInterval(check_all_locations, 500); setInterval(check_all_locations, 500);
} }
var is_framed = (window.top.wbinfo && window.top.wbinfo.is_frame); var is_framed = (window.top.wbinfo && window.top.wbinfo.is_frame);
function find_next_top(win) { function find_next_top(win) {
while ((win.parent != win) && (win.parent != win.top)) { while ((win.parent != win) && (win.parent != win.top)) {
win = win.parent; win = win.parent;
@ -766,9 +768,9 @@ WB_wombat_init = (function() {
if (window.self.location != window.top.location) { if (window.self.location != window.top.location) {
if (is_framed) { if (is_framed) {
window.top.WB_wombat_location = window.WB_wombat_location; window.top.WB_wombat_location = window.WB_wombat_location;
window.WB_wombat_top = find_next_top(window.self); window.WB_wombat_top = find_next_top(window.self);
} else { } else {
window.top.WB_wombat_location = new WombatLocation(window.top.location); window.top.WB_wombat_location = new WombatLocation(window.top.location);
window.WB_wombat_top = window.top; window.WB_wombat_top = window.top;
@ -788,20 +790,20 @@ WB_wombat_init = (function() {
// History // History
copy_history_func(window.history, 'pushState'); copy_history_func(window.history, 'pushState');
copy_history_func(window.history, 'replaceState'); copy_history_func(window.history, 'replaceState');
// open // open
init_open_override(); init_open_override();
// postMessage // postMessage
init_postmessage_override(); init_postmessage_override();
// write // write
init_write_override(); init_write_override();
// Ajax // Ajax
init_ajax_rewrite(); init_ajax_rewrite();
init_worker_override(); init_worker_override();
// Cookies // Cookies
init_cookies_override(); init_cookies_override();
@ -810,6 +812,9 @@ WB_wombat_init = (function() {
// Random // Random
init_seeded_random(timestamp); init_seeded_random(timestamp);
// expose functions
this.extract_orig = extract_orig;
} }
return wombat_init; return wombat_init;

View File

@ -3,7 +3,7 @@
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script> <script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
<script> <script>
{% set urlsplit = cdx.original | urlsplit %} {% set urlsplit = cdx.original | urlsplit %}
WB_wombat_init("{{ wbrequest.wb_prefix}}", var _wb_wombat = new _WBWombat("{{ wbrequest.wb_prefix}}",
"{{ cdx['timestamp'] if include_ts else ''}}", "{{ cdx['timestamp'] if include_ts else ''}}",
"{{ urlsplit.scheme }}", "{{ urlsplit.scheme }}",
"{{ urlsplit.netloc }}", "{{ urlsplit.netloc }}",
@ -24,6 +24,7 @@
</script> </script>
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script> <script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/vidrw.js'> </script>
{% include banner_html ignore missing %} {% include banner_html ignore missing %}

View File

@ -4,12 +4,17 @@ from pywb.framework.archivalrouter import ArchivalRouter, Route
from pywb.rewrite.rewrite_live import LiveRewriter from pywb.rewrite.rewrite_live import LiveRewriter
from pywb.rewrite.wburl import WbUrl from pywb.rewrite.wburl import WbUrl
from pywb.rewrite.url_rewriter import HttpsUrlRewriter
from handlers import StaticHandler, SearchPageWbUrlHandler from handlers import StaticHandler, SearchPageWbUrlHandler
from views import HeadInsertView from views import HeadInsertView
from pywb.utils.wbexception import WbException from pywb.utils.wbexception import WbException
import json
import requests
from youtube_dl import YoutubeDL
#================================================================= #=================================================================
class LiveResourceException(WbException): class LiveResourceException(WbException):
@ -25,14 +30,16 @@ class RewriteHandler(SearchPageWbUrlHandler):
def __init__(self, config): def __init__(self, config):
super(RewriteHandler, self).__init__(config) super(RewriteHandler, self).__init__(config)
default_proxy = config.get('proxyhostport') self.default_proxy = config.get('proxyhostport')
self.rewriter = LiveRewriter(is_framed_replay=self.is_frame_mode, self.rewriter = LiveRewriter(is_framed_replay=self.is_frame_mode,
default_proxy=default_proxy) default_proxy=self.default_proxy)
self.head_insert_view = HeadInsertView.init_from_config(config) self.head_insert_view = HeadInsertView.init_from_config(config)
self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE) self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)
self.ydl = None
def handle_request(self, wbrequest): def handle_request(self, wbrequest):
try: try:
return self.render_content(wbrequest) return self.render_content(wbrequest)
@ -50,6 +57,9 @@ class RewriteHandler(SearchPageWbUrlHandler):
return {} return {}
def render_content(self, wbrequest): def render_content(self, wbrequest):
if wbrequest.wb_url.mod == 'vi_':
return self.get_video_info(wbrequest)
head_insert_func = self.head_insert_view.create_insert_func(wbrequest) head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
req_headers = self._live_request_headers(wbrequest) req_headers = self._live_request_headers(wbrequest)
@ -76,6 +86,34 @@ class RewriteHandler(SearchPageWbUrlHandler):
return WbResponse(status_headers, gen) return WbResponse(status_headers, gen)
def get_video_info(self, wbrequest):
    """Return youtube-dl info for the requested url as a JSON response.

    The YoutubeDL instance is created on first use and kept on the
    handler. When a default proxy is configured, the same JSON is also
    sent through it with a ``PUTMETA`` request before responding.

    :param wbrequest: current request; ``wbrequest.wb_url.url`` is the
        url passed to youtube-dl
    :return: text ``WbResponse`` carrying the JSON-encoded info
    """
    if not self.ydl:
        # simulate: youtube-dl option to gather info without downloading
        self.ydl = YoutubeDL(dict(simulate=True,
                                  youtube_include_dash_manifest=False))

        self.ydl.add_default_info_extractors()

    video_url = wbrequest.wb_url.url

    info = self.ydl.extract_info(video_url)

    content_type = 'application/vnd.youtube-dl_formats+json'
    metadata = json.dumps(info)

    if self.default_proxy:
        # Only the 'http' proxy is configured, so the url is sent with
        # its https:// scheme replaced by http://. The response to
        # PUTMETA is not used.
        # NOTE(review): verify=False disables TLS verification; it looks
        # redundant for an http:// url -- confirm before removing.
        requests.request(method='PUTMETA',
                         url=HttpsUrlRewriter.remove_https(video_url),
                         data=metadata,
                         headers={'Content-Type': content_type},
                         proxies={'http': self.default_proxy},
                         verify=False)

    return WbResponse.text_response(metadata, content_type=content_type)
def __str__(self): def __str__(self):
return 'Live Web Rewrite Handler' return 'Live Web Rewrite Handler'

View File

@ -68,6 +68,14 @@ class QueryHandler(object):
params['url'] = wb_url.url params['url'] = wb_url.url
params['output'] = output params['output'] = output
params['filter'].append('!mimetype:-')
# get metadata
if wb_url.mod == 'vi_':
# matching metadata explicitly with special scheme
params['url'] = wb_url.url.replace('http:/', 'metadata:/')
params['filter'].append('~original:metadata://')
cdx_iter = self.load_cdx(wbrequest, params) cdx_iter = self.load_cdx(wbrequest, params)
return cdx_iter, output return cdx_iter, output
@ -132,6 +140,7 @@ class QueryHandler(object):
'limit': limit, 'limit': limit,
'fl': ('urlkey,original,timestamp,' + 'fl': ('urlkey,original,timestamp,' +
'endtimestamp,groupcount,uniqcount'), 'endtimestamp,groupcount,uniqcount'),
'filter':[],
}, },
wburl.REPLAY: wburl.REPLAY:
@ -147,6 +156,7 @@ class QueryHandler(object):
# Not appropriate as default # Not appropriate as default
# Should be an option to configure status code filtering in general # Should be an option to configure status code filtering in general
# 'filter': ['statuscode:[23]..|-'], # 'filter': ['statuscode:[23]..|-'],
'filter': [],
'limit': '1', 'limit': '1',
'resolveRevisits': True, 'resolveRevisits': True,
} }

View File

@ -98,7 +98,7 @@ class TestWb:
assert '"20140127171238"' in resp.body assert '"20140127171238"' in resp.body
assert 'wb.js' in resp.body assert 'wb.js' in resp.body
assert 'WB_wombat_init' in resp.body assert 'new _WBWombat' in resp.body, resp.body
assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body
def test_replay_non_frame_content(self): def test_replay_non_frame_content(self):
@ -149,7 +149,7 @@ class TestWb:
assert 'wb.js' in resp.body assert 'wb.js' in resp.body
# no wombat present # no wombat present
assert 'WB_wombat_init' not in resp.body assert '_WBWombat' not in resp.body
# url not rewritten # url not rewritten
#assert '"http://www.iana.org/domains/example"' in resp.body #assert '"http://www.iana.org/domains/example"' in resp.body