mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
video work: improved yt handling:
- disable yt using yt api; for forced html/flash, disable on load - use yt error event to detect error - better fallback on recorded video - use separate cache for range and video info tracking - fix yt rules query to account for & and ?
This commit is contained in:
parent
ca17410056
commit
4c08a6a064
@ -148,12 +148,12 @@ rules:
|
||||
|
||||
- url_prefix: 'com,youtube,c'
|
||||
|
||||
fuzzy_lookup: 'com,youtube,c.*/videogoodput.*(id=[^&]+)'
|
||||
fuzzy_lookup: 'com,youtube,c.*/videogoodput.*([?&]id=[^&]+)'
|
||||
|
||||
- url_prefix: 'com,googlevideo,'
|
||||
|
||||
fuzzy_lookup:
|
||||
match: 'com,googlevideo.*/videoplayback.*(id=[^&]+).*(itag=[^&]+).*(mime=[^&]+)'
|
||||
match: 'com,googlevideo.*/videoplayback.*([?&]id=[^&]+).*([?&]itag=[^&]+).*([?&]mime=[^&]+)'
|
||||
filter:
|
||||
- '~urlkey:{0}'
|
||||
- '!mimetype:text/plain'
|
||||
|
@ -18,13 +18,35 @@ This file is part of pywb, https://github.com/ikreymer/pywb
|
||||
*/
|
||||
|
||||
// VidRw 1.0 -- video rewriting
|
||||
//
|
||||
//
|
||||
|
||||
var _pywbvid = "default";
|
||||
|
||||
var _pywb_yt_err = undefined;
|
||||
|
||||
if (window.location.hash) {
|
||||
var m = window.location.hash.match(/_pywbvid=([\w]+)/);
|
||||
if (m) {
|
||||
_pywbvid = m[1];
|
||||
}
|
||||
|
||||
if (_pywbvid == "html" || _pywbvid == "flash") {
|
||||
var YT_W_E_RX = /^(https?:\/\/.*youtube.com)\/(watch|embed).*$/;
|
||||
|
||||
if (wbinfo.url.match(YT_W_E_RX)) {
|
||||
// special case: prevent yt player from being inited
|
||||
Object.defineProperty(window, 'yt', {writeable: false});
|
||||
Object.defineProperty(window, 'ytplayer', {writeable: false});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__wbvidrw = (function() {
|
||||
|
||||
var found_embeds = false;
|
||||
|
||||
var vid_type = "default";
|
||||
|
||||
var FLASH_PLAYER = wbinfo.static_prefix + "/flowplayer/flowplayer-3.2.18.swf";
|
||||
|
||||
function check_videos() {
|
||||
@ -32,14 +54,6 @@ __wbvidrw = (function() {
|
||||
return;
|
||||
}
|
||||
|
||||
// extract_typ
|
||||
if (window.location.hash) {
|
||||
var m = window.location.hash.match(/_pywbvid=([\w]+)/);
|
||||
if (m) {
|
||||
vid_type = m[1];
|
||||
}
|
||||
}
|
||||
|
||||
function handle_all_embeds() {
|
||||
var embeds = document.getElementsByTagName("embed");
|
||||
|
||||
@ -61,10 +75,9 @@ __wbvidrw = (function() {
|
||||
|
||||
found_embeds = true;
|
||||
|
||||
handle_yt_videos(vid_type);
|
||||
|
||||
//window.setInterval(handle_all_embeds, 1000);
|
||||
handle_yt_videos(_pywbvid);
|
||||
|
||||
//window.setInterval(handle_all_embeds, 2000);
|
||||
//_wb_wombat.add_tag_handler("embed", handle_all_embeds);
|
||||
//_wb_wombat.add_tag_handler("object", handle_all_objects);
|
||||
}
|
||||
@ -100,8 +113,8 @@ __wbvidrw = (function() {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (var j = 0; j < objects[i].children.length; j++) {
|
||||
var child = objects[i].children[j];
|
||||
for (var j = 0; j < elem.children.length; j++) {
|
||||
var child = elem.children[j];
|
||||
|
||||
if (child.tagName == "EMBED") {
|
||||
return false;
|
||||
@ -125,7 +138,7 @@ __wbvidrw = (function() {
|
||||
|
||||
elem._vidrw = true;
|
||||
|
||||
check_replacement(elem, src);
|
||||
check_replacement(elem, obj_url);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -136,41 +149,85 @@ __wbvidrw = (function() {
|
||||
var YT_V_RX = /^(https?:\/\/.*youtube.com)\/v\/([^&?]+)(.*)$/;
|
||||
var VIMEO_RX = /^https?:\/\/.*vimeo.*clip_id=([^&]+)/;
|
||||
|
||||
|
||||
function handle_yt_videos(vid_type)
|
||||
function remove_yt()
|
||||
{
|
||||
function do_yt_video_replace()
|
||||
// yt special case
|
||||
if (window.yt && window.yt.player && window.yt.player.getPlayerByElement) {
|
||||
//yt.player.Application.create("player-api", ytplayer.config).dispose();
|
||||
|
||||
var elem = window.yt.player.getPlayerByElement("player-api");
|
||||
|
||||
if (!elem) {
|
||||
elem = window.yt.player.getPlayerByElement("player");
|
||||
}
|
||||
|
||||
if (elem) {
|
||||
elem.destroy();
|
||||
}
|
||||
|
||||
delete window.yt;
|
||||
if (window.ytplayer) {
|
||||
delete window.ytplayer;
|
||||
}
|
||||
}
|
||||
// end yt special case
|
||||
}
|
||||
|
||||
function handle_yt_videos(_pywbvid)
|
||||
{
|
||||
function do_yt_video_replace(elem)
|
||||
{
|
||||
console.log("REPLACING YT: " + wbinfo.url);
|
||||
ytvideo[0].autoplay = false;
|
||||
ytvideo[0].preload = "none";
|
||||
remove_yt();
|
||||
|
||||
var elem = ytvideo[0];
|
||||
// get ancestor 'div'
|
||||
if (elem.parentElement) {
|
||||
elem = elem.parentElement;
|
||||
while (elem.hasChildNodes()) {
|
||||
elem.removeChild(elem.lastChild);
|
||||
}
|
||||
if (elem.parentElement) {
|
||||
elem = elem.parentElement;
|
||||
}
|
||||
console.log(elem);
|
||||
|
||||
// Experimental
|
||||
|
||||
check_replacement(elem, wbinfo.url);
|
||||
//add placeholder child to remove
|
||||
var placeholder = document.createElement("div");
|
||||
elem.appendChild(placeholder);
|
||||
check_replacement(placeholder, wbinfo.url);
|
||||
}
|
||||
|
||||
// special case: yt
|
||||
if (wbinfo.url.match(YT_W_E_RX)) {
|
||||
var ytvideo = document.getElementsByTagName("video");
|
||||
//var ytvideo = document.getElementsByTagName("video");
|
||||
var player_div = document.getElementById("player-api");
|
||||
if (!player_div) {
|
||||
player_div = document.getElementById("player");
|
||||
}
|
||||
|
||||
//if (ytvideo.length == 1 && ytvideo[0].getAttribute("data-youtube-id") != "") {
|
||||
if (player_div) {
|
||||
if (_pywbvid == "html" || _pywbvid == "flash") {
|
||||
do_yt_video_replace(player_div);
|
||||
} else if (!wbinfo.is_live) {
|
||||
var player = window.yt.player.getPlayerByElement(player_div);
|
||||
|
||||
if (player) {
|
||||
_pywb_yt_err = function() {
|
||||
do_yt_video_replace(player_div);
|
||||
}
|
||||
|
||||
player.addEventListener("onError", "_pywb_yt_err");
|
||||
}
|
||||
|
||||
if (ytvideo.length == 1 && ytvideo[0].getAttribute("data-youtube-id") != "") {
|
||||
if (vid_type == "html") {
|
||||
do_yt_video_replace();
|
||||
} else {
|
||||
setTimeout(function() {
|
||||
if (!ytvideo || !ytvideo.length || ytvideo[0].readyState == 0) {
|
||||
do_yt_video_replace();
|
||||
if (!window.yt || !window.yt.player) {
|
||||
do_yt_video_replace(player_div);
|
||||
return;
|
||||
}
|
||||
|
||||
var state = -1;
|
||||
|
||||
if (player && player.getPlayerState) {
|
||||
state = player.getPlayerState();
|
||||
}
|
||||
|
||||
// if no player or player is still buffering (is this ok), then replace
|
||||
if (state < 0 || state == 3) {
|
||||
do_yt_video_replace(player_div);
|
||||
return;
|
||||
}
|
||||
}, 4000);
|
||||
}
|
||||
@ -197,7 +254,7 @@ __wbvidrw = (function() {
|
||||
|
||||
src = src.replace(VIMEO_RX, "http://player.vimeo.com/video/$1");
|
||||
|
||||
if (vid_type == "orig") {
|
||||
if (_pywbvid == "orig") {
|
||||
var repl_src = src.replace(YT_V_RX, "$1/embed/$2?$3&controls=0");
|
||||
if (repl_src != src) {
|
||||
do_replace_iframe(elem, repl_src);
|
||||
@ -275,12 +332,6 @@ __wbvidrw = (function() {
|
||||
} else {
|
||||
elem.parentNode.replaceChild(replacement, elem);
|
||||
}
|
||||
|
||||
if (window.yt) {
|
||||
yt.player.Application.create("player-api", ytplayer.config).dispose();
|
||||
delete window.yt;
|
||||
delete window.ytplayer;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -315,7 +366,7 @@ __wbvidrw = (function() {
|
||||
if (type == "audio") {
|
||||
htmlelem = document.createElement("audio");
|
||||
}
|
||||
if (vid_type != "flash") {
|
||||
if (_pywbvid != "flash") {
|
||||
replacement = init_html_player(htmlelem, type, width, height, info, thumb_url);
|
||||
}
|
||||
}
|
||||
@ -402,7 +453,6 @@ __wbvidrw = (function() {
|
||||
return;
|
||||
}
|
||||
|
||||
//console.log("html5 " + type +" error");
|
||||
var replacement = document.createElement("div");
|
||||
|
||||
var vidId = "_wb_vid" + Date.now();
|
||||
|
@ -67,7 +67,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
|
||||
def render_content(self, wbrequest):
|
||||
if wbrequest.wb_url.mod == 'vi_':
|
||||
return self.get_video_info(wbrequest)
|
||||
return self._get_video_info(wbrequest)
|
||||
|
||||
head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
|
||||
req_headers = self._live_request_headers(wbrequest)
|
||||
@ -79,6 +79,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
ignore_proxies = False
|
||||
use_206 = False
|
||||
url = None
|
||||
rangeres = None
|
||||
|
||||
readd_range = False
|
||||
cache_key = None
|
||||
@ -100,7 +101,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
ignore_proxies = True
|
||||
|
||||
# sets cache_key only if not already cached
|
||||
cache_key = self._check_url_cache(url)
|
||||
cache_key = self._get_cache_key('r:', url)
|
||||
|
||||
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
||||
wbrequest.urlrewriter,
|
||||
@ -124,6 +125,18 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
if cache_key:
|
||||
self._add_proxy_ping(cache_key, url, wbrequest, wbresponse)
|
||||
|
||||
if rangeres:
|
||||
referrer = wbrequest.env.get('REL_REFERER')
|
||||
|
||||
# also ping video info
|
||||
if referrer:
|
||||
try:
|
||||
resp = self._get_video_info(wbrequest,
|
||||
info_url=referrer,
|
||||
video_url=url)
|
||||
except:
|
||||
print('Error getting video info')
|
||||
|
||||
return wbresponse
|
||||
|
||||
def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
|
||||
@ -138,22 +151,26 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
|
||||
return WbResponse(status_headers, gen)
|
||||
|
||||
def _check_url_cache(self, url):
|
||||
def _get_cache_key(self, prefix, url):
|
||||
if not self._cache:
|
||||
self._cache = create_cache()
|
||||
|
||||
hash_ = hashlib.md5()
|
||||
hash_.update(url)
|
||||
key = hash_.hexdigest()
|
||||
key = self.create_cache_key(prefix, url)
|
||||
|
||||
if key in self._cache:
|
||||
return None
|
||||
|
||||
return key
|
||||
|
||||
def _add_proxy_ping(self, key, url, wbrequest, wbresponse):
|
||||
referrer = wbrequest.env.get('REL_REFERER')
|
||||
@staticmethod
|
||||
def create_cache_key(prefix, url):
|
||||
hash_ = hashlib.md5()
|
||||
hash_.update(url)
|
||||
key = hash_.hexdigest()
|
||||
key = prefix + key
|
||||
return key
|
||||
|
||||
def _add_proxy_ping(self, key, url, wbrequest, wbresponse):
|
||||
def do_ping():
|
||||
headers = self._live_request_headers(wbrequest)
|
||||
headers['Connection'] = 'close'
|
||||
@ -175,12 +192,6 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
del self._cache[key]
|
||||
raise
|
||||
|
||||
# also ping video info
|
||||
if referrer:
|
||||
resp = self.get_video_info(wbrequest,
|
||||
info_url=referrer,
|
||||
video_url=url)
|
||||
|
||||
def wrap_buff_gen(gen):
|
||||
for x in gen:
|
||||
yield x
|
||||
@ -194,7 +205,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
wbresponse.body = wrap_buff_gen(wbresponse.body)
|
||||
return wbresponse
|
||||
|
||||
def get_video_info(self, wbrequest, info_url=None, video_url=None):
|
||||
def _get_video_info(self, wbrequest, info_url=None, video_url=None):
|
||||
if not self.youtubedl:
|
||||
self.youtubedl = YoutubeDLWrapper()
|
||||
|
||||
@ -204,12 +215,18 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
if not info_url:
|
||||
info_url = wbrequest.wb_url.url
|
||||
|
||||
cache_key = None
|
||||
if self.proxies:
|
||||
cache_key = self._get_cache_key('v:', video_url)
|
||||
|
||||
info = self.youtubedl.extract_info(video_url)
|
||||
|
||||
#if info and info.formats and len(info.formats) == 1:
|
||||
|
||||
content_type = self.YT_DL_TYPE
|
||||
metadata = json.dumps(info)
|
||||
|
||||
if self.proxies:
|
||||
if (self.proxies and cache_key):
|
||||
headers = self._live_request_headers(wbrequest)
|
||||
headers['Content-Type'] = content_type
|
||||
|
||||
@ -222,6 +239,8 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
proxies=self.proxies,
|
||||
verify=False)
|
||||
|
||||
self._cache[cache_key] = '1'
|
||||
|
||||
return WbResponse.text_response(metadata, content_type=content_type)
|
||||
|
||||
def __str__(self):
|
||||
|
@ -6,6 +6,7 @@ from tempfile import NamedTemporaryFile, mkdtemp
|
||||
|
||||
import yaml
|
||||
import os
|
||||
from shutil import rmtree
|
||||
|
||||
import atexit
|
||||
|
||||
@ -19,9 +20,8 @@ class RangeCache(object):
|
||||
|
||||
def cleanup(self):
|
||||
if self.temp_dir: # pragma: no cover
|
||||
import shutil
|
||||
print('Removing: ' + self.temp_dir)
|
||||
shutil.rmtree(self.temp_dir, True)
|
||||
rmtree(self.temp_dir, True)
|
||||
self.temp_dir = None
|
||||
|
||||
def handle_range(self, wbrequest, digest, wbresponse_func,
|
||||
|
@ -9,9 +9,10 @@ from pywb.framework.wsgi_wrappers import init_app
|
||||
import webtest
|
||||
import shutil
|
||||
|
||||
import pywb.webapp.live_rewrite_handler
|
||||
|
||||
|
||||
#=================================================================
|
||||
#ThreadingMixIn.deamon_threads = True
|
||||
|
||||
#class ProxyServer(ThreadingMixIn, HTTPServer):
|
||||
class ProxyServer(HTTPServer):
|
||||
@ -49,6 +50,7 @@ class ProxyRequest(BaseHTTPRequestHandler):
|
||||
class TestProxyLiveRewriter:
|
||||
def setup(self):
|
||||
self.requestlog = []
|
||||
self.cache = {}
|
||||
|
||||
def make_httpd(app):
|
||||
proxyserv = ProxyServer(('', 0), ProxyRequest)
|
||||
@ -63,7 +65,11 @@ class TestProxyLiveRewriter:
|
||||
config=dict(framed_replay=True,
|
||||
proxyhostport=self.server.proxy_dict))
|
||||
|
||||
print(self.server.proxy_dict)
|
||||
def create_cache():
|
||||
return self.cache
|
||||
|
||||
pywb.webapp.live_rewrite_handler.create_cache = create_cache
|
||||
|
||||
self.testapp = webtest.TestApp(self.app)
|
||||
|
||||
def teardown(self):
|
||||
@ -83,6 +89,8 @@ class TestProxyLiveRewriter:
|
||||
assert resp.body.startswith('GET http://example.com/ HTTP/1.1')
|
||||
assert 'referer: http://other.example.com' in resp.body
|
||||
|
||||
assert len(self.cache) == 0
|
||||
|
||||
def test_echo_proxy_start_unbounded_remove_range(self):
|
||||
headers = [('Range', 'bytes=0-')]
|
||||
resp = self.testapp.get('/rewrite/http://example.com/', headers=headers)
|
||||
@ -101,6 +109,8 @@ class TestProxyLiveRewriter:
|
||||
assert self.requestlog[0].startswith('GET http://example.com/ HTTP/1.1')
|
||||
assert 'range: ' not in self.requestlog[0]
|
||||
|
||||
assert len(self.cache) == 0
|
||||
|
||||
def test_echo_proxy_bounded_noproxy_range(self):
|
||||
headers = [('Range', 'bytes=10-1000')]
|
||||
resp = self.testapp.get('/rewrite/http://example.com/foobar', headers=headers)
|
||||
@ -124,6 +134,10 @@ class TestProxyLiveRewriter:
|
||||
# no range request
|
||||
assert 'range: ' not in self.requestlog[0]
|
||||
|
||||
# r: key cached
|
||||
assert len(self.cache) == 1
|
||||
assert RewriteHandler.create_cache_key('r:', 'http://example.com/foobar') in self.cache
|
||||
|
||||
# Second Request
|
||||
# clear log
|
||||
self.requestlog.pop()
|
||||
@ -140,6 +154,7 @@ class TestProxyLiveRewriter:
|
||||
|
||||
# already pinged proxy, no additional requests sent to proxy
|
||||
assert len(self.requestlog) == 0
|
||||
assert len(self.cache) == 1
|
||||
|
||||
def test_echo_proxy_video_info(self):
|
||||
resp = self.testapp.get('/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
|
||||
@ -149,6 +164,14 @@ class TestProxyLiveRewriter:
|
||||
assert len(self.requestlog) == 1
|
||||
assert self.requestlog[0].startswith('PUTMETA http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1')
|
||||
|
||||
# second request, not sent to proxy
|
||||
resp = self.testapp.get('/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
|
||||
assert len(self.requestlog) == 1
|
||||
|
||||
# v: video info cache
|
||||
assert len(self.cache) == 1
|
||||
assert RewriteHandler.create_cache_key('v:', 'https://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache
|
||||
|
||||
def test_echo_proxy_video_with_referrer(self):
|
||||
headers = [('Range', 'bytes=1000-2000'), ('Referer', 'http://localhost:80/rewrite/https://example.com/')]
|
||||
resp = self.testapp.get('/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M', headers=headers)
|
||||
@ -159,12 +182,18 @@ class TestProxyLiveRewriter:
|
||||
# proxy receives two requests
|
||||
assert len(self.requestlog) == 2
|
||||
|
||||
# first, non-ranged request for page
|
||||
assert self.requestlog[0].startswith('GET http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1')
|
||||
assert 'range' not in self.requestlog[0]
|
||||
# first, a video info request recording the page
|
||||
assert self.requestlog[0].startswith('PUTMETA http://example.com/ HTTP/1.1')
|
||||
|
||||
# second, non-ranged request for page
|
||||
assert self.requestlog[1].startswith('GET http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1')
|
||||
assert 'range' not in self.requestlog[1]
|
||||
|
||||
# both video info and range cached
|
||||
assert len(self.cache) == 2
|
||||
assert RewriteHandler.create_cache_key('v:', 'http://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache
|
||||
assert RewriteHandler.create_cache_key('r:', 'http://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache
|
||||
|
||||
# also a video info request recording the page
|
||||
assert self.requestlog[1].startswith('PUTMETA http://example.com/ HTTP/1.1')
|
||||
|
||||
def test_echo_proxy_error(self):
|
||||
headers = [('Range', 'bytes=1000-2000'), ('Referer', 'http://localhost:80/rewrite/https://example.com/')]
|
||||
@ -177,3 +206,6 @@ class TestProxyLiveRewriter:
|
||||
|
||||
# no proxy requests as we're forcing exception
|
||||
assert len(self.requestlog) == 0
|
||||
|
||||
assert len(self.cache) == 0
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user