mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
video: work on domain-specific range cache rewrites
This commit is contained in:
parent
703ec0eb5e
commit
72aa921ce5
@ -129,7 +129,7 @@ rules:
|
||||
|
||||
- url_prefix: 'com,googlevideo,'
|
||||
|
||||
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]+).*(range=[^&]+)'
|
||||
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]).*(mime=[^&]).*(signature=[^&])'
|
||||
|
||||
|
||||
# testing rules -- not for valid domain
|
||||
|
@ -70,32 +70,37 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
if ref_wburl_str:
|
||||
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
||||
|
||||
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
||||
wbrequest.urlrewriter,
|
||||
head_insert_func=head_insert_func,
|
||||
req_headers=req_headers,
|
||||
env=wbrequest.env)
|
||||
def do_req():
|
||||
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
||||
wbrequest.urlrewriter,
|
||||
head_insert_func=head_insert_func,
|
||||
req_headers=req_headers,
|
||||
env=wbrequest.env)
|
||||
|
||||
return self._make_response(wbrequest, *result)
|
||||
|
||||
cdx = dict(url=wbrequest.wb_url.url)
|
||||
|
||||
range_status, range_iter = range_cache(wbrequest, cdx, do_req)
|
||||
|
||||
if not range_status or not range_iter:
|
||||
return do_req()
|
||||
else:
|
||||
result = range_status, range_iter, False
|
||||
return self._make_response(wbrequest, *result)
|
||||
|
||||
return self._make_response(wbrequest, *result)
|
||||
|
||||
def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
|
||||
# if cookie set, pass recorded timestamp info via cookie
|
||||
# so that client side may be able to access it
|
||||
# used by framed mode to update frame banner
|
||||
if self.live_cookie:
|
||||
cdx = wbrequest.env['pywb.cdx']
|
||||
value = self.live_cookie.format(cdx['timestamp'])
|
||||
status_headers.headers.append(('Set-Cookie', value))
|
||||
|
||||
def resp_func():
|
||||
return WbResponse(status_headers, gen)
|
||||
|
||||
#range_status, range_iter = range_cache(wbrequest, cdx, resp_func)
|
||||
#if range_status and range_iter:
|
||||
# return WbResponse(range_status, range_iter)
|
||||
#else:
|
||||
return resp_func()
|
||||
cdx = wbrequest.env.get('pywb.cdx')
|
||||
if cdx:
|
||||
value = self.live_cookie.format(cdx['timestamp'])
|
||||
status_headers.headers.append(('Set-Cookie', value))
|
||||
|
||||
return WbResponse(status_headers, gen)
|
||||
|
||||
def get_video_info(self, wbrequest):
|
||||
if not self.youtubedl:
|
||||
|
@ -7,28 +7,71 @@ from tempfile import NamedTemporaryFile
|
||||
import hashlib
|
||||
import yaml
|
||||
import os
|
||||
import re
|
||||
|
||||
|
||||
#=================================================================
|
||||
class RangeCache(object):
|
||||
YOUTUBE_RX = re.compile('.*.googlevideo.com/videoplayback')
|
||||
YT_EXTRACT_RX = re.compile('&range=([^&]+)')
|
||||
|
||||
@staticmethod
|
||||
def match_yt(url):
|
||||
if not RangeCache.YOUTUBE_RX.match(url):
|
||||
return None
|
||||
|
||||
range_h_res = []
|
||||
|
||||
def repl_range(matcher):
|
||||
range_h_res.append(matcher.group(1))
|
||||
return ''
|
||||
|
||||
new_url = RangeCache.YT_EXTRACT_RX.sub(repl_range, url)
|
||||
if range_h_res:
|
||||
print('MATCHED')
|
||||
return range_h_res[0], new_url
|
||||
else:
|
||||
return None, url
|
||||
|
||||
def __init__(self):
|
||||
self.cache = create_cache()
|
||||
print(type(self.cache))
|
||||
|
||||
def __call__(self, wbrequest, cdx, wbresponse_func):
|
||||
range_h = wbrequest.env.get('HTTP_RANGE')
|
||||
if not range_h:
|
||||
return None, None
|
||||
url = wbrequest.wb_url.url
|
||||
range_h = None
|
||||
use_206 = False
|
||||
|
||||
result = self.match_yt(url)
|
||||
if result:
|
||||
range_h, url = result
|
||||
wbrequest.wb_url.url = url
|
||||
print(range_h)
|
||||
|
||||
# check for standard range header
|
||||
if not range_h:
|
||||
range_h = wbrequest.env.get('HTTP_RANGE')
|
||||
if not range_h:
|
||||
return None, None
|
||||
range_h = True
|
||||
|
||||
return self.handle_range(wbrequest, cdx, url,
|
||||
wbresponse_func,
|
||||
range_h, use_206)
|
||||
|
||||
def handle_range(self, wbrequest, cdx, url, wbresponse_func,
|
||||
range_h, use_206):
|
||||
|
||||
range_h = range_h.split('=')[-1]
|
||||
key = cdx.get('digest')
|
||||
if not key:
|
||||
hash_ = hashlib.md5()
|
||||
hash_.update(cdx['urlkey'])
|
||||
hash_.update(cdx['timestamp'])
|
||||
hash_.update(url)
|
||||
#hash_.update(cdx['timestamp'])
|
||||
key = hash_.hexdigest()
|
||||
|
||||
print('KEY: ', key)
|
||||
print('CACHE: ', str(self.cache))
|
||||
print('RANGE: ', range_h)
|
||||
|
||||
if not key in self.cache:
|
||||
print('MISS')
|
||||
@ -56,13 +99,13 @@ class RangeCache(object):
|
||||
|
||||
range_h = range_h.rstrip()
|
||||
|
||||
if range_h == 'bytes=0-':
|
||||
if range_h == '0-':
|
||||
print('FIX RANGE')
|
||||
range_h = 'bytes=0-120000'
|
||||
range_h = '0-120000'
|
||||
|
||||
parts = range_h.rstrip().split('-')
|
||||
start = parts[0]
|
||||
start = start.split('=')[1]
|
||||
#start = start.split('=')[1]
|
||||
start = int(start)
|
||||
|
||||
maxlen = filelen - start
|
||||
@ -82,14 +125,22 @@ class RangeCache(object):
|
||||
|
||||
yield buf
|
||||
|
||||
if use_206:
|
||||
content_range = 'bytes {0}-{1}/{2}'.format(start,
|
||||
start + maxlen - 1,
|
||||
filelen)
|
||||
print('CONTENT_RANGE: ', content_range)
|
||||
|
||||
content_range = 'bytes {0}-{1}/{2}'.format(start,
|
||||
start + maxlen - 1,
|
||||
filelen)
|
||||
status_headers = StatusAndHeaders('206 Partial Content', spec['headers'])
|
||||
status_headers.replace_header('Content-Range', content_range)
|
||||
else:
|
||||
status_headers = StatusAndHeaders('200 OK', spec['headers'])
|
||||
|
||||
status_headers.headers.append(('Accept-Ranges', 'bytes'))
|
||||
status_headers.headers.append(('Access-Control-Allow-Credentials', 'true'))
|
||||
status_headers.headers.append(('Access-Control-Allow-Origin', 'http://localhost:8080'))
|
||||
status_headers.headers.append(('Timing-Allow-Origin', 'http://localhost:8080'))
|
||||
|
||||
print('CONTENT_RANGE: ', content_range)
|
||||
status_headers = StatusAndHeaders('206 Partial Content', spec['headers'])
|
||||
status_headers.replace_header('Content-Range', content_range)
|
||||
status_headers.replace_header('Content-Length', str(maxlen))
|
||||
return status_headers, read_range()
|
||||
|
||||
|
@ -201,6 +201,9 @@ class ReplayView(object):
|
||||
if wbrequest.options['is_proxy']:
|
||||
return None
|
||||
|
||||
if range_cache and range_cache.match_yt(wbrequest.wb_url.url):
|
||||
return None
|
||||
|
||||
redir_needed = (wbrequest.options.get('is_timegate', False))
|
||||
|
||||
if not redir_needed and self.redir_to_exact:
|
||||
|
Loading…
x
Reference in New Issue
Block a user