mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
video: work on domain-specific range cache rewrites
This commit is contained in:
parent
703ec0eb5e
commit
72aa921ce5
@ -129,7 +129,7 @@ rules:
|
|||||||
|
|
||||||
- url_prefix: 'com,googlevideo,'
|
- url_prefix: 'com,googlevideo,'
|
||||||
|
|
||||||
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]+).*(range=[^&]+)'
|
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]).*(mime=[^&]).*(signature=[^&])'
|
||||||
|
|
||||||
|
|
||||||
# testing rules -- not for valid domain
|
# testing rules -- not for valid domain
|
||||||
|
@ -70,32 +70,37 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
if ref_wburl_str:
|
if ref_wburl_str:
|
||||||
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
||||||
|
|
||||||
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
def do_req():
|
||||||
wbrequest.urlrewriter,
|
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
||||||
head_insert_func=head_insert_func,
|
wbrequest.urlrewriter,
|
||||||
req_headers=req_headers,
|
head_insert_func=head_insert_func,
|
||||||
env=wbrequest.env)
|
req_headers=req_headers,
|
||||||
|
env=wbrequest.env)
|
||||||
|
|
||||||
|
return self._make_response(wbrequest, *result)
|
||||||
|
|
||||||
|
cdx = dict(url=wbrequest.wb_url.url)
|
||||||
|
|
||||||
|
range_status, range_iter = range_cache(wbrequest, cdx, do_req)
|
||||||
|
|
||||||
|
if not range_status or not range_iter:
|
||||||
|
return do_req()
|
||||||
|
else:
|
||||||
|
result = range_status, range_iter, False
|
||||||
|
return self._make_response(wbrequest, *result)
|
||||||
|
|
||||||
return self._make_response(wbrequest, *result)
|
|
||||||
|
|
||||||
def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
|
def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
|
||||||
# if cookie set, pass recorded timestamp info via cookie
|
# if cookie set, pass recorded timestamp info via cookie
|
||||||
# so that client side may be able to access it
|
# so that client side may be able to access it
|
||||||
# used by framed mode to update frame banner
|
# used by framed mode to update frame banner
|
||||||
if self.live_cookie:
|
if self.live_cookie:
|
||||||
cdx = wbrequest.env['pywb.cdx']
|
cdx = wbrequest.env.get('pywb.cdx')
|
||||||
value = self.live_cookie.format(cdx['timestamp'])
|
if cdx:
|
||||||
status_headers.headers.append(('Set-Cookie', value))
|
value = self.live_cookie.format(cdx['timestamp'])
|
||||||
|
status_headers.headers.append(('Set-Cookie', value))
|
||||||
def resp_func():
|
|
||||||
return WbResponse(status_headers, gen)
|
|
||||||
|
|
||||||
#range_status, range_iter = range_cache(wbrequest, cdx, resp_func)
|
|
||||||
#if range_status and range_iter:
|
|
||||||
# return WbResponse(range_status, range_iter)
|
|
||||||
#else:
|
|
||||||
return resp_func()
|
|
||||||
|
|
||||||
|
return WbResponse(status_headers, gen)
|
||||||
|
|
||||||
def get_video_info(self, wbrequest):
|
def get_video_info(self, wbrequest):
|
||||||
if not self.youtubedl:
|
if not self.youtubedl:
|
||||||
|
@ -7,28 +7,71 @@ from tempfile import NamedTemporaryFile
|
|||||||
import hashlib
|
import hashlib
|
||||||
import yaml
|
import yaml
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class RangeCache(object):
|
class RangeCache(object):
|
||||||
|
YOUTUBE_RX = re.compile('.*.googlevideo.com/videoplayback')
|
||||||
|
YT_EXTRACT_RX = re.compile('&range=([^&]+)')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def match_yt(url):
|
||||||
|
if not RangeCache.YOUTUBE_RX.match(url):
|
||||||
|
return None
|
||||||
|
|
||||||
|
range_h_res = []
|
||||||
|
|
||||||
|
def repl_range(matcher):
|
||||||
|
range_h_res.append(matcher.group(1))
|
||||||
|
return ''
|
||||||
|
|
||||||
|
new_url = RangeCache.YT_EXTRACT_RX.sub(repl_range, url)
|
||||||
|
if range_h_res:
|
||||||
|
print('MATCHED')
|
||||||
|
return range_h_res[0], new_url
|
||||||
|
else:
|
||||||
|
return None, url
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.cache = create_cache()
|
self.cache = create_cache()
|
||||||
print(type(self.cache))
|
print(type(self.cache))
|
||||||
|
|
||||||
def __call__(self, wbrequest, cdx, wbresponse_func):
|
def __call__(self, wbrequest, cdx, wbresponse_func):
|
||||||
range_h = wbrequest.env.get('HTTP_RANGE')
|
url = wbrequest.wb_url.url
|
||||||
if not range_h:
|
range_h = None
|
||||||
return None, None
|
use_206 = False
|
||||||
|
|
||||||
|
result = self.match_yt(url)
|
||||||
|
if result:
|
||||||
|
range_h, url = result
|
||||||
|
wbrequest.wb_url.url = url
|
||||||
|
print(range_h)
|
||||||
|
|
||||||
|
# check for standard range header
|
||||||
|
if not range_h:
|
||||||
|
range_h = wbrequest.env.get('HTTP_RANGE')
|
||||||
|
if not range_h:
|
||||||
|
return None, None
|
||||||
|
range_h = True
|
||||||
|
|
||||||
|
return self.handle_range(wbrequest, cdx, url,
|
||||||
|
wbresponse_func,
|
||||||
|
range_h, use_206)
|
||||||
|
|
||||||
|
def handle_range(self, wbrequest, cdx, url, wbresponse_func,
|
||||||
|
range_h, use_206):
|
||||||
|
|
||||||
|
range_h = range_h.split('=')[-1]
|
||||||
key = cdx.get('digest')
|
key = cdx.get('digest')
|
||||||
if not key:
|
if not key:
|
||||||
hash_ = hashlib.md5()
|
hash_ = hashlib.md5()
|
||||||
hash_.update(cdx['urlkey'])
|
hash_.update(url)
|
||||||
hash_.update(cdx['timestamp'])
|
#hash_.update(cdx['timestamp'])
|
||||||
key = hash_.hexdigest()
|
key = hash_.hexdigest()
|
||||||
|
|
||||||
print('KEY: ', key)
|
print('KEY: ', key)
|
||||||
print('CACHE: ', str(self.cache))
|
print('RANGE: ', range_h)
|
||||||
|
|
||||||
if not key in self.cache:
|
if not key in self.cache:
|
||||||
print('MISS')
|
print('MISS')
|
||||||
@ -56,13 +99,13 @@ class RangeCache(object):
|
|||||||
|
|
||||||
range_h = range_h.rstrip()
|
range_h = range_h.rstrip()
|
||||||
|
|
||||||
if range_h == 'bytes=0-':
|
if range_h == '0-':
|
||||||
print('FIX RANGE')
|
print('FIX RANGE')
|
||||||
range_h = 'bytes=0-120000'
|
range_h = '0-120000'
|
||||||
|
|
||||||
parts = range_h.rstrip().split('-')
|
parts = range_h.rstrip().split('-')
|
||||||
start = parts[0]
|
start = parts[0]
|
||||||
start = start.split('=')[1]
|
#start = start.split('=')[1]
|
||||||
start = int(start)
|
start = int(start)
|
||||||
|
|
||||||
maxlen = filelen - start
|
maxlen = filelen - start
|
||||||
@ -82,14 +125,22 @@ class RangeCache(object):
|
|||||||
|
|
||||||
yield buf
|
yield buf
|
||||||
|
|
||||||
|
if use_206:
|
||||||
|
content_range = 'bytes {0}-{1}/{2}'.format(start,
|
||||||
|
start + maxlen - 1,
|
||||||
|
filelen)
|
||||||
|
print('CONTENT_RANGE: ', content_range)
|
||||||
|
|
||||||
content_range = 'bytes {0}-{1}/{2}'.format(start,
|
status_headers = StatusAndHeaders('206 Partial Content', spec['headers'])
|
||||||
start + maxlen - 1,
|
status_headers.replace_header('Content-Range', content_range)
|
||||||
filelen)
|
else:
|
||||||
|
status_headers = StatusAndHeaders('200 OK', spec['headers'])
|
||||||
|
|
||||||
|
status_headers.headers.append(('Accept-Ranges', 'bytes'))
|
||||||
|
status_headers.headers.append(('Access-Control-Allow-Credentials', 'true'))
|
||||||
|
status_headers.headers.append(('Access-Control-Allow-Origin', 'http://localhost:8080'))
|
||||||
|
status_headers.headers.append(('Timing-Allow-Origin', 'http://localhost:8080'))
|
||||||
|
|
||||||
print('CONTENT_RANGE: ', content_range)
|
|
||||||
status_headers = StatusAndHeaders('206 Partial Content', spec['headers'])
|
|
||||||
status_headers.replace_header('Content-Range', content_range)
|
|
||||||
status_headers.replace_header('Content-Length', str(maxlen))
|
status_headers.replace_header('Content-Length', str(maxlen))
|
||||||
return status_headers, read_range()
|
return status_headers, read_range()
|
||||||
|
|
||||||
|
@ -201,6 +201,9 @@ class ReplayView(object):
|
|||||||
if wbrequest.options['is_proxy']:
|
if wbrequest.options['is_proxy']:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
if range_cache and range_cache.match_yt(wbrequest.wb_url.url):
|
||||||
|
return None
|
||||||
|
|
||||||
redir_needed = (wbrequest.options.get('is_timegate', False))
|
redir_needed = (wbrequest.options.get('is_timegate', False))
|
||||||
|
|
||||||
if not redir_needed and self.redir_to_exact:
|
if not redir_needed and self.redir_to_exact:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user