1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

video: work on domain-specific range cache rewrites

This commit is contained in:
Ilya Kreymer 2014-11-04 08:44:45 -08:00
parent 703ec0eb5e
commit 72aa921ce5
4 changed files with 93 additions and 34 deletions

View File

@ -129,7 +129,7 @@ rules:
- url_prefix: 'com,googlevideo,'
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]+).*(range=[^&]+)'
# capture the full id, mime and signature param values ([^&]+),
# not just the first character of each value
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]+).*(mime=[^&]+).*(signature=[^&]+)'
# testing rules -- not for valid domain

View File

@ -70,32 +70,37 @@ class RewriteHandler(SearchPageWbUrlHandler):
if ref_wburl_str:
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
wbrequest.urlrewriter,
head_insert_func=head_insert_func,
req_headers=req_headers,
env=wbrequest.env)
def do_req():
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
wbrequest.urlrewriter,
head_insert_func=head_insert_func,
req_headers=req_headers,
env=wbrequest.env)
return self._make_response(wbrequest, *result)
cdx = dict(url=wbrequest.wb_url.url)
range_status, range_iter = range_cache(wbrequest, cdx, do_req)
if not range_status or not range_iter:
return do_req()
else:
result = range_status, range_iter, False
return self._make_response(wbrequest, *result)
return self._make_response(wbrequest, *result)
def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
    """Wrap status/headers and a body iterator into a WbResponse.

    :param wbrequest: current request; its ``env`` dict is consulted
        for the ``'pywb.cdx'`` entry
    :param status_headers: status and headers object; a ``Set-Cookie``
        header may be appended to its ``headers`` list
    :param gen: iterator producing the response body
    :param is_rewritten: third element of the result tuple unpacked by
        callers; accepted for signature compatibility, not used here
    :return: ``WbResponse(status_headers, gen)``
    """
    # if cookie set, pass recorded timestamp info via cookie
    # so that client side may be able to access it
    # used by framed mode to update frame banner
    if self.live_cookie:
        # the env may carry no cdx entry; skip the cookie in that
        # case instead of failing with a KeyError
        cdx = wbrequest.env.get('pywb.cdx')
        if cdx:
            value = self.live_cookie.format(cdx['timestamp'])
            status_headers.headers.append(('Set-Cookie', value))

    return WbResponse(status_headers, gen)
def get_video_info(self, wbrequest):
if not self.youtubedl:

View File

@ -7,28 +7,71 @@ from tempfile import NamedTemporaryFile
import hashlib
import yaml
import os
import re
#=================================================================
class RangeCache(object):
YOUTUBE_RX = re.compile('.*.googlevideo.com/videoplayback')
YT_EXTRACT_RX = re.compile('&range=([^&]+)')
@staticmethod
def match_yt(url):
if not RangeCache.YOUTUBE_RX.match(url):
return None
range_h_res = []
def repl_range(matcher):
range_h_res.append(matcher.group(1))
return ''
new_url = RangeCache.YT_EXTRACT_RX.sub(repl_range, url)
if range_h_res:
print('MATCHED')
return range_h_res[0], new_url
else:
return None, url
def __init__(self):
    """Set up the cache object returned by ``create_cache()``."""
    self.cache = create_cache()
def __call__(self, wbrequest, cdx, wbresponse_func):
range_h = wbrequest.env.get('HTTP_RANGE')
if not range_h:
return None, None
url = wbrequest.wb_url.url
range_h = None
use_206 = False
result = self.match_yt(url)
if result:
range_h, url = result
wbrequest.wb_url.url = url
print(range_h)
# check for standard range header
if not range_h:
range_h = wbrequest.env.get('HTTP_RANGE')
if not range_h:
return None, None
range_h = True
return self.handle_range(wbrequest, cdx, url,
wbresponse_func,
range_h, use_206)
def handle_range(self, wbrequest, cdx, url, wbresponse_func,
range_h, use_206):
range_h = range_h.split('=')[-1]
key = cdx.get('digest')
if not key:
hash_ = hashlib.md5()
hash_.update(cdx['urlkey'])
hash_.update(cdx['timestamp'])
hash_.update(url)
#hash_.update(cdx['timestamp'])
key = hash_.hexdigest()
print('KEY: ', key)
print('CACHE: ', str(self.cache))
print('RANGE: ', range_h)
if not key in self.cache:
print('MISS')
@ -56,13 +99,13 @@ class RangeCache(object):
range_h = range_h.rstrip()
if range_h == 'bytes=0-':
if range_h == '0-':
print('FIX RANGE')
range_h = 'bytes=0-120000'
range_h = '0-120000'
parts = range_h.rstrip().split('-')
start = parts[0]
start = start.split('=')[1]
#start = start.split('=')[1]
start = int(start)
maxlen = filelen - start
@ -82,14 +125,22 @@ class RangeCache(object):
yield buf
if use_206:
content_range = 'bytes {0}-{1}/{2}'.format(start,
start + maxlen - 1,
filelen)
print('CONTENT_RANGE: ', content_range)
content_range = 'bytes {0}-{1}/{2}'.format(start,
start + maxlen - 1,
filelen)
status_headers = StatusAndHeaders('206 Partial Content', spec['headers'])
status_headers.replace_header('Content-Range', content_range)
else:
status_headers = StatusAndHeaders('200 OK', spec['headers'])
status_headers.headers.append(('Accept-Ranges', 'bytes'))
status_headers.headers.append(('Access-Control-Allow-Credentials', 'true'))
status_headers.headers.append(('Access-Control-Allow-Origin', 'http://localhost:8080'))
status_headers.headers.append(('Timing-Allow-Origin', 'http://localhost:8080'))
print('CONTENT_RANGE: ', content_range)
status_headers = StatusAndHeaders('206 Partial Content', spec['headers'])
status_headers.replace_header('Content-Range', content_range)
status_headers.replace_header('Content-Length', str(maxlen))
return status_headers, read_range()

View File

@ -201,6 +201,9 @@ class ReplayView(object):
if wbrequest.options['is_proxy']:
return None
if range_cache and range_cache.match_yt(wbrequest.wb_url.url):
return None
redir_needed = (wbrequest.options.get('is_timegate', False))
if not redir_needed and self.redir_to_exact: