mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
refactor: split out optional cached replay components into cached_replay,
toggleable via 'enable_cache' in config -- regular replayview does not need any cache info; move add_range() components to statusandheaders from wbrequestresponse; add 'x-pywb-noredirect' header, which disables date-related redirect; video replay works w/o cache if supported by frontend (nginx)
This commit is contained in:
parent
b86517b246
commit
0f2c96879c
@ -100,6 +100,9 @@ class WbRequest(object):
|
|||||||
# PERF
|
# PERF
|
||||||
env['X_PERF'] = {}
|
env['X_PERF'] = {}
|
||||||
|
|
||||||
|
if env.get('HTTP_X_PYWB_NOREDIRECT'):
|
||||||
|
self.custom_params['noredir'] = True
|
||||||
|
|
||||||
self._parse_extra()
|
self._parse_extra()
|
||||||
|
|
||||||
def _is_ajax(self):
|
def _is_ajax(self):
|
||||||
@ -145,7 +148,8 @@ class WbRequest(object):
|
|||||||
else:
|
else:
|
||||||
end = ''
|
end = ''
|
||||||
|
|
||||||
return (url, start, end, use_206)
|
result = (url, start, end, use_206)
|
||||||
|
return result
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
varlist = vars(self)
|
varlist = vars(self)
|
||||||
@ -225,16 +229,6 @@ class WbResponse(object):
|
|||||||
|
|
||||||
return WbResponse(StatusAndHeaders(status, redir_headers))
|
return WbResponse(StatusAndHeaders(status, redir_headers))
|
||||||
|
|
||||||
def add_range(self, start, part_len, total_len):
|
|
||||||
content_range = 'bytes {0}-{1}/{2}'.format(start,
|
|
||||||
start + part_len - 1,
|
|
||||||
total_len)
|
|
||||||
|
|
||||||
self.status_headers.statusline = '206 Partial Content'
|
|
||||||
self.status_headers.replace_header('Content-Range', content_range)
|
|
||||||
self.status_headers.replace_header('Accept-Ranges', 'bytes')
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __call__(self, env, start_response):
|
def __call__(self, env, start_response):
|
||||||
start_response(self.status_headers.statusline,
|
start_response(self.status_headers.statusline,
|
||||||
self.status_headers.headers)
|
self.status_headers.headers)
|
||||||
@ -246,5 +240,9 @@ class WbResponse(object):
|
|||||||
|
|
||||||
return self.body
|
return self.body
|
||||||
|
|
||||||
|
def add_range(self, *args):
|
||||||
|
self.status_headers.add_range(*args)
|
||||||
|
return self
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(vars(self))
|
return str(vars(self))
|
||||||
|
@ -97,6 +97,19 @@ class StatusAndHeaders(object):
|
|||||||
self.statusline = valid_statusline
|
self.statusline = valid_statusline
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def add_range(self, start, part_len, total_len):
|
||||||
|
"""
|
||||||
|
Add range headers indicating that this a partial response
|
||||||
|
"""
|
||||||
|
content_range = 'bytes {0}-{1}/{2}'.format(start,
|
||||||
|
start + part_len - 1,
|
||||||
|
total_len)
|
||||||
|
|
||||||
|
self.statusline = '206 Partial Content'
|
||||||
|
self.replace_header('Content-Range', content_range)
|
||||||
|
self.replace_header('Accept-Ranges', 'bytes')
|
||||||
|
return self
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
headers_str = pprint.pformat(self.headers, indent=2)
|
headers_str = pprint.pformat(self.headers, indent=2)
|
||||||
return "StatusAndHeaders(protocol = '{0}', statusline = '{1}', \
|
return "StatusAndHeaders(protocol = '{0}', statusline = '{1}', \
|
||||||
|
@ -5,6 +5,12 @@ StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [ ('Con
|
|||||||
('Some', 'Value'),
|
('Some', 'Value'),
|
||||||
('Multi-Line', 'Value1 Also This')])
|
('Multi-Line', 'Value1 Also This')])
|
||||||
|
|
||||||
|
# add range
|
||||||
|
>>> StatusAndHeaders(statusline = '200 OK', headers=[('Content-Type', 'text/plain')]).add_range(10, 4, 100)
|
||||||
|
StatusAndHeaders(protocol = '', statusline = '206 Partial Content', headers = [ ('Content-Type', 'text/plain'),
|
||||||
|
('Content-Range', 'bytes 10-13/100'),
|
||||||
|
('Accept-Ranges', 'bytes')])
|
||||||
|
|
||||||
>>> StatusAndHeadersParser(['Other']).parse(BytesIO(status_headers_1))
|
>>> StatusAndHeadersParser(['Other']).parse(BytesIO(status_headers_1))
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK
|
StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK
|
||||||
@ -36,10 +42,12 @@ StatusAndHeaders(protocol = '', statusline = '204 No Content', headers = [])
|
|||||||
|
|
||||||
>>> StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_3))
|
>>> StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_3))
|
||||||
StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '204 Empty', headers = [('Content-Type', 'Value'), ('Content-Length', '0')])
|
StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '204 Empty', headers = [('Content-Type', 'Value'), ('Content-Length', '0')])
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
from pywb.utils.statusandheaders import StatusAndHeadersParser
|
from pywb.utils.statusandheaders import StatusAndHeadersParser, StatusAndHeaders
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
|
|
||||||
|
34
pywb/webapp/cached_replay.py
Normal file
34
pywb/webapp/cached_replay.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
from rangecache import range_cache
|
||||||
|
from replay_views import ReplayView
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class CachedReplayView(ReplayView):
|
||||||
|
"""
|
||||||
|
Extension for ReplayView supporting loading via the rangecache
|
||||||
|
"""
|
||||||
|
def replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
|
||||||
|
def get_capture():
|
||||||
|
return super(CachedReplayView, self).replay_capture(
|
||||||
|
wbrequest,
|
||||||
|
cdx,
|
||||||
|
cdx_loader,
|
||||||
|
failed_files)
|
||||||
|
|
||||||
|
range_status, range_iter = range_cache(wbrequest,
|
||||||
|
cdx.get('digest'),
|
||||||
|
get_capture)
|
||||||
|
if range_status and range_iter:
|
||||||
|
response = self.response_class(range_status,
|
||||||
|
range_iter,
|
||||||
|
wbrequest=wbrequest,
|
||||||
|
cdx=cdx)
|
||||||
|
return response
|
||||||
|
|
||||||
|
return get_capture()
|
||||||
|
|
||||||
|
def _redirect_if_needed(self, wbrequest, cdx):
|
||||||
|
if wbrequest.extract_range():
|
||||||
|
return None
|
||||||
|
|
||||||
|
return super(CachedReplayView, self)._redirect_if_needed(wbrequest, cdx)
|
@ -16,6 +16,7 @@ from pywb.warc.resolvingloader import ResolvingLoader
|
|||||||
|
|
||||||
from views import J2TemplateView
|
from views import J2TemplateView
|
||||||
from replay_views import ReplayView
|
from replay_views import ReplayView
|
||||||
|
from cached_replay import CachedReplayView
|
||||||
from pywb.framework.memento import MementoResponse
|
from pywb.framework.memento import MementoResponse
|
||||||
from pywb.utils.timeutils import datetime_to_timestamp
|
from pywb.utils.timeutils import datetime_to_timestamp
|
||||||
|
|
||||||
@ -119,7 +120,11 @@ class WBHandler(SearchPageWbUrlHandler):
|
|||||||
resolving_loader = ResolvingLoader(paths=paths,
|
resolving_loader = ResolvingLoader(paths=paths,
|
||||||
record_loader=record_loader)
|
record_loader=record_loader)
|
||||||
|
|
||||||
self.replay = ReplayView(resolving_loader, config)
|
enable_cache = config.get('enable_cache')
|
||||||
|
if enable_cache:
|
||||||
|
self.replay = CachedReplayView(resolving_loader, config)
|
||||||
|
else:
|
||||||
|
self.replay = ReplayView(resolving_loader, config)
|
||||||
|
|
||||||
self.fallback_handler = None
|
self.fallback_handler = None
|
||||||
self.fallback_name = config.get('fallback')
|
self.fallback_name = config.get('fallback')
|
||||||
|
@ -112,7 +112,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
content_length = wbresponse.status_headers.get_header('Content-Length')
|
content_length = wbresponse.status_headers.get_header('Content-Length')
|
||||||
try:
|
try:
|
||||||
content_length = int(content_length)
|
content_length = int(content_length)
|
||||||
wbresponse.add_range(0, content_length, content_length)
|
wbresponse.status_headers.add_range(0, content_length, content_length)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -12,23 +12,6 @@ import atexit
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class RangeCache(object):
|
class RangeCache(object):
|
||||||
@staticmethod
|
|
||||||
def match_yt(url):
|
|
||||||
if not RangeCache.YOUTUBE_RX.match(url):
|
|
||||||
return None
|
|
||||||
|
|
||||||
range_h_res = []
|
|
||||||
|
|
||||||
def repl_range(matcher):
|
|
||||||
range_h_res.append(matcher.group(1))
|
|
||||||
return ''
|
|
||||||
|
|
||||||
new_url = RangeCache.YT_EXTRACT_RX.sub(repl_range, url)
|
|
||||||
if range_h_res:
|
|
||||||
return range_h_res[0], new_url
|
|
||||||
else:
|
|
||||||
return None, url
|
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.cache = create_cache()
|
self.cache = create_cache()
|
||||||
self.temp_dir = None
|
self.temp_dir = None
|
||||||
@ -107,13 +90,7 @@ class RangeCache(object):
|
|||||||
yield buf
|
yield buf
|
||||||
|
|
||||||
if use_206:
|
if use_206:
|
||||||
content_range = 'bytes {0}-{1}/{2}'.format(start,
|
WbResponse.add_range_status_h(status_headers)
|
||||||
start + maxlen - 1,
|
|
||||||
filelen)
|
|
||||||
|
|
||||||
status_headers = StatusAndHeaders('206 Partial Content', spec['headers'])
|
|
||||||
status_headers.replace_header('Content-Range', content_range)
|
|
||||||
status_headers.replace_header('Accept-Ranges', 'bytes')
|
|
||||||
else:
|
else:
|
||||||
status_headers = StatusAndHeaders('200 OK', spec['headers'])
|
status_headers = StatusAndHeaders('200 OK', spec['headers'])
|
||||||
|
|
||||||
|
@ -15,8 +15,6 @@ from pywb.warc.recordloader import ArchiveLoadFailed
|
|||||||
from views import J2TemplateView, add_env_globals
|
from views import J2TemplateView, add_env_globals
|
||||||
from views import J2HtmlCapturesView, HeadInsertView
|
from views import J2HtmlCapturesView, HeadInsertView
|
||||||
|
|
||||||
from rangecache import range_cache
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class CaptureException(WbException):
|
class CaptureException(WbException):
|
||||||
@ -79,7 +77,7 @@ class ReplayView(object):
|
|||||||
|
|
||||||
first = False
|
first = False
|
||||||
|
|
||||||
response = self.cached_replay_capture(wbrequest,
|
response = self.replay_capture(wbrequest,
|
||||||
cdx,
|
cdx,
|
||||||
cdx_loader,
|
cdx_loader,
|
||||||
failed_files)
|
failed_files)
|
||||||
@ -101,23 +99,6 @@ class ReplayView(object):
|
|||||||
|
|
||||||
raise last_e
|
raise last_e
|
||||||
|
|
||||||
|
|
||||||
def cached_replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
|
|
||||||
def get_capture():
|
|
||||||
return self.replay_capture(wbrequest, cdx, cdx_loader, failed_files)
|
|
||||||
|
|
||||||
range_status, range_iter = range_cache(wbrequest,
|
|
||||||
cdx.get('digest'),
|
|
||||||
get_capture)
|
|
||||||
if range_status and range_iter:
|
|
||||||
response = self.response_class(range_status,
|
|
||||||
range_iter,
|
|
||||||
wbrequest=wbrequest,
|
|
||||||
cdx=cdx)
|
|
||||||
return response
|
|
||||||
|
|
||||||
return get_capture()
|
|
||||||
|
|
||||||
def replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
|
def replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
|
||||||
(status_headers, stream) = (self.content_loader.
|
(status_headers, stream) = (self.content_loader.
|
||||||
resolve_headers_and_payload(cdx,
|
resolve_headers_and_payload(cdx,
|
||||||
@ -201,10 +182,6 @@ class ReplayView(object):
|
|||||||
if wbrequest.options['is_proxy']:
|
if wbrequest.options['is_proxy']:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if range_cache:
|
|
||||||
if range_cache.match_yt(wbrequest.wb_url.url) or wbrequest.env.get('HTTP_RANGE'):
|
|
||||||
return None
|
|
||||||
|
|
||||||
if wbrequest.custom_params.get('noredir'):
|
if wbrequest.custom_params.get('noredir'):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user