mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
refactor: split out optional cached replay components into cached_replay,
toggleable via 'enable_cache' in config -- regular replayview does not need any cache info.
Move add_range() components to statusandheaders from wbrequestresponse.
Add 'x-pywb-noredirect' header, which disables date-related redirect.
Video replay works w/o cache if supported by frontend (nginx).
This commit is contained in:
parent
b86517b246
commit
0f2c96879c
@ -100,6 +100,9 @@ class WbRequest(object):
|
||||
# PERF
|
||||
env['X_PERF'] = {}
|
||||
|
||||
if env.get('HTTP_X_PYWB_NOREDIRECT'):
|
||||
self.custom_params['noredir'] = True
|
||||
|
||||
self._parse_extra()
|
||||
|
||||
def _is_ajax(self):
|
||||
@ -145,7 +148,8 @@ class WbRequest(object):
|
||||
else:
|
||||
end = ''
|
||||
|
||||
return (url, start, end, use_206)
|
||||
result = (url, start, end, use_206)
|
||||
return result
|
||||
|
||||
def __repr__(self):
|
||||
varlist = vars(self)
|
||||
@ -225,16 +229,6 @@ class WbResponse(object):
|
||||
|
||||
return WbResponse(StatusAndHeaders(status, redir_headers))
|
||||
|
||||
def add_range(self, start, part_len, total_len):
|
||||
content_range = 'bytes {0}-{1}/{2}'.format(start,
|
||||
start + part_len - 1,
|
||||
total_len)
|
||||
|
||||
self.status_headers.statusline = '206 Partial Content'
|
||||
self.status_headers.replace_header('Content-Range', content_range)
|
||||
self.status_headers.replace_header('Accept-Ranges', 'bytes')
|
||||
return self
|
||||
|
||||
def __call__(self, env, start_response):
|
||||
start_response(self.status_headers.statusline,
|
||||
self.status_headers.headers)
|
||||
@ -246,5 +240,9 @@ class WbResponse(object):
|
||||
|
||||
return self.body
|
||||
|
||||
def add_range(self, *args):
|
||||
self.status_headers.add_range(*args)
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
return str(vars(self))
|
||||
|
@ -97,6 +97,19 @@ class StatusAndHeaders(object):
|
||||
self.statusline = valid_statusline
|
||||
return False
|
||||
|
||||
def add_range(self, start, part_len, total_len):
|
||||
"""
|
||||
Add range headers indicating that this a partial response
|
||||
"""
|
||||
content_range = 'bytes {0}-{1}/{2}'.format(start,
|
||||
start + part_len - 1,
|
||||
total_len)
|
||||
|
||||
self.statusline = '206 Partial Content'
|
||||
self.replace_header('Content-Range', content_range)
|
||||
self.replace_header('Accept-Ranges', 'bytes')
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
headers_str = pprint.pformat(self.headers, indent=2)
|
||||
return "StatusAndHeaders(protocol = '{0}', statusline = '{1}', \
|
||||
|
@ -5,6 +5,12 @@ StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [ ('Con
|
||||
('Some', 'Value'),
|
||||
('Multi-Line', 'Value1 Also This')])
|
||||
|
||||
# add range
|
||||
>>> StatusAndHeaders(statusline = '200 OK', headers=[('Content-Type', 'text/plain')]).add_range(10, 4, 100)
|
||||
StatusAndHeaders(protocol = '', statusline = '206 Partial Content', headers = [ ('Content-Type', 'text/plain'),
|
||||
('Content-Range', 'bytes 10-13/100'),
|
||||
('Accept-Ranges', 'bytes')])
|
||||
|
||||
>>> StatusAndHeadersParser(['Other']).parse(BytesIO(status_headers_1))
|
||||
Traceback (most recent call last):
|
||||
StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK
|
||||
@ -36,10 +42,12 @@ StatusAndHeaders(protocol = '', statusline = '204 No Content', headers = [])
|
||||
|
||||
>>> StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_3))
|
||||
StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '204 Empty', headers = [('Content-Type', 'Value'), ('Content-Length', '0')])
|
||||
|
||||
|
||||
"""
|
||||
|
||||
|
||||
from pywb.utils.statusandheaders import StatusAndHeadersParser
|
||||
from pywb.utils.statusandheaders import StatusAndHeadersParser, StatusAndHeaders
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
|
34
pywb/webapp/cached_replay.py
Normal file
34
pywb/webapp/cached_replay.py
Normal file
@ -0,0 +1,34 @@
|
||||
from rangecache import range_cache
|
||||
from replay_views import ReplayView
|
||||
|
||||
|
||||
#=================================================================
|
||||
class CachedReplayView(ReplayView):
|
||||
"""
|
||||
Extension for ReplayView supporting loading via the rangecache
|
||||
"""
|
||||
def replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
|
||||
def get_capture():
|
||||
return super(CachedReplayView, self).replay_capture(
|
||||
wbrequest,
|
||||
cdx,
|
||||
cdx_loader,
|
||||
failed_files)
|
||||
|
||||
range_status, range_iter = range_cache(wbrequest,
|
||||
cdx.get('digest'),
|
||||
get_capture)
|
||||
if range_status and range_iter:
|
||||
response = self.response_class(range_status,
|
||||
range_iter,
|
||||
wbrequest=wbrequest,
|
||||
cdx=cdx)
|
||||
return response
|
||||
|
||||
return get_capture()
|
||||
|
||||
def _redirect_if_needed(self, wbrequest, cdx):
|
||||
if wbrequest.extract_range():
|
||||
return None
|
||||
|
||||
return super(CachedReplayView, self)._redirect_if_needed(wbrequest, cdx)
|
@ -16,6 +16,7 @@ from pywb.warc.resolvingloader import ResolvingLoader
|
||||
|
||||
from views import J2TemplateView
|
||||
from replay_views import ReplayView
|
||||
from cached_replay import CachedReplayView
|
||||
from pywb.framework.memento import MementoResponse
|
||||
from pywb.utils.timeutils import datetime_to_timestamp
|
||||
|
||||
@ -119,7 +120,11 @@ class WBHandler(SearchPageWbUrlHandler):
|
||||
resolving_loader = ResolvingLoader(paths=paths,
|
||||
record_loader=record_loader)
|
||||
|
||||
self.replay = ReplayView(resolving_loader, config)
|
||||
enable_cache = config.get('enable_cache')
|
||||
if enable_cache:
|
||||
self.replay = CachedReplayView(resolving_loader, config)
|
||||
else:
|
||||
self.replay = ReplayView(resolving_loader, config)
|
||||
|
||||
self.fallback_handler = None
|
||||
self.fallback_name = config.get('fallback')
|
||||
|
@ -112,7 +112,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
content_length = wbresponse.status_headers.get_header('Content-Length')
|
||||
try:
|
||||
content_length = int(content_length)
|
||||
wbresponse.add_range(0, content_length, content_length)
|
||||
wbresponse.status_headers.add_range(0, content_length, content_length)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
|
@ -12,23 +12,6 @@ import atexit
|
||||
|
||||
#=================================================================
|
||||
class RangeCache(object):
|
||||
@staticmethod
|
||||
def match_yt(url):
|
||||
if not RangeCache.YOUTUBE_RX.match(url):
|
||||
return None
|
||||
|
||||
range_h_res = []
|
||||
|
||||
def repl_range(matcher):
|
||||
range_h_res.append(matcher.group(1))
|
||||
return ''
|
||||
|
||||
new_url = RangeCache.YT_EXTRACT_RX.sub(repl_range, url)
|
||||
if range_h_res:
|
||||
return range_h_res[0], new_url
|
||||
else:
|
||||
return None, url
|
||||
|
||||
def __init__(self):
|
||||
self.cache = create_cache()
|
||||
self.temp_dir = None
|
||||
@ -107,13 +90,7 @@ class RangeCache(object):
|
||||
yield buf
|
||||
|
||||
if use_206:
|
||||
content_range = 'bytes {0}-{1}/{2}'.format(start,
|
||||
start + maxlen - 1,
|
||||
filelen)
|
||||
|
||||
status_headers = StatusAndHeaders('206 Partial Content', spec['headers'])
|
||||
status_headers.replace_header('Content-Range', content_range)
|
||||
status_headers.replace_header('Accept-Ranges', 'bytes')
|
||||
WbResponse.add_range_status_h(status_headers)
|
||||
else:
|
||||
status_headers = StatusAndHeaders('200 OK', spec['headers'])
|
||||
|
||||
|
@ -15,8 +15,6 @@ from pywb.warc.recordloader import ArchiveLoadFailed
|
||||
from views import J2TemplateView, add_env_globals
|
||||
from views import J2HtmlCapturesView, HeadInsertView
|
||||
|
||||
from rangecache import range_cache
|
||||
|
||||
|
||||
#=================================================================
|
||||
class CaptureException(WbException):
|
||||
@ -79,7 +77,7 @@ class ReplayView(object):
|
||||
|
||||
first = False
|
||||
|
||||
response = self.cached_replay_capture(wbrequest,
|
||||
response = self.replay_capture(wbrequest,
|
||||
cdx,
|
||||
cdx_loader,
|
||||
failed_files)
|
||||
@ -101,23 +99,6 @@ class ReplayView(object):
|
||||
|
||||
raise last_e
|
||||
|
||||
|
||||
def cached_replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
|
||||
def get_capture():
|
||||
return self.replay_capture(wbrequest, cdx, cdx_loader, failed_files)
|
||||
|
||||
range_status, range_iter = range_cache(wbrequest,
|
||||
cdx.get('digest'),
|
||||
get_capture)
|
||||
if range_status and range_iter:
|
||||
response = self.response_class(range_status,
|
||||
range_iter,
|
||||
wbrequest=wbrequest,
|
||||
cdx=cdx)
|
||||
return response
|
||||
|
||||
return get_capture()
|
||||
|
||||
def replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
|
||||
(status_headers, stream) = (self.content_loader.
|
||||
resolve_headers_and_payload(cdx,
|
||||
@ -201,10 +182,6 @@ class ReplayView(object):
|
||||
if wbrequest.options['is_proxy']:
|
||||
return None
|
||||
|
||||
if range_cache:
|
||||
if range_cache.match_yt(wbrequest.wb_url.url) or wbrequest.env.get('HTTP_RANGE'):
|
||||
return None
|
||||
|
||||
if wbrequest.custom_params.get('noredir'):
|
||||
return None
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user