1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

refactor: split out optional cached replay components into cached_replay,

toggleable via 'enable_cache' in config -- regular replayview does not
need any cache info
move add_range() components to statusandheaders from wbrequestresponse
add 'x-pywb-noredirect' header, which disables date-related redirects
video replay works w/o cache if supported by frontend (nginx)
This commit is contained in:
Ilya Kreymer 2014-12-19 18:40:45 -08:00
parent b86517b246
commit 0f2c96879c
8 changed files with 74 additions and 62 deletions

View File

@ -100,6 +100,9 @@ class WbRequest(object):
# PERF
env['X_PERF'] = {}
if env.get('HTTP_X_PYWB_NOREDIRECT'):
self.custom_params['noredir'] = True
self._parse_extra()
def _is_ajax(self):
@ -145,7 +148,8 @@ class WbRequest(object):
else:
end = ''
return (url, start, end, use_206)
result = (url, start, end, use_206)
return result
def __repr__(self):
varlist = vars(self)
@ -225,16 +229,6 @@ class WbResponse(object):
return WbResponse(StatusAndHeaders(status, redir_headers))
def add_range(self, start, part_len, total_len):
    """
    Turn this response into a '206 Partial Content' response.

    Sets the Content-Range header to
    'bytes <start>-<start + part_len - 1>/<total_len>' and the
    Accept-Ranges header to 'bytes' on self.status_headers.

    Returns self so that calls can be chained.
    """
    last_byte = start + part_len - 1
    range_value = 'bytes {0}-{1}/{2}'.format(start, last_byte, total_len)

    headers = self.status_headers
    headers.statusline = '206 Partial Content'
    headers.replace_header('Content-Range', range_value)
    headers.replace_header('Accept-Ranges', 'bytes')
    return self
def __call__(self, env, start_response):
start_response(self.status_headers.statusline,
self.status_headers.headers)
@ -246,5 +240,9 @@ class WbResponse(object):
return self.body
def add_range(self, *args):
    """
    Forward all positional arguments to self.status_headers.add_range()
    and return this response (not the headers object) for chaining.
    """
    status_headers = self.status_headers
    status_headers.add_range(*args)
    return self
def __repr__(self):
return str(vars(self))

View File

@ -97,6 +97,19 @@ class StatusAndHeaders(object):
self.statusline = valid_statusline
return False
def add_range(self, start, part_len, total_len):
    """
    Add range headers indicating that this is a partial response.

    The status line becomes '206 Partial Content', Content-Range is set
    to 'bytes <start>-<start + part_len - 1>/<total_len>' and
    Accept-Ranges is set to 'bytes'.

    Returns self so that calls can be chained.
    """
    last_byte = start + part_len - 1
    range_value = 'bytes {0}-{1}/{2}'.format(start, last_byte, total_len)

    self.statusline = '206 Partial Content'
    for name, value in (('Content-Range', range_value),
                        ('Accept-Ranges', 'bytes')):
        self.replace_header(name, value)
    return self
def __repr__(self):
headers_str = pprint.pformat(self.headers, indent=2)
return "StatusAndHeaders(protocol = '{0}', statusline = '{1}', \

View File

@ -5,6 +5,12 @@ StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [ ('Con
('Some', 'Value'),
('Multi-Line', 'Value1 Also This')])
# add range
>>> StatusAndHeaders(statusline = '200 OK', headers=[('Content-Type', 'text/plain')]).add_range(10, 4, 100)
StatusAndHeaders(protocol = '', statusline = '206 Partial Content', headers = [ ('Content-Type', 'text/plain'),
('Content-Range', 'bytes 10-13/100'),
('Accept-Ranges', 'bytes')])
>>> StatusAndHeadersParser(['Other']).parse(BytesIO(status_headers_1))
Traceback (most recent call last):
StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK
@ -36,10 +42,12 @@ StatusAndHeaders(protocol = '', statusline = '204 No Content', headers = [])
>>> StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_3))
StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '204 Empty', headers = [('Content-Type', 'Value'), ('Content-Length', '0')])
"""
from pywb.utils.statusandheaders import StatusAndHeadersParser
from pywb.utils.statusandheaders import StatusAndHeadersParser, StatusAndHeaders
from io import BytesIO

View File

@ -0,0 +1,34 @@
from rangecache import range_cache
from replay_views import ReplayView
#=================================================================
class CachedReplayView(ReplayView):
    """
    ReplayView subclass that routes capture loading through range_cache.
    """
    def replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
        """
        Load a capture, using the response from range_cache when it
        supplies one.

        range_cache() is called with the request, the cdx 'digest' value
        and a callable that performs the regular (parent class) load.
        If it returns both a truthy status and a truthy iterator, they
        are wrapped in self.response_class; otherwise the regular load
        result is returned.
        """
        def load_uncached():
            # plain ReplayView behaviour, with no range handling
            return super(CachedReplayView, self).replay_capture(
                wbrequest, cdx, cdx_loader, failed_files)

        digest = cdx.get('digest')
        range_status, range_iter = range_cache(wbrequest, digest,
                                               load_uncached)

        if not (range_status and range_iter):
            return load_uncached()

        return self.response_class(range_status,
                                   range_iter,
                                   wbrequest=wbrequest,
                                   cdx=cdx)

    def _redirect_if_needed(self, wbrequest, cdx):
        """
        Return None (no redirect) when wbrequest.extract_range() is
        truthy; otherwise defer to the parent implementation.
        """
        if not wbrequest.extract_range():
            return super(CachedReplayView, self)._redirect_if_needed(
                wbrequest, cdx)

        return None

View File

@ -16,6 +16,7 @@ from pywb.warc.resolvingloader import ResolvingLoader
from views import J2TemplateView
from replay_views import ReplayView
from cached_replay import CachedReplayView
from pywb.framework.memento import MementoResponse
from pywb.utils.timeutils import datetime_to_timestamp
@ -119,7 +120,11 @@ class WBHandler(SearchPageWbUrlHandler):
resolving_loader = ResolvingLoader(paths=paths,
record_loader=record_loader)
self.replay = ReplayView(resolving_loader, config)
enable_cache = config.get('enable_cache')
if enable_cache:
self.replay = CachedReplayView(resolving_loader, config)
else:
self.replay = ReplayView(resolving_loader, config)
self.fallback_handler = None
self.fallback_name = config.get('fallback')

View File

@ -112,7 +112,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
content_length = wbresponse.status_headers.get_header('Content-Length')
try:
content_length = int(content_length)
wbresponse.add_range(0, content_length, content_length)
wbresponse.status_headers.add_range(0, content_length, content_length)
except ValueError:
pass

View File

@ -12,23 +12,6 @@ import atexit
#=================================================================
class RangeCache(object):
@staticmethod
def match_yt(url):
    """
    Check url against RangeCache.YOUTUBE_RX and strip the part matched
    by RangeCache.YT_EXTRACT_RX.

    Returns None when url does not match YOUTUBE_RX. Otherwise returns
    a (range, url) tuple: group 1 of the first YT_EXTRACT_RX match
    together with the url after substitution, or (None, url) with the
    original url when nothing was extracted.
    """
    if not RangeCache.YOUTUBE_RX.match(url):
        return None

    extracted = []

    def strip_range(found):
        # remember the captured value and drop the match from the url
        extracted.append(found.group(1))
        return ''

    stripped_url = RangeCache.YT_EXTRACT_RX.sub(strip_range, url)

    if not extracted:
        return None, url

    return extracted[0], stripped_url
def __init__(self):
self.cache = create_cache()
self.temp_dir = None
@ -107,13 +90,7 @@ class RangeCache(object):
yield buf
if use_206:
content_range = 'bytes {0}-{1}/{2}'.format(start,
start + maxlen - 1,
filelen)
status_headers = StatusAndHeaders('206 Partial Content', spec['headers'])
status_headers.replace_header('Content-Range', content_range)
status_headers.replace_header('Accept-Ranges', 'bytes')
WbResponse.add_range_status_h(status_headers)
else:
status_headers = StatusAndHeaders('200 OK', spec['headers'])

View File

@ -15,8 +15,6 @@ from pywb.warc.recordloader import ArchiveLoadFailed
from views import J2TemplateView, add_env_globals
from views import J2HtmlCapturesView, HeadInsertView
from rangecache import range_cache
#=================================================================
class CaptureException(WbException):
@ -79,7 +77,7 @@ class ReplayView(object):
first = False
response = self.cached_replay_capture(wbrequest,
response = self.replay_capture(wbrequest,
cdx,
cdx_loader,
failed_files)
@ -101,23 +99,6 @@ class ReplayView(object):
raise last_e
def cached_replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
    """
    Load a capture via range_cache, falling back to replay_capture().

    range_cache() is called with the request, the cdx 'digest' value
    and a callable that invokes self.replay_capture(). If it returns
    both a truthy status and a truthy iterator, they are wrapped in
    self.response_class; otherwise the replay_capture() result is
    returned.
    """
    def load_capture():
        return self.replay_capture(wbrequest, cdx, cdx_loader, failed_files)

    digest = cdx.get('digest')
    range_status, range_iter = range_cache(wbrequest, digest, load_capture)

    if not (range_status and range_iter):
        return load_capture()

    return self.response_class(range_status,
                               range_iter,
                               wbrequest=wbrequest,
                               cdx=cdx)
def replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
(status_headers, stream) = (self.content_loader.
resolve_headers_and_payload(cdx,
@ -201,10 +182,6 @@ class ReplayView(object):
if wbrequest.options['is_proxy']:
return None
if range_cache:
if range_cache.match_yt(wbrequest.wb_url.url) or wbrequest.env.get('HTTP_RANGE'):
return None
if wbrequest.custom_params.get('noredir'):
return None