diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py index d5f96a1e..d4990217 100644 --- a/pywb/framework/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -100,6 +100,9 @@ class WbRequest(object): # PERF env['X_PERF'] = {} + if env.get('HTTP_X_PYWB_NOREDIRECT'): + self.custom_params['noredir'] = True + self._parse_extra() def _is_ajax(self): @@ -145,7 +148,8 @@ class WbRequest(object): else: end = '' - return (url, start, end, use_206) + result = (url, start, end, use_206) + return result def __repr__(self): varlist = vars(self) @@ -225,16 +229,6 @@ class WbResponse(object): return WbResponse(StatusAndHeaders(status, redir_headers)) - def add_range(self, start, part_len, total_len): - content_range = 'bytes {0}-{1}/{2}'.format(start, - start + part_len - 1, - total_len) - - self.status_headers.statusline = '206 Partial Content' - self.status_headers.replace_header('Content-Range', content_range) - self.status_headers.replace_header('Accept-Ranges', 'bytes') - return self - def __call__(self, env, start_response): start_response(self.status_headers.statusline, self.status_headers.headers) @@ -246,5 +240,9 @@ class WbResponse(object): return self.body + def add_range(self, *args): + self.status_headers.add_range(*args) + return self + def __repr__(self): return str(vars(self)) diff --git a/pywb/utils/statusandheaders.py b/pywb/utils/statusandheaders.py index 3f429814..7f08cbdb 100644 --- a/pywb/utils/statusandheaders.py +++ b/pywb/utils/statusandheaders.py @@ -97,6 +97,19 @@ class StatusAndHeaders(object): self.statusline = valid_statusline return False + def add_range(self, start, part_len, total_len): + """ + Add range headers indicating that this is a partial response + """ + content_range = 'bytes {0}-{1}/{2}'.format(start, + start + part_len - 1, + total_len) + + self.statusline = '206 Partial Content' + self.replace_header('Content-Range', content_range) + self.replace_header('Accept-Ranges', 'bytes') + return 
self + def __repr__(self): headers_str = pprint.pformat(self.headers, indent=2) return "StatusAndHeaders(protocol = '{0}', statusline = '{1}', \ diff --git a/pywb/utils/test/test_statusandheaders.py b/pywb/utils/test/test_statusandheaders.py index 1929d17d..5fe96d95 100644 --- a/pywb/utils/test/test_statusandheaders.py +++ b/pywb/utils/test/test_statusandheaders.py @@ -5,6 +5,12 @@ StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [ ('Con ('Some', 'Value'), ('Multi-Line', 'Value1 Also This')]) +# add range +>>> StatusAndHeaders(statusline = '200 OK', headers=[('Content-Type', 'text/plain')]).add_range(10, 4, 100) +StatusAndHeaders(protocol = '', statusline = '206 Partial Content', headers = [ ('Content-Type', 'text/plain'), + ('Content-Range', 'bytes 10-13/100'), + ('Accept-Ranges', 'bytes')]) + >>> StatusAndHeadersParser(['Other']).parse(BytesIO(status_headers_1)) Traceback (most recent call last): StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK @@ -36,10 +42,12 @@ StatusAndHeaders(protocol = '', statusline = '204 No Content', headers = []) >>> StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_3)) StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '204 Empty', headers = [('Content-Type', 'Value'), ('Content-Length', '0')]) + + """ -from pywb.utils.statusandheaders import StatusAndHeadersParser +from pywb.utils.statusandheaders import StatusAndHeadersParser, StatusAndHeaders from io import BytesIO diff --git a/pywb/webapp/cached_replay.py b/pywb/webapp/cached_replay.py new file mode 100644 index 00000000..ea2b293d --- /dev/null +++ b/pywb/webapp/cached_replay.py @@ -0,0 +1,34 @@ +from rangecache import range_cache +from replay_views import ReplayView + + +#================================================================= +class CachedReplayView(ReplayView): + """ + Extension for ReplayView supporting loading via the rangecache + """ + def replay_capture(self, wbrequest, 
cdx, cdx_loader, failed_files): + def get_capture(): + return super(CachedReplayView, self).replay_capture( + wbrequest, + cdx, + cdx_loader, + failed_files) + + range_status, range_iter = range_cache(wbrequest, + cdx.get('digest'), + get_capture) + if range_status and range_iter: + response = self.response_class(range_status, + range_iter, + wbrequest=wbrequest, + cdx=cdx) + return response + + return get_capture() + + def _redirect_if_needed(self, wbrequest, cdx): + if wbrequest.extract_range(): + return None + + return super(CachedReplayView, self)._redirect_if_needed(wbrequest, cdx) diff --git a/pywb/webapp/handlers.py b/pywb/webapp/handlers.py index 4018c23d..fe885fa6 100644 --- a/pywb/webapp/handlers.py +++ b/pywb/webapp/handlers.py @@ -16,6 +16,7 @@ from pywb.warc.resolvingloader import ResolvingLoader from views import J2TemplateView from replay_views import ReplayView +from cached_replay import CachedReplayView from pywb.framework.memento import MementoResponse from pywb.utils.timeutils import datetime_to_timestamp @@ -119,7 +120,11 @@ class WBHandler(SearchPageWbUrlHandler): resolving_loader = ResolvingLoader(paths=paths, record_loader=record_loader) - self.replay = ReplayView(resolving_loader, config) + enable_cache = config.get('enable_cache') + if enable_cache: + self.replay = CachedReplayView(resolving_loader, config) + else: + self.replay = ReplayView(resolving_loader, config) self.fallback_handler = None self.fallback_name = config.get('fallback') diff --git a/pywb/webapp/live_rewrite_handler.py b/pywb/webapp/live_rewrite_handler.py index 9c218470..98d6c5db 100644 --- a/pywb/webapp/live_rewrite_handler.py +++ b/pywb/webapp/live_rewrite_handler.py @@ -112,7 +112,7 @@ class RewriteHandler(SearchPageWbUrlHandler): content_length = wbresponse.status_headers.get_header('Content-Length') try: content_length = int(content_length) - wbresponse.add_range(0, content_length, content_length) + wbresponse.status_headers.add_range(0, content_length, 
content_length) except ValueError: pass diff --git a/pywb/webapp/rangecache.py b/pywb/webapp/rangecache.py index 79796c6f..dd6b8bdc 100644 --- a/pywb/webapp/rangecache.py +++ b/pywb/webapp/rangecache.py @@ -12,23 +12,6 @@ import atexit #================================================================= class RangeCache(object): - @staticmethod - def match_yt(url): - if not RangeCache.YOUTUBE_RX.match(url): - return None - - range_h_res = [] - - def repl_range(matcher): - range_h_res.append(matcher.group(1)) - return '' - - new_url = RangeCache.YT_EXTRACT_RX.sub(repl_range, url) - if range_h_res: - return range_h_res[0], new_url - else: - return None, url - def __init__(self): self.cache = create_cache() self.temp_dir = None @@ -107,13 +90,8 @@ class RangeCache(object): yield buf if use_206: - content_range = 'bytes {0}-{1}/{2}'.format(start, - start + maxlen - 1, - filelen) - - status_headers = StatusAndHeaders('206 Partial Content', spec['headers']) - status_headers.replace_header('Content-Range', content_range) - status_headers.replace_header('Accept-Ranges', 'bytes') + status_headers = StatusAndHeaders('206 Partial Content', spec['headers']) + status_headers.add_range(start, maxlen, filelen) else: status_headers = StatusAndHeaders('200 OK', spec['headers']) diff --git a/pywb/webapp/replay_views.py b/pywb/webapp/replay_views.py index f7d430f7..b0b8bffc 100644 --- a/pywb/webapp/replay_views.py +++ b/pywb/webapp/replay_views.py @@ -15,8 +15,6 @@ from pywb.warc.recordloader import ArchiveLoadFailed from views import J2TemplateView, add_env_globals from views import J2HtmlCapturesView, HeadInsertView -from rangecache import range_cache - #================================================================= class CaptureException(WbException): @@ -79,7 +77,7 @@ class ReplayView(object): first = False - response = self.cached_replay_capture(wbrequest, + response = self.replay_capture(wbrequest, cdx, cdx_loader, failed_files) @@ -101,23 +99,6 @@ class ReplayView(object): raise last_e - - def cached_replay_capture(self, wbrequest, cdx, cdx_loader, 
failed_files): - def get_capture(): - return self.replay_capture(wbrequest, cdx, cdx_loader, failed_files) - - range_status, range_iter = range_cache(wbrequest, - cdx.get('digest'), - get_capture) - if range_status and range_iter: - response = self.response_class(range_status, - range_iter, - wbrequest=wbrequest, - cdx=cdx) - return response - - return get_capture() - def replay_capture(self, wbrequest, cdx, cdx_loader, failed_files): (status_headers, stream) = (self.content_loader. resolve_headers_and_payload(cdx, @@ -201,10 +182,6 @@ class ReplayView(object): if wbrequest.options['is_proxy']: return None - if range_cache: - if range_cache.match_yt(wbrequest.wb_url.url) or wbrequest.env.get('HTTP_RANGE'): - return None - if wbrequest.custom_params.get('noredir'): return None