From 07e46f4b6c50f47605945598d25f4cb0a6491aa4 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 18 Dec 2014 13:19:38 -0800 Subject: [PATCH] request/response: add range extraction and settings functions to wbrequest/response (with tests) --- pywb/framework/test/test_wbrequestresponse.py | 25 ++++++++++ pywb/framework/wbrequestresponse.py | 49 +++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/pywb/framework/test/test_wbrequestresponse.py b/pywb/framework/test/test_wbrequestresponse.py index 5bbb65b8..1f1f5a39 100644 --- a/pywb/framework/test/test_wbrequestresponse.py +++ b/pywb/framework/test/test_wbrequestresponse.py @@ -36,6 +36,25 @@ # no referer >>> req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080'}).extract_referrer_wburl_str() +# range requests +>>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='bytes=10-100')).extract_range() +('http://example.com', 10, 100, True) + +>>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='bytes=0-')).extract_range() +('http://example.com', 0, '', True) + +>>> req_from_uri('/web/www.googlevideo.com/videoplayback?id=123&range=0-65535').extract_range() +('http://www.googlevideo.com/videoplayback?id=123', 0, 65535, False) + +>>> req_from_uri('/web/www.googlevideo.com/videoplayback?id=123&range=100-200').extract_range() +('http://www.googlevideo.com/videoplayback?id=123', 100, 200, False) + +# invalid range requests +>>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='10-20')).extract_range() + +>>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='A-5')).extract_range() + +>>> req_from_uri('/web/www.googlevideo.com/videoplayback?id=123&range=100-').extract_range() # WbResponse Tests # ================= @@ -48,6 +67,12 @@ >>> WbResponse.redir_response('http://example.com/otherfile') {'body': [], 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [('Location', 'http://example.com/otherfile'), ('Content-Length', '0')])} +>>> WbResponse.text_response('Test').add_range(10, 4, 100) +{'body': ['Test'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '206 Partial Content', headers = [ ('Content-Type', 'text/plain'), + ('Content-Length', '4'), + ('Content-Range', 'bytes 10-13/100'), + ('Accept-Ranges', 'bytes')])} + """ diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py index 7c48dbb3..d5f96a1e 100644 --- a/pywb/framework/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -2,6 +2,7 @@ from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.loaders import extract_post_query, append_post_query import pprint +import re #================================================================= @@ -108,6 +109,44 @@ class WbRequest(object): return False + RANGE_ARG_RX = re.compile('.*.googlevideo.com/videoplayback.*([&?]range=(\d+)-(\d+))') + RANGE_HEADER = re.compile('bytes=(\d+)-(\d+)?') + + def extract_range(self): + url = self.wb_url.url + use_206 = False + start = None + end = None + + range_h = self.env.get('HTTP_RANGE') + + if range_h: + m = self.RANGE_HEADER.match(range_h) + if m: + start = m.group(1) + end = m.group(2) + use_206 = True + + else: + m = self.RANGE_ARG_RX.match(url) + if m: + start = m.group(2) + end = m.group(3) + url = url[:m.start(1)] + url[m.end(1):] + use_206 = False + + if not start: + return None + + start = int(start) + + if end: + end = int(end) + else: + end = '' + + return (url, start, end, use_206) + def __repr__(self): varlist = vars(self) varstr = pprint.pformat(varlist) @@ -186,6 +225,16 @@ class WbResponse(object): return WbResponse(StatusAndHeaders(status, redir_headers)) + def add_range(self, start, part_len, total_len): + content_range = 'bytes {0}-{1}/{2}'.format(start, + start + part_len - 1, + total_len) + + self.status_headers.statusline = '206 Partial Content' + self.status_headers.replace_header('Content-Range', content_range) + self.status_headers.replace_header('Accept-Ranges', 'bytes') + return self + def __call__(self, env, start_response): start_response(self.status_headers.statusline, self.status_headers.headers)