1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

request/response: add range extraction and settings functions to

wbrequest/response (with tests)
This commit is contained in:
Ilya Kreymer 2014-12-18 13:19:38 -08:00
parent f3dc256503
commit 07e46f4b6c
2 changed files with 74 additions and 0 deletions

View File

@ -36,6 +36,25 @@
# no referer
>>> req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080'}).extract_referrer_wburl_str()
# range requests
>>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='bytes=10-100')).extract_range()
('http://example.com', 10, 100, True)
>>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='bytes=0-')).extract_range()
('http://example.com', 0, '', True)
>>> req_from_uri('/web/www.googlevideo.com/videoplayback?id=123&range=0-65535').extract_range()
('http://www.googlevideo.com/videoplayback?id=123', 0, 65535, False)
>>> req_from_uri('/web/www.googlevideo.com/videoplayback?id=123&range=100-200').extract_range()
('http://www.googlevideo.com/videoplayback?id=123', 100, 200, False)
# invalid range requests
>>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='10-20')).extract_range()
>>> req_from_uri('/web/2014/example.com', dict(HTTP_RANGE='A-5')).extract_range()
>>> req_from_uri('/web/www.googlevideo.com/videoplayback?id=123&range=100-').extract_range()
# WbResponse Tests
# =================
@ -48,6 +67,12 @@
>>> WbResponse.redir_response('http://example.com/otherfile')
{'body': [], 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [('Location', 'http://example.com/otherfile'), ('Content-Length', '0')])}
>>> WbResponse.text_response('Test').add_range(10, 4, 100)
{'body': ['Test'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '206 Partial Content', headers = [ ('Content-Type', 'text/plain'),
('Content-Length', '4'),
('Content-Range', 'bytes 10-13/100'),
('Accept-Ranges', 'bytes')])}
"""

View File

@ -2,6 +2,7 @@ from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.utils.loaders import extract_post_query, append_post_query
import pprint
import re
#=================================================================
@ -108,6 +109,44 @@ class WbRequest(object):
return False
# Matches a googlevideo ``videoplayback`` url that carries a
# ``range=<start>-<end>`` query argument. Group 1 is the whole argument
# including its leading '?' or '&' separator; groups 2 and 3 are the
# start and end offsets. Raw string so that ``\d`` and ``\.`` reach the
# regex engine untouched; the host dots are escaped so they only match
# a literal '.'.
RANGE_ARG_RX = re.compile(r'.*\.googlevideo\.com/videoplayback.*([&?]range=(\d+)-(\d+))')

# Matches a Range header value of the form ``bytes=<start>-[<end>]``.
# Group 1 is the start offset, group 2 the optional end offset.
RANGE_HEADER = re.compile(r'bytes=(\d+)-(\d+)?')
def extract_range(self):
    """
    Extract a byte-range request, if any, from this request.

    When the WSGI environ carries ``HTTP_RANGE``, the range is parsed
    from that header with ``RANGE_HEADER`` and the query argument is not
    consulted. Otherwise the request url is matched against
    ``RANGE_ARG_RX`` and, on a match, the ``range=<start>-<end>``
    argument is removed from the returned url.

    :return: ``None`` when no valid range is found, otherwise a tuple
        ``(url, start, end, use_206)`` where ``start`` is an int,
        ``end`` is an int or ``''`` for an open-ended range, and
        ``use_206`` is ``True`` only when the range came from the
        ``HTTP_RANGE`` header.
    """
    url = self.wb_url.url
    range_h = self.env.get('HTTP_RANGE')

    if range_h:
        m = self.RANGE_HEADER.match(range_h)
        if not m:
            return None

        start, end = m.group(1), m.group(2)
        use_206 = True
    else:
        m = self.RANGE_ARG_RX.match(url)
        if not m:
            return None

        start, end = m.group(2), m.group(3)

        # group 1 spans the argument together with its separator
        before = url[:m.start(1)]
        after = url[m.end(1):]

        # if the range was the first query argument, its '?' was just
        # removed: promote the next '&' so the query string stays valid
        if m.group(1).startswith('?') and after.startswith('&'):
            after = '?' + after[1:]

        url = before + after
        use_206 = False

    start = int(start)
    # an absent end offset (open-ended range) is reported as ''
    end = int(end) if end else ''

    return (url, start, end, use_206)
def __repr__(self):
varlist = vars(self)
varstr = pprint.pformat(varlist)
@ -186,6 +225,16 @@ class WbResponse(object):
return WbResponse(StatusAndHeaders(status, redir_headers))
def add_range(self, start, part_len, total_len):
    """
    Mark this response as a partial-content (206) response.

    Sets the status line to ``206 Partial Content`` and replaces the
    ``Content-Range`` and ``Accept-Ranges`` headers on
    ``self.status_headers``. No other header is modified here.

    :param start: offset of the first byte being returned
    :param part_len: number of bytes being returned; the last byte
        reported in ``Content-Range`` is ``start + part_len - 1``
    :param total_len: value reported after the '/' in ``Content-Range``
    :return: this response object, so calls can be chained
    """
    # Content-Range uses an inclusive last-byte position
    last_byte = start + part_len - 1
    range_value = 'bytes {0}-{1}/{2}'.format(start, last_byte, total_len)

    self.status_headers.statusline = '206 Partial Content'

    for name, value in (('Content-Range', range_value),
                        ('Accept-Ranges', 'bytes')):
        self.status_headers.replace_header(name, value)

    return self
def __call__(self, env, start_response):
start_response(self.status_headers.statusline,
self.status_headers.headers)