mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Adaptive Streaming Improvements (#236)
* adaptive rewrite improvements: - Add 'application/vnd.apple.mpegurl' as HLS type in rules.yaml and default_rewriter.py - Support setting the max resolution and max bandwidth to choose, defaulting to 1280x720 and 2000000 respectively - LiveWebLoader provides a get_custom_metadata() hook for specifying the WARC-JSON-Metadata header, per mime type (TODO: support customization via rules) - When filtering, first limit by resolution (if set), then by bandwidth (if set), otherwise default to max bandwidth - Max resolution/max bandwidth are stored in the WARC record under WARC-JSON-Metadata as 'adaptive_max_resolution' and 'adaptive_max_bandwidth' to ensure replayability. If absent, choose the absolute max in the manifest to be backwards compatible - Add sample HLS and DASH manifests for testing, with and without max resolution/bandwidth settings.
This commit is contained in:
parent
5a0867fed9
commit
772993ba53
@ -8,6 +8,7 @@ from warcio.utils import to_native_str
|
|||||||
import re
|
import re
|
||||||
import webencodings
|
import webencodings
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import json
|
||||||
|
|
||||||
from pywb.utils.io import StreamIter, BUFF_SIZE
|
from pywb.utils.io import StreamIter, BUFF_SIZE
|
||||||
|
|
||||||
@ -197,11 +198,27 @@ class BufferedRewriter(object):
|
|||||||
stream_buffer.write(buff)
|
stream_buffer.write(buff)
|
||||||
|
|
||||||
stream_buffer.seek(0)
|
stream_buffer.seek(0)
|
||||||
return StreamIter(self.rewrite_stream(stream_buffer))
|
return StreamIter(self.rewrite_stream(stream_buffer, rwinfo))
|
||||||
|
|
||||||
def rewrite_stream(self, stream):
|
def rewrite_stream(self, stream, rwinfo):
|
||||||
raise NotImplemented('implement in subclass')
|
raise NotImplemented('implement in subclass')
|
||||||
|
|
||||||
|
def _get_record_metadata(self, rwinfo):
|
||||||
|
client_metadata = rwinfo.record.rec_headers.get_header('WARC-JSON-Metadata')
|
||||||
|
if client_metadata:
|
||||||
|
try:
|
||||||
|
return json.loads(client_metadata)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _get_adaptive_metadata(self, rwinfo):
|
||||||
|
metadata = self._get_record_metadata(rwinfo)
|
||||||
|
max_resolution = int(metadata.get('adaptive_max_resolution', 0))
|
||||||
|
max_bandwidth = int(metadata.get('adaptive_max_bandwidth', 1000000000))
|
||||||
|
return max_resolution, max_bandwidth
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class StreamingRewriter(object):
|
class StreamingRewriter(object):
|
||||||
|
@ -63,6 +63,7 @@ class DefaultRewriter(BaseContentRewriter):
|
|||||||
|
|
||||||
# HLS
|
# HLS
|
||||||
'application/x-mpegURL': 'hls',
|
'application/x-mpegURL': 'hls',
|
||||||
|
'application/vnd.apple.mpegurl': 'hls',
|
||||||
|
|
||||||
# DASH
|
# DASH
|
||||||
'application/dash+xml': 'dash',
|
'application/dash+xml': 'dash',
|
||||||
|
@ -7,7 +7,7 @@ from pywb.rewrite.content_rewriter import BufferedRewriter
|
|||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Experimental: not fully tested
|
# Experimental: not fully tested
|
||||||
class RewriteAMF(BufferedRewriter): #pragma: no cover
|
class RewriteAMF(BufferedRewriter): #pragma: no cover
|
||||||
def rewrite_stream(self, stream):
|
def rewrite_stream(self, stream, rwinfo):
|
||||||
try:
|
try:
|
||||||
from pyamf import remoting
|
from pyamf import remoting
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
from io import BytesIO, StringIO
|
from io import BytesIO
|
||||||
import json
|
import json
|
||||||
|
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
@ -9,11 +9,12 @@ from pywb.rewrite.content_rewriter import BufferedRewriter
|
|||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class RewriteDASH(BufferedRewriter):
|
class RewriteDASH(BufferedRewriter):
|
||||||
def rewrite_stream(self, stream):
|
def rewrite_stream(self, stream, rwinfo):
|
||||||
res_buff, best_ids = self.rewrite_dash(stream)
|
res_buff, best_ids = self.rewrite_dash(stream, rwinfo)
|
||||||
return res_buff
|
return res_buff
|
||||||
|
|
||||||
def rewrite_dash(self, stream):
|
def rewrite_dash(self, stream, rwinfo):
|
||||||
|
max_resolution, max_bandwidth = self._get_adaptive_metadata(rwinfo)
|
||||||
ET.register_namespace('', 'urn:mpeg:dash:schema:mpd:2011')
|
ET.register_namespace('', 'urn:mpeg:dash:schema:mpd:2011')
|
||||||
namespaces = {'mpd': 'urn:mpeg:dash:schema:mpd:2011'}
|
namespaces = {'mpd': 'urn:mpeg:dash:schema:mpd:2011'}
|
||||||
|
|
||||||
@ -26,24 +27,32 @@ class RewriteDASH(BufferedRewriter):
|
|||||||
|
|
||||||
for period in root.findall('mpd:Period', namespaces):
|
for period in root.findall('mpd:Period', namespaces):
|
||||||
for adaptset in period.findall('mpd:AdaptationSet', namespaces):
|
for adaptset in period.findall('mpd:AdaptationSet', namespaces):
|
||||||
|
|
||||||
best = None
|
best = None
|
||||||
|
best_resolution = 0
|
||||||
|
best_bandwidth = 0
|
||||||
|
|
||||||
for repres in adaptset.findall('mpd:Representation', namespaces):
|
for repres in adaptset.findall('mpd:Representation', namespaces):
|
||||||
bandwidth = int(repres.get('bandwidth', '0'))
|
curr_resolution = int(repres.get('width', '0')) * int(repres.get('height', '0'))
|
||||||
if not best or bandwidth > int(best.get('bandwidth', '0')):
|
curr_bandwidth = int(repres.get('bandwidth', 0))
|
||||||
|
if curr_resolution and max_resolution:
|
||||||
|
if curr_resolution <= max_resolution and curr_resolution > best_resolution:
|
||||||
|
best_resolution = curr_resolution
|
||||||
|
best_bandwidth = curr_bandwidth
|
||||||
|
best = repres
|
||||||
|
elif curr_bandwidth <= max_bandwidth and curr_bandwidth > best_bandwidth:
|
||||||
|
best_resolution = curr_resolution
|
||||||
|
best_bandwidth = curr_bandwidth
|
||||||
best = repres
|
best = repres
|
||||||
|
|
||||||
if best:
|
if best is not None:
|
||||||
best_ids.append(best.get('id'))
|
best_ids.append(best.get('id'))
|
||||||
|
|
||||||
for repres in adaptset.findall('mpd:Representation', namespaces):
|
for repres in adaptset.findall('mpd:Representation', namespaces):
|
||||||
if repres != best:
|
if repres != best:
|
||||||
adaptset.remove(repres)
|
adaptset.remove(repres)
|
||||||
|
|
||||||
string_io = StringIO()
|
|
||||||
tree.write(string_io, encoding='unicode', xml_declaration=True)
|
|
||||||
buff_io = BytesIO()
|
buff_io = BytesIO()
|
||||||
buff_io.write(string_io.getvalue().encode('utf-8'))
|
tree.write(buff_io, encoding='UTF-8', xml_declaration=True)
|
||||||
buff_io.seek(0)
|
buff_io.seek(0)
|
||||||
return buff_io, best_ids
|
return buff_io, best_ids
|
||||||
|
|
||||||
|
@ -7,23 +7,43 @@ from pywb.rewrite.content_rewriter import BufferedRewriter
|
|||||||
# ============================================================================
|
# ============================================================================
|
||||||
class RewriteHLS(BufferedRewriter):
|
class RewriteHLS(BufferedRewriter):
|
||||||
EXT_INF = re.compile('#EXT-X-STREAM-INF:(?:.*[,])?BANDWIDTH=([\d]+)')
|
EXT_INF = re.compile('#EXT-X-STREAM-INF:(?:.*[,])?BANDWIDTH=([\d]+)')
|
||||||
|
EXT_RESOLUTION = re.compile('RESOLUTION=([\d]+)x([\d]+)')
|
||||||
|
|
||||||
|
def rewrite_stream(self, stream, rwinfo):
|
||||||
|
max_resolution, max_bandwidth = self._get_adaptive_metadata(rwinfo)
|
||||||
|
|
||||||
def rewrite_stream(self, stream):
|
|
||||||
buff = stream.read()
|
buff = stream.read()
|
||||||
|
|
||||||
lines = buff.decode('utf-8').split('\n')
|
lines = buff.decode('utf-8').split('\n')
|
||||||
best = None
|
|
||||||
indexes = []
|
indexes = []
|
||||||
count = 0
|
count = 0
|
||||||
best_index = None
|
best_index = None
|
||||||
|
|
||||||
|
best_bandwidth = 0
|
||||||
|
best_resolution = 0
|
||||||
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
m = self.EXT_INF.match(line)
|
m = self.EXT_INF.match(line)
|
||||||
if m:
|
if m:
|
||||||
indexes.append(count)
|
indexes.append(count)
|
||||||
bandwidth = int(m.group(1))
|
curr_bandwidth = int(m.group(1))
|
||||||
if not best or bandwidth > best:
|
|
||||||
best = bandwidth
|
# resolution
|
||||||
|
m2 = self.EXT_RESOLUTION.search(line)
|
||||||
|
if m2:
|
||||||
|
curr_resolution = int(m2.group(1)) * int(m2.group(2))
|
||||||
|
else:
|
||||||
|
curr_resolution = 0
|
||||||
|
|
||||||
|
if max_resolution and curr_resolution:
|
||||||
|
if curr_resolution > best_resolution and curr_resolution <= max_resolution:
|
||||||
|
best_resolution = curr_resolution
|
||||||
|
best_bandwidth = curr_bandwidth
|
||||||
|
best_index = count
|
||||||
|
|
||||||
|
elif curr_bandwidth > best_bandwidth and curr_bandwidth <= max_bandwidth:
|
||||||
|
best_resolution = curr_resolution
|
||||||
|
best_bandwidth = curr_bandwidth
|
||||||
best_index = count
|
best_index = count
|
||||||
|
|
||||||
count = count + 1
|
count = count + 1
|
||||||
|
@ -10,8 +10,12 @@ from pywb.rewrite.wburl import WbUrl
|
|||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
from pywb.rewrite.default_rewriter import DefaultRewriter
|
from pywb.rewrite.default_rewriter import DefaultRewriter
|
||||||
|
|
||||||
|
from pywb import get_test_dir
|
||||||
|
import os
|
||||||
|
import json
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(params=[{'Content-Type': 'text/html'},
|
@pytest.fixture(params=[{'Content-Type': 'text/html'},
|
||||||
{'Content-Type': 'application/xhtml+xml'},
|
{'Content-Type': 'application/xhtml+xml'},
|
||||||
{'Content-Type': 'application/octet-stream'},
|
{'Content-Type': 'application/octet-stream'},
|
||||||
@ -28,9 +32,11 @@ class TestContentRewriter(object):
|
|||||||
def setup_class(self):
|
def setup_class(self):
|
||||||
self.content_rewriter = DefaultRewriter()
|
self.content_rewriter = DefaultRewriter()
|
||||||
|
|
||||||
def _create_response_record(self, url, headers, payload):
|
def _create_response_record(self, url, headers, payload, warc_headers):
|
||||||
writer = BufferWARCWriter()
|
writer = BufferWARCWriter()
|
||||||
|
|
||||||
|
warc_headers = warc_headers or {}
|
||||||
|
|
||||||
payload = payload.encode('utf-8')
|
payload = payload.encode('utf-8')
|
||||||
|
|
||||||
http_headers = StatusAndHeaders('200 OK', headers, protocol='HTTP/1.0')
|
http_headers = StatusAndHeaders('200 OK', headers, protocol='HTTP/1.0')
|
||||||
@ -38,12 +44,13 @@ class TestContentRewriter(object):
|
|||||||
return writer.create_warc_record(url, 'response',
|
return writer.create_warc_record(url, 'response',
|
||||||
payload=BytesIO(payload),
|
payload=BytesIO(payload),
|
||||||
length=len(payload),
|
length=len(payload),
|
||||||
http_headers=http_headers)
|
http_headers=http_headers,
|
||||||
|
warc_headers_dict=warc_headers)
|
||||||
|
|
||||||
def rewrite_record(self, headers, content, ts, url='http://example.com/',
|
def rewrite_record(self, headers, content, ts, url='http://example.com/',
|
||||||
prefix='http://localhost:8080/prefix/'):
|
prefix='http://localhost:8080/prefix/', warc_headers=None):
|
||||||
|
|
||||||
record = self._create_response_record(url, headers, content)
|
record = self._create_response_record(url, headers, content, warc_headers)
|
||||||
|
|
||||||
wburl = WbUrl(ts + '/' + url)
|
wburl = WbUrl(ts + '/' + url)
|
||||||
url_rewriter = UrlRewriter(wburl, prefix)
|
url_rewriter = UrlRewriter(wburl, prefix)
|
||||||
@ -217,5 +224,184 @@ class TestContentRewriter(object):
|
|||||||
|
|
||||||
assert b''.join(gen).decode('utf-8') == content
|
assert b''.join(gen).decode('utf-8') == content
|
||||||
|
|
||||||
|
def test_hls_default_max(self):
|
||||||
|
headers = {'Content-Type': 'application/vnd.apple.mpegurl'}
|
||||||
|
with open(os.path.join(get_test_dir(), 'text_content', 'sample_hls.m3u8'), 'rt') as fh:
|
||||||
|
content = fh.read()
|
||||||
|
|
||||||
|
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
|
||||||
|
url='http://example.com/path/master.m3u8')
|
||||||
|
|
||||||
|
assert headers.headers == [('Content-Type', 'application/vnd.apple.mpegurl')]
|
||||||
|
filtered = """\
|
||||||
|
#EXTM3U
|
||||||
|
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="WebVTT",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,URI="https://example.com/subtitles/"
|
||||||
|
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=4495000,RESOLUTION=1920x1080,CODECS="avc1.640028, mp4a.40.2",SUBTITLES="WebVTT"
|
||||||
|
http://example.com/video_6.m3u8
|
||||||
|
"""
|
||||||
|
|
||||||
|
assert b''.join(gen).decode('utf-8') == filtered
|
||||||
|
|
||||||
|
def test_hls_custom_max_resolution(self):
|
||||||
|
headers = {'Content-Type': 'application/x-mpegURL'}
|
||||||
|
with open(os.path.join(get_test_dir(), 'text_content', 'sample_hls.m3u8'), 'rt') as fh:
|
||||||
|
content = fh.read()
|
||||||
|
|
||||||
|
metadata = {'adaptive_max_resolution': 921600,
|
||||||
|
'adaptive_max_bandwidth': 2000000}
|
||||||
|
|
||||||
|
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
|
||||||
|
url='http://example.com/path/master.m3u8',
|
||||||
|
warc_headers={'WARC-JSON-Metadata': json.dumps(metadata)})
|
||||||
|
|
||||||
|
assert headers.headers == [('Content-Type', 'application/x-mpegURL')]
|
||||||
|
filtered = """\
|
||||||
|
#EXTM3U
|
||||||
|
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="WebVTT",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,URI="https://example.com/subtitles/"
|
||||||
|
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2505000,RESOLUTION=1280x720,CODECS="avc1.77.30, mp4a.40.2",SUBTITLES="WebVTT"
|
||||||
|
http://example.com/video_5.m3u8
|
||||||
|
"""
|
||||||
|
|
||||||
|
assert b''.join(gen).decode('utf-8') == filtered
|
||||||
|
|
||||||
|
def test_hls_custom_max_bandwidth(self):
|
||||||
|
headers = {'Content-Type': 'application/x-mpegURL'}
|
||||||
|
with open(os.path.join(get_test_dir(), 'text_content', 'sample_hls.m3u8'), 'rt') as fh:
|
||||||
|
content = fh.read()
|
||||||
|
|
||||||
|
metadata = {'adaptive_max_bandwidth': 2000000}
|
||||||
|
|
||||||
|
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
|
||||||
|
url='http://example.com/path/master.m3u8',
|
||||||
|
warc_headers={'WARC-JSON-Metadata': json.dumps(metadata)})
|
||||||
|
|
||||||
|
assert headers.headers == [('Content-Type', 'application/x-mpegURL')]
|
||||||
|
filtered = """\
|
||||||
|
#EXTM3U
|
||||||
|
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="WebVTT",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,URI="https://example.com/subtitles/"
|
||||||
|
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1002000,RESOLUTION=640x360,CODECS="avc1.77.30, mp4a.40.2",SUBTITLES="WebVTT"
|
||||||
|
http://example.com/video_4.m3u8
|
||||||
|
"""
|
||||||
|
|
||||||
|
assert b''.join(gen).decode('utf-8') == filtered
|
||||||
|
|
||||||
|
def test_dash_default_max(self):
|
||||||
|
headers = {'Content-Type': 'application/dash+xml'}
|
||||||
|
with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
|
||||||
|
content = fh.read()
|
||||||
|
|
||||||
|
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
|
||||||
|
url='http://example.com/path/manifest.mpd')
|
||||||
|
|
||||||
|
assert headers.headers == [('Content-Type', 'application/dash+xml')]
|
||||||
|
|
||||||
|
filtered = """\
|
||||||
|
<?xml version='1.0' encoding='UTF-8'?>
|
||||||
|
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" mediaPresentationDuration="PT0H3M1.63S" minBufferTime="PT1.5S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static">
|
||||||
|
<Period duration="PT0H3M1.63S" start="PT0S">
|
||||||
|
<AdaptationSet>
|
||||||
|
<ContentComponent contentType="video" id="1" />
|
||||||
|
<Representation bandwidth="4190760" codecs="avc1.640028" height="1080" id="1" mimeType="video/mp4" width="1920">
|
||||||
|
<BaseURL>http://example.com/video-10.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="674-1149">
|
||||||
|
<Initialization range="0-673" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
<AdaptationSet>
|
||||||
|
<ContentComponent contentType="audio" id="2" />
|
||||||
|
<Representation bandwidth="255236" codecs="mp4a.40.2" id="7" mimeType="audio/mp4" numChannels="2" sampleRate="44100">
|
||||||
|
<BaseURL>http://example.com/audio-2.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="592-851">
|
||||||
|
<Initialization range="0-591" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
</Period>
|
||||||
|
</MPD>"""
|
||||||
|
assert b''.join(gen).decode('utf-8') == filtered
|
||||||
|
|
||||||
|
def test_dash_custom_max_resolution(self):
|
||||||
|
headers = {'Content-Type': 'application/dash+xml'}
|
||||||
|
with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
|
||||||
|
content = fh.read()
|
||||||
|
|
||||||
|
metadata = {'adaptive_max_resolution': 921600,
|
||||||
|
'adaptive_max_bandwidth': 2000000}
|
||||||
|
|
||||||
|
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
|
||||||
|
url='http://example.com/path/manifest.mpd',
|
||||||
|
warc_headers={'WARC-JSON-Metadata': json.dumps(metadata)})
|
||||||
|
|
||||||
|
assert headers.headers == [('Content-Type', 'application/dash+xml')]
|
||||||
|
|
||||||
|
filtered = """\
|
||||||
|
<?xml version='1.0' encoding='UTF-8'?>
|
||||||
|
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" mediaPresentationDuration="PT0H3M1.63S" minBufferTime="PT1.5S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static">
|
||||||
|
<Period duration="PT0H3M1.63S" start="PT0S">
|
||||||
|
<AdaptationSet>
|
||||||
|
<ContentComponent contentType="video" id="1" />
|
||||||
|
<Representation bandwidth="2073921" codecs="avc1.4d401f" height="720" id="2" mimeType="video/mp4" width="1280">
|
||||||
|
<BaseURL>http://example.com/video-9.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="708-1183">
|
||||||
|
<Initialization range="0-707" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
<AdaptationSet>
|
||||||
|
<ContentComponent contentType="audio" id="2" />
|
||||||
|
<Representation bandwidth="255236" codecs="mp4a.40.2" id="7" mimeType="audio/mp4" numChannels="2" sampleRate="44100">
|
||||||
|
<BaseURL>http://example.com/audio-2.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="592-851">
|
||||||
|
<Initialization range="0-591" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
</Period>
|
||||||
|
</MPD>"""
|
||||||
|
|
||||||
|
assert b''.join(gen).decode('utf-8') == filtered
|
||||||
|
|
||||||
|
|
||||||
|
def test_dash_custom_max_bandwidth(self):
|
||||||
|
headers = {'Content-Type': 'application/dash+xml'}
|
||||||
|
with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
|
||||||
|
content = fh.read()
|
||||||
|
|
||||||
|
metadata = {'adaptive_max_bandwidth': 2000000}
|
||||||
|
|
||||||
|
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
|
||||||
|
url='http://example.com/path/manifest.mpd',
|
||||||
|
warc_headers={'WARC-JSON-Metadata': json.dumps(metadata)})
|
||||||
|
|
||||||
|
assert headers.headers == [('Content-Type', 'application/dash+xml')]
|
||||||
|
|
||||||
|
filtered = """\
|
||||||
|
<?xml version='1.0' encoding='UTF-8'?>
|
||||||
|
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" mediaPresentationDuration="PT0H3M1.63S" minBufferTime="PT1.5S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static">
|
||||||
|
<Period duration="PT0H3M1.63S" start="PT0S">
|
||||||
|
<AdaptationSet>
|
||||||
|
<ContentComponent contentType="video" id="1" />
|
||||||
|
<Representation bandwidth="869460" codecs="avc1.4d401e" height="480" id="3" mimeType="video/mp4" width="854">
|
||||||
|
<BaseURL>http://example.com/video-8.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="708-1183">
|
||||||
|
<Initialization range="0-707" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
<AdaptationSet>
|
||||||
|
<ContentComponent contentType="audio" id="2" />
|
||||||
|
<Representation bandwidth="255236" codecs="mp4a.40.2" id="7" mimeType="audio/mp4" numChannels="2" sampleRate="44100">
|
||||||
|
<BaseURL>http://example.com/audio-2.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="592-851">
|
||||||
|
<Initialization range="0-591" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
</Period>
|
||||||
|
</MPD>"""
|
||||||
|
|
||||||
|
assert b''.join(gen).decode('utf-8') == filtered
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,9 +12,16 @@ default_filters:
|
|||||||
|
|
||||||
# ignore query args for the following mime types
|
# ignore query args for the following mime types
|
||||||
mimes:
|
mimes:
|
||||||
- 'application/dash+xml'
|
# flash
|
||||||
- 'application/x-shockwave-flash'
|
- 'application/x-shockwave-flash'
|
||||||
|
|
||||||
|
# dash
|
||||||
|
- 'application/dash+xml'
|
||||||
|
|
||||||
|
# hls
|
||||||
|
- 'application/x-mpegURL'
|
||||||
|
- 'application/vnd.apple.mpegurl'
|
||||||
|
|
||||||
# apply following url normalization rules
|
# apply following url normalization rules
|
||||||
# on both match url and request url
|
# on both match url and request url
|
||||||
# to find a match (not limited to query argument removal)
|
# to find a match (not limited to query argument removal)
|
||||||
|
@ -237,6 +237,10 @@ class LiveWebLoader(BaseLoader):
|
|||||||
|
|
||||||
UNREWRITE_HEADERS = ('location', 'content-location')
|
UNREWRITE_HEADERS = ('location', 'content-location')
|
||||||
|
|
||||||
|
VIDEO_MIMES = ('application/x-mpegURL',
|
||||||
|
'application/vnd.apple.mpegurl',
|
||||||
|
'application/dash+xml')
|
||||||
|
|
||||||
def __init__(self, forward_proxy_prefix=None, adapter=None):
|
def __init__(self, forward_proxy_prefix=None, adapter=None):
|
||||||
self.forward_proxy_prefix = forward_proxy_prefix
|
self.forward_proxy_prefix = forward_proxy_prefix
|
||||||
|
|
||||||
@ -378,8 +382,15 @@ class LiveWebLoader(BaseLoader):
|
|||||||
if remote_ip:
|
if remote_ip:
|
||||||
warc_headers['WARC-IP-Address'] = remote_ip
|
warc_headers['WARC-IP-Address'] = remote_ip
|
||||||
|
|
||||||
|
ct = upstream_res.headers.get('Content-Type')
|
||||||
|
if ct:
|
||||||
|
metadata = self.get_custom_metadata(ct, dt)
|
||||||
|
if metadata:
|
||||||
|
warc_headers['WARC-JSON-Metadata'] = json.dumps(metadata)
|
||||||
|
|
||||||
warc_headers['Content-Type'] = 'application/http; msgtype=response'
|
warc_headers['Content-Type'] = 'application/http; msgtype=response'
|
||||||
|
|
||||||
|
|
||||||
self._set_content_len(upstream_res.headers.get('Content-Length', -1),
|
self._set_content_len(upstream_res.headers.get('Content-Length', -1),
|
||||||
warc_headers,
|
warc_headers,
|
||||||
len(http_headers_buff))
|
len(http_headers_buff))
|
||||||
@ -455,6 +466,11 @@ class LiveWebLoader(BaseLoader):
|
|||||||
logger.debug('FAILED: ' + method + ' ' + load_url + ': ' + str(e))
|
logger.debug('FAILED: ' + method + ' ' + load_url + ': ' + str(e))
|
||||||
raise LiveResourceException(load_url)
|
raise LiveResourceException(load_url)
|
||||||
|
|
||||||
|
def get_custom_metadata(self, content_type, dt):
|
||||||
|
if content_type in self.VIDEO_MIMES:
|
||||||
|
return {'adaptive_max_resolution': 1280 * 720,
|
||||||
|
'adaptive_max_bandwidth': 2000000}
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'LiveWebLoader'
|
return 'LiveWebLoader'
|
||||||
|
|
||||||
|
65
sample_archive/text_content/sample_dash.mpd
Normal file
65
sample_archive/text_content/sample_dash.mpd
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" mediaPresentationDuration="PT0H3M1.63S" minBufferTime="PT1.5S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011"
|
||||||
|
type="static">
|
||||||
|
<Period duration="PT0H3M1.63S" start="PT0S">
|
||||||
|
<AdaptationSet>
|
||||||
|
<ContentComponent contentType="video" id="1" />
|
||||||
|
<Representation bandwidth="4190760" codecs="avc1.640028" height="1080" id="1" mimeType="video/mp4" width="1920">
|
||||||
|
<BaseURL>http://example.com/video-10.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="674-1149">
|
||||||
|
<Initialization range="0-673" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
<Representation bandwidth="2073921" codecs="avc1.4d401f" height="720" id="2" mimeType="video/mp4" width="1280">
|
||||||
|
<BaseURL>http://example.com/video-9.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="708-1183">
|
||||||
|
<Initialization range="0-707" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
<Representation bandwidth="869460" codecs="avc1.4d401e" height="480" id="3" mimeType="video/mp4" width="854">
|
||||||
|
<BaseURL>http://example.com/video-8.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="708-1183">
|
||||||
|
<Initialization range="0-707" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
<Representation bandwidth="686521" codecs="avc1.4d401e" height="360" id="4" mimeType="video/mp4" width="640">
|
||||||
|
<BaseURL>http://example.com/video-7.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="708-1183">
|
||||||
|
<Initialization range="0-707" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
<Representation bandwidth="264835" codecs="avc1.4d4015" height="240" id="5" mimeType="video/mp4" width="426">
|
||||||
|
<BaseURL>http://example.com/video-6.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="672-1147">
|
||||||
|
<Initialization range="0-671" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
<Representation bandwidth="100000" codecs="avc1.4d4015" height="144" id="5" mimeType="video/mp4" width="256">
|
||||||
|
<BaseURL>http://example.com/video-5.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="671-1146">
|
||||||
|
<Initialization range="0-670" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
<AdaptationSet>
|
||||||
|
<ContentComponent contentType="audio" id="2" />
|
||||||
|
<Representation bandwidth="127236" codecs="mp4a.40.2" id="6" mimeType="audio/mp4" numChannels="2" sampleRate="44100">
|
||||||
|
<BaseURL>http://example.com/audio-1.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="592-851">
|
||||||
|
<Initialization range="0-591" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
<Representation bandwidth="255236" codecs="mp4a.40.2" id="7" mimeType="audio/mp4" numChannels="2" sampleRate="44100">
|
||||||
|
<BaseURL>http://example.com/audio-2.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="592-851">
|
||||||
|
<Initialization range="0-591" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
<Representation bandwidth="31749" codecs="mp4a.40.5" id="8" mimeType="audio/mp4" numChannels="1" sampleRate="22050">
|
||||||
|
<BaseURL>http://example.com/audio-0.mp4</BaseURL>
|
||||||
|
<SegmentBase indexRange="592-851">
|
||||||
|
<Initialization range="0-591" />
|
||||||
|
</SegmentBase>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
</Period>
|
||||||
|
</MPD>
|
16
sample_archive/text_content/sample_hls.m3u8
Normal file
16
sample_archive/text_content/sample_hls.m3u8
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#EXTM3U
|
||||||
|
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="WebVTT",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,URI="https://example.com/subtitles/"
|
||||||
|
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=610000,RESOLUTION=640x360,CODECS="avc1.66.30, mp4a.40.2",SUBTITLES="WebVTT"
|
||||||
|
http://example.com/video_1.m3u8
|
||||||
|
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=416000,RESOLUTION=400x224,CODECS="avc1.66.30, mp4a.40.2",SUBTITLES="WebVTT"
|
||||||
|
http://example.com/video_2.m3u8
|
||||||
|
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=797000,RESOLUTION=640x360,CODECS="avc1.66.30, mp4a.40.2",SUBTITLES="WebVTT"
|
||||||
|
http://example.com/video_3.m3u8
|
||||||
|
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1002000,RESOLUTION=640x360,CODECS="avc1.77.30, mp4a.40.2",SUBTITLES="WebVTT"
|
||||||
|
http://example.com/video_4.m3u8
|
||||||
|
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2505000,RESOLUTION=1280x720,CODECS="avc1.77.30, mp4a.40.2",SUBTITLES="WebVTT"
|
||||||
|
http://example.com/video_5.m3u8
|
||||||
|
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=4495000,RESOLUTION=1920x1080,CODECS="avc1.640028, mp4a.40.2",SUBTITLES="WebVTT"
|
||||||
|
http://example.com/video_6.m3u8
|
||||||
|
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=38000,CODECS="mp4a.40.2",SUBTITLES="WebVTT"
|
||||||
|
http://example.com/audio_0.m3u8
|
Loading…
x
Reference in New Issue
Block a user