mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Rewriting fix for DASH FB and document.write (#529)
* rewrite fixes:
  - dash rewrite fix for fb: when rewriting, match the quoted '"dash_prefetched_representation_ids"' key as well as the unquoted form; update tests to ensure both the old and new formats are rewritten
  - wombat update to fix #527: ensure document.write() doesn't accidentally remove the end-tag if the end-tag was not lowercase (see webrecorder/wombat#21)
* tests: fix recorder cookie filtering test, use https://www.google.com/ for testing
* appveyor: fix appveyor builds
This commit is contained in:
parent
523e35d973
commit
f0b9d5b8e8
@ -23,6 +23,7 @@ install:
|
||||
- "pip install pypiwin32"
|
||||
- "pip install certauth boto3 youtube-dl pysocks"
|
||||
- "pip install codecov"
|
||||
- "pip install wheel"
|
||||
|
||||
build_script:
|
||||
- "python setup.py install"
|
||||
|
@ -71,8 +71,8 @@ class TestRecorder(LiveServerTests, HttpBinLiveTests, FakeRedisTests, TempDirTes
|
||||
|
||||
return dedup_index
|
||||
|
||||
def _test_warc_write(self, recorder_app, host, path, other_params='', link_url=''):
|
||||
url = 'http://' + host + path
|
||||
def _test_warc_write(self, recorder_app, host, path, other_params='', link_url='', protocol='http'):
|
||||
url = protocol + '://' + host + path
|
||||
req_url = '/live/resource/postreq?url=' + url + other_params
|
||||
testapp = webtest.TestApp(recorder_app)
|
||||
resp = testapp.post(req_url, general_req_data.format(host=host, path=path).encode('utf-8'))
|
||||
@ -231,8 +231,9 @@ class TestRecorder(LiveServerTests, HttpBinLiveTests, FakeRedisTests, TempDirTes
|
||||
PerRecordWARCWriter(warc_path, header_filter=header_filter),
|
||||
accept_colls='live')
|
||||
|
||||
resp = self._test_warc_write(recorder_app, 'www.google.com', '/')
|
||||
assert b'HTTP/1.1 302' in resp.body
|
||||
resp = self._test_warc_write(recorder_app, 'www.google.com', '/', protocol='https')
|
||||
print(resp.body.decode('utf-8'))
|
||||
#assert b'HTTP/1.1 302' in resp.body
|
||||
|
||||
buff = BytesIO(resp.body)
|
||||
record = ArcWarcRecordLoader().parse_record_stream(buff)
|
||||
|
@ -59,21 +59,30 @@ class RewriteDASH(BufferedRewriter):
|
||||
|
||||
# ============================================================================
|
||||
def rewrite_fb_dash(string, *args):
|
||||
DASH_SPLIT = r'\n",dash_prefetched_representation_ids:'
|
||||
inx = string.find(DASH_SPLIT)
|
||||
DASH_SPLITS = [r'\n",dash_prefetched_representation_ids:', r'\n","dash_prefetched_representation_ids":']
|
||||
|
||||
inx = -1
|
||||
split = None
|
||||
for split in DASH_SPLITS:
|
||||
inx = string.find(split)
|
||||
if inx >= 0:
|
||||
break
|
||||
|
||||
if inx < 0:
|
||||
return string
|
||||
return
|
||||
|
||||
string = string[:inx]
|
||||
|
||||
buff = string.encode('utf-8').decode('unicode-escape')
|
||||
buff = buff.replace('\\/', '/')
|
||||
buff = buff.encode('utf-8')
|
||||
io = BytesIO(buff)
|
||||
io, best_ids = RewriteDASH().rewrite_dash(io, None)
|
||||
string = json.dumps(io.read().decode('utf-8'))
|
||||
buff = io.read().decode('utf-8')
|
||||
string = json.dumps(buff)
|
||||
string = string[1:-1].replace('<', r'\x3C')
|
||||
|
||||
string += DASH_SPLIT
|
||||
string += split
|
||||
string += json.dumps(best_ids)
|
||||
return string
|
||||
|
||||
|
@ -718,6 +718,25 @@ http://example.com/video_4.m3u8
|
||||
assert 'dash_prefetched_representation_ids:["1", "7"]' in result
|
||||
assert rep_ids not in result
|
||||
|
||||
def test_dash_fb_in_js_2(self):
|
||||
headers = {'Content-Type': 'text/javascript'}
|
||||
with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
|
||||
content = 'dash_manifest:"' + fh.read().encode('unicode-escape').decode('utf-8')
|
||||
|
||||
rep_ids = r'\n","dash_prefetched_representation_ids":["4","5"]'
|
||||
content += rep_ids
|
||||
|
||||
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_',
|
||||
url='http://facebook.com/example/dash/manifest.mpd')
|
||||
|
||||
assert headers.headers == [('Content-Type', 'text/javascript')]
|
||||
|
||||
result = b''.join(gen).decode('utf-8')
|
||||
|
||||
# 4, 5 representations removed, replaced with default 1, 7
|
||||
assert '"dash_prefetched_representation_ids":["1", "7"]' in result
|
||||
assert rep_ids not in result
|
||||
|
||||
def test_dash_custom_max_resolution(self):
|
||||
headers = {'Content-Type': 'application/dash+xml'}
|
||||
with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
|
||||
|
@ -166,7 +166,7 @@ rules:
|
||||
- match: 'Bootloader\.configurePage.*?;'
|
||||
replace: '/* {0} */'
|
||||
|
||||
- match: 'dash_manifest:"(.*",dash_prefetched_representation_ids:.*?])'
|
||||
- match: 'dash_manifest"?:"(.*","?dash_prefetched_representation_ids"?:.*?])'
|
||||
group: 1
|
||||
function: 'pywb.rewrite.rewrite_dash:rewrite_fb_dash'
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
2
setup.py
2
setup.py
@ -93,7 +93,7 @@ setup(
|
||||
long_description=get_ldecription(),
|
||||
license='GPL',
|
||||
packages=find_packages(exclude=['tests_disabled']),
|
||||
zip_safe=True,
|
||||
zip_safe=False,
|
||||
package_data={
|
||||
'pywb': get_package_data(),
|
||||
},
|
||||
|
2
wombat
2
wombat
@ -1 +1 @@
|
||||
Subproject commit c3276154de61196c0c34d9f5f1242706d6e407b6
|
||||
Subproject commit b8a75357e82ef91b006be177cc3e5d827e02ff7d
|
Loading…
x
Reference in New Issue
Block a user