mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Brotli: Don't accept brotli if library can't be loaded. (#444)
* brotli: if the brotli module cannot be loaded, print a warning and remove `br` from any Accept-Encoding header to avoid recording brotli-encoded content (addresses #434)
This commit is contained in:
parent
000ed89dc3
commit
32c1e6c85b
@ -6,6 +6,14 @@ from six.moves.urllib.parse import urlsplit
|
||||
import re
|
||||
|
||||
|
||||
# brotli is optional: remember whether it could be imported so request
# handling in this module can stop advertising `br` when it is unavailable.
try:  # pragma: no cover
    import brotli
except Exception:  # pragma: no cover
    # Any failure while loading the module is treated as "brotli unavailable".
    has_brotli = False
    print('Warning: brotli module could not be loaded, will not be able to replay brotli-encoded content')
else:  # pragma: no cover
    has_brotli = True
|
||||
|
||||
|
||||
#=============================================================================
|
||||
class RewriteInputRequest(DirectWSGIInputRequest):
|
||||
RANGE_ARG_RX = re.compile('.*.googlevideo.com/videoplayback.*([&?]range=(\d+)-(\d+))')
|
||||
@ -79,6 +87,12 @@ class RewriteInputRequest(DirectWSGIInputRequest):
|
||||
if self.splits:
|
||||
value = self.splits.scheme
|
||||
|
||||
elif not has_brotli and name == 'HTTP_ACCEPT_ENCODING' and 'br' in value:
|
||||
# if brotli not available, remove 'br' from accept-encoding to avoid
|
||||
# capture brotli encoded content
|
||||
name = 'Accept-Encoding'
|
||||
value = ','.join([enc for enc in value.split(',') if enc.strip() != 'br'])
|
||||
|
||||
elif name.startswith('HTTP_'):
|
||||
name = name[5:].title().replace('_', '-')
|
||||
|
||||
|
@ -3,10 +3,15 @@ from pywb.manager.manager import main as manager
|
||||
from pywb.manager.autoindex import AutoIndexer
|
||||
from pywb.warcserver.test.testutils import to_path, HttpBinLiveTests, TEST_WARC_PATH, TEST_CDX_PATH
|
||||
|
||||
from warcio import ArchiveIterator
|
||||
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
|
||||
from mock import patch
|
||||
import pytest
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestRecordReplay(HttpBinLiveTests, CollsDirMixin, BaseConfigTest):
|
||||
@ -153,6 +158,20 @@ class TestRecordCustomConfig(HttpBinLiveTests, CollsDirMixin, BaseConfigTest):
|
||||
assert names[0].startswith('pywb-rec-test-')
|
||||
assert names[0].endswith('.warcgz')
|
||||
|
||||
TestRecordCustomConfig.warc_name = os.path.join(dir_name, names[0])
|
||||
|
||||
@patch('pywb.rewrite.rewriteinputreq.has_brotli', False)
def test_no_brotli(self):
    """Record a page with brotli disabled and check `br` is not requested.

    `has_brotli` is patched to False, the client sends
    `Accept-Encoding: gzip, deflate, br`, and the last record in the WARC
    at `self.warc_name` must carry `Accept-Encoding: gzip, deflate`.
    """
    res = self.testapp.get('/test-new/record/mp_/http://httpbin.org/get?C=D',
                           headers={'Accept-Encoding': 'gzip, deflate, br'})
    assert '"C": "D"' in res.text

    # Track the final record explicitly; starting from None makes an empty
    # WARC fail with a clear assertion rather than a NameError.
    last_record = None
    with open(self.warc_name, 'rb') as fh:
        for record in ArchiveIterator(fh):
            last_record = record

    assert last_record is not None, 'no records found in recorded WARC'
    assert last_record.http_headers['Accept-Encoding'] == 'gzip, deflate'
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestRecordFilter(HttpBinLiveTests, CollsDirMixin, BaseConfigTest):
|
||||
@ -174,17 +193,17 @@ class TestRecordFilter(HttpBinLiveTests, CollsDirMixin, BaseConfigTest):
|
||||
}
|
||||
super(TestRecordFilter, cls).setup_class('config_test_record.yaml', custom_config=rec_custom)
|
||||
manager(['init', 'test-new'])
|
||||
|
||||
|
||||
def test_skip_existing(self):
    """Request an already-indexed URL through the record endpoint.

    Checks that the fallback CDX query for the URL returns a non-empty
    result, that the record endpoint still serves the page, and that the
    `test-new` archive directory remains empty afterwards.
    """
    archive_dir = os.path.join(self.root_dir, '_test_colls', 'test-new', 'archive')
    assert os.path.isdir(archive_dir)

    # The URL must already be present in the fallback index.
    cdx_res = self.testapp.get('/fallback/cdx?url=http://example.com/?example=1')
    assert cdx_res.text != ''

    page_res = self.testapp.get('/test-new/record/mp_/http://example.com/?example=1')
    assert 'Example Domain' in page_res.text

    # Nothing should have been written to the collection's archive directory.
    assert os.listdir(archive_dir) == []
|
||||
|
||||
|
||||
def test_record_new(self):
|
||||
dir_name = os.path.join(self.root_dir, '_test_colls', 'test-new', 'archive')
|
||||
assert os.path.isdir(dir_name)
|
||||
|
Loading…
x
Reference in New Issue
Block a user