From 457a1a564c1ef0b6fa8f624fc7efef5208777cff Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Wed, 15 Jun 2016 01:37:29 -0400
Subject: [PATCH] bufferedreader: support brotli decompression

rewrite: handle Content-Encoding: br using brotli decompressor
setup: add brotlipy as dependency
---
# Reviewer notes (free-text area after '---'; not part of the commit message):
# - This copy of the patch had its line breaks and indentation collapsed.
#   The layout below is a best-effort reconstruction: blank context lines are
#   placed to satisfy each hunk's @@ line counts, and leading whitespace and
#   the Subject/body split are inferred. Verify against the upstream commit
#   before applying.
# - rewrite_content.py now calls BufferedReader.get_supported_decompressors(),
#   but no hunk here adds an import of BufferedReader to that module.
#   TODO confirm the name is already imported there.
# - get_supported_decompressors() returns every key of DECOMPRESSORS, so the
#   new loop in rewrite_content.py also probes 'deflate_alt' as a
#   content-encoding value, in dict iteration order, stopping at the first
#   match.
# - 'import brotli' is unconditional at the top of bufferedreaders.py; the
#   setup.py hunk adds 'brotlipy' to the dependency list.
# - brotli_decompressor() sets unused_data = None on the Decompressor;
#   presumably the reader inspects that attribute as it would on a zlib
#   decompress object -- confirm against BufferedReader.
 pywb/rewrite/rewrite_content.py | 12 +++++++++---
 pywb/utils/bufferedreaders.py   | 14 +++++++++++++-
 setup.py                        |  1 +
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/pywb/rewrite/rewrite_content.py b/pywb/rewrite/rewrite_content.py
index 93012d0f..677e20ae 100644
--- a/pywb/rewrite/rewrite_content.py
+++ b/pywb/rewrite/rewrite_content.py
@@ -77,6 +77,7 @@ class RewriteContent(object):


     def _check_encoding(self, rewritten_headers, stream, enc):
+        matched = False
         if (rewritten_headers.
              contains_removed_header('content-encoding', enc)):

@@ -87,8 +88,9 @@ class RewriteContent(object):
                 stream = DecompressingBufferedReader(stream, decomp_type=enc)

             rewritten_headers.status_headers.remove_header('content-length')
+            matched = True

-        return stream
+        return matched, stream



@@ -139,8 +141,12 @@ class RewriteContent(object):
         encoding = None
         first_buff = b''

-        stream = self._check_encoding(rewritten_headers, stream, 'gzip')
-        stream = self._check_encoding(rewritten_headers, stream, 'deflate')
+        for decomp_type in BufferedReader.get_supported_decompressors():
+            matched, stream = self._check_encoding(rewritten_headers,
+                                                   stream,
+                                                   decomp_type)
+            if matched:
+                break

         if mod == 'js_':
             text_type, stream = self._resolve_text_type('js',
diff --git a/pywb/utils/bufferedreaders.py b/pywb/utils/bufferedreaders.py
index 81132bdd..e1ebfc90 100644
--- a/pywb/utils/bufferedreaders.py
+++ b/pywb/utils/bufferedreaders.py
@@ -1,5 +1,6 @@
 from io import BytesIO
 import zlib
+import brotli


 #=================================================================
@@ -17,6 +18,11 @@ def deflate_decompressor():
 def deflate_decompressor_alt():
     return zlib.decompressobj(-zlib.MAX_WBITS)

+def brotli_decompressor():
+    decomp = brotli.Decompressor()
+    decomp.unused_data = None
+    return decomp
+

 #=================================================================
 class BufferedReader(object):
@@ -40,7 +46,9 @@ class BufferedReader(object):

     DECOMPRESSORS = {'gzip': gzip_decompressor,
                      'deflate': deflate_decompressor,
-                     'deflate_alt': deflate_decompressor_alt}
+                     'deflate_alt': deflate_decompressor_alt,
+                     'br': brotli_decompressor
+                    }

     def __init__(self, stream, block_size=1024,
                  decomp_type=None,
@@ -181,6 +189,10 @@ class BufferedReader(object):
             self.stream.close()
             self.stream = None

+    @classmethod
+    def get_supported_decompressors(cls):
+        return cls.DECOMPRESSORS.keys()
+

 #=================================================================
 class DecompressingBufferedReader(BufferedReader):
diff --git a/setup.py b/setup.py
index d7958308..629ea228 100755
--- a/setup.py
+++ b/setup.py
@@ -79,6 +79,7 @@ setup(
         'redis',
         'jinja2',
         'surt>=0.3.0',
+        'brotlipy',
         'pyyaml',
         'watchdog',
         'webencodings',