From ee9e37556005af4abf6affa719dc53bad616514b Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Wed, 3 Aug 2022 11:15:25 -0700 Subject: [PATCH] zlib decompression --- setup.py | 4 ++-- warcprox/warcproxy.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 667299f..52af206 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ ''' setup.py - setuptools installation configuration for warcprox -Copyright (C) 2013-2022 Internet Archive +Copyright (C) 2013-2021 Internet Archive This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -44,7 +44,7 @@ except: setuptools.setup( name='warcprox', - version='2.4.30', + version='2.4.29', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/warcprox/warcproxy.py b/warcprox/warcproxy.py index 05eb8b7..5f3db07 100644 --- a/warcprox/warcproxy.py +++ b/warcprox/warcproxy.py @@ -2,7 +2,7 @@ warcprox/warcproxy.py - recording proxy, extends mitmproxy to record traffic, enqueue info on the recorded url queue -Copyright (C) 2013-2018 Internet Archive +Copyright (C) 2013-2022 Internet Archive This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -46,6 +46,8 @@ import tempfile import hashlib import doublethink import re +import zlib +import base64 class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler): ''' @@ -175,6 +177,13 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler): warcprox_meta = json.loads(self.headers['Warcprox-Meta']) self._security_check(warcprox_meta) self._enforce_limits(warcprox_meta) + if 'compressed_blocks' in warcprox_meta: + # b64decode and decompress + blocks_decompressed = zlib.decompress(base64.b64decode(warcprox_meta['compressed_blocks'])) + # decode() and json.loads + warcprox_meta['blocks'] = json.loads(blocks_decompressed.decode()) + # delete compressed_blocks (just in case?) + del warcprox_meta['compressed_blocks'] self._enforce_blocks(warcprox_meta) def _connect_to_remote_server(self):