diff --git a/setup.py b/setup.py index b635ff1..e762b13 100755 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ except: setuptools.setup( name='warcprox', - version='2.4.5', + version='2.4.6', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/warcprox/warcproxy.py b/warcprox/warcproxy.py index 8066ace..4a5312e 100644 --- a/warcprox/warcproxy.py +++ b/warcprox/warcproxy.py @@ -47,6 +47,7 @@ from urllib3 import PoolManager import tempfile import hashlib import doublethink +import re class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler): ''' @@ -387,9 +388,8 @@ class RecordedUrl: self.mimetype = content_type if self.mimetype: - n = self.mimetype.find(";") - if n >= 0: - self.mimetype = self.mimetype[:n] + # chop off subtype, and ensure there's no whitespace + self.mimetype = re.split(r'[;\s]', self.mimetype, 2)[0] self.custom_type = custom_type self.status = status