diff --git a/pywb/__init__.py b/pywb/__init__.py index 05caf769..caece5a6 100644 --- a/pywb/__init__.py +++ b/pywb/__init__.py @@ -1,4 +1,4 @@ -__version__ = '2.1.0' +__version__ = '2.1.1' DEFAULT_CONFIG = 'pywb/default_config.yaml' diff --git a/pywb/warcserver/resource/responseloader.py b/pywb/warcserver/resource/responseloader.py index 6aecbb80..2396da1e 100644 --- a/pywb/warcserver/resource/responseloader.py +++ b/pywb/warcserver/resource/responseloader.py @@ -353,6 +353,17 @@ class LiveWebLoader(BaseLoader): v = self.unrewrite_header(cdx, v) http_headers_buff += n + ': ' + v + '\r\n' + + http_headers_buff += '\r\n' + + try: + # http headers could be encoded as utf-8 (though non-standard) + # first try utf-8 encoding + http_headers_buff = http_headers_buff.encode('utf-8') + except: + # then, fall back to latin-1 + http_headers_buff = http_headers_buff.encode('latin-1') + except: #pragma: no cover #PY 2 resp_headers = orig_resp.msg.headers @@ -374,8 +385,8 @@ class LiveWebLoader(BaseLoader): else: http_headers_buff += line - http_headers_buff += '\r\n' - http_headers_buff = http_headers_buff.encode('latin-1') + # if python2, already byte headers, so leave as is + http_headers_buff += '\r\n' try: fp = upstream_res._fp.fp diff --git a/tests/base_config_test.py b/tests/base_config_test.py index bbc83a21..58d75e79 100644 --- a/tests/base_config_test.py +++ b/tests/base_config_test.py @@ -22,11 +22,13 @@ def fmod_sl(request): # ============================================================================ class BaseConfigTest(BaseTestClass): + lint_app = True + @classmethod def get_test_app(cls, config_file, custom_config=None): config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file) app = FrontEndApp(config_file=config_file, custom_config=custom_config) - return app, webtest.TestApp(app) + return app, webtest.TestApp(app, lint=cls.lint_app) @classmethod def setup_class(cls, config_file, include_non_frame=True, custom_config=None): diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index 68d29cb2..4062e67f 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -1,14 +1,44 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + from .base_config_test import BaseConfigTest, fmod_sl from pywb.warcserver.test.testutils import HttpBinLiveTests + +from pywb.utils.geventserver import GeventServer import pytest import sys +import six + + +# ============================================================================ +def header_test_server(environ, start_response): + body = b'body' + value = u'⛄' + value = value.encode('utf-8') + if six.PY3: + value = value.decode('latin-1') + + headers = [] + if environ['PATH_INFO'] == '/unicode': + headers = [('Content-Length', str(len(body))), + ('x-utf-8', value)] + + start_response('200 OK', headers=headers) + return [body] # ============================================================================ class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest): @classmethod def setup_class(cls): + cls.lint_app = False super(TestLiveRewriter, cls).setup_class('config_test.yaml') + cls.test_serv = GeventServer(header_test_server) + + @classmethod + def teardown_class(cls): + cls.test_serv.stop() + super(TestLiveRewriter, cls).teardown_class() def test_live_live_1(self, fmod_sl): headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/live/other.example.com')] @@ -58,6 +88,15 @@ class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest): assert resp.headers['Content-Length'] == '90' assert resp.headers['Content-Range'] == 'bytes 0-89/90' + def test_custom_unicode_header(self, fmod_sl): + value = u'⛄' + value = value.encode('utf-8') + if six.PY3: + value = value.decode('latin-1') + + resp = self.get('/live/{0}http://localhost:%s/unicode' % self.test_serv.port, fmod_sl) + assert resp.headers['x-utf-8'] == value + def test_live_live_frame(self): resp = self.testapp.get('/live/http://example.com/') assert resp.status_int == 200