mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
live rewriting/utf-8 headers: fix for sites that have utf-8 in headers despite standard (#402)
- attempt to encode headers as utf-8 first for live web, then fall back to latin-1 (similar to warcio http header parsing)
- only encode headers for py3 (in py2, headers are already bytestrings)
- tests: add tests for utf-8 in header
- bump version to 2.1.1
This commit is contained in:
parent
1b151b74bf
commit
e1e8917bc3
@ -1,4 +1,4 @@
|
||||
__version__ = '2.1.0'
|
||||
__version__ = '2.1.1'
|
||||
|
||||
DEFAULT_CONFIG = 'pywb/default_config.yaml'
|
||||
|
||||
|
@ -353,6 +353,17 @@ class LiveWebLoader(BaseLoader):
|
||||
v = self.unrewrite_header(cdx, v)
|
||||
|
||||
http_headers_buff += n + ': ' + v + '\r\n'
|
||||
|
||||
http_headers_buff += '\r\n'
|
||||
|
||||
try:
|
||||
# http headers could be encoded as utf-8 (though non-standard)
|
||||
# first try utf-8 encoding
|
||||
http_headers_buff = http_headers_buff.encode('utf-8')
|
||||
except:
|
||||
# then, fall back to latin-1
|
||||
http_headers_buff = http_headers_buff.encode('latin-1')
|
||||
|
||||
except: #pragma: no cover
|
||||
#PY 2
|
||||
resp_headers = orig_resp.msg.headers
|
||||
@ -374,8 +385,8 @@ class LiveWebLoader(BaseLoader):
|
||||
else:
|
||||
http_headers_buff += line
|
||||
|
||||
http_headers_buff += '\r\n'
|
||||
http_headers_buff = http_headers_buff.encode('latin-1')
|
||||
# if python2, already byte headers, so leave as is
|
||||
http_headers_buff += '\r\n'
|
||||
|
||||
try:
|
||||
fp = upstream_res._fp.fp
|
||||
|
@ -22,11 +22,13 @@ def fmod_sl(request):
|
||||
|
||||
# ============================================================================
|
||||
class BaseConfigTest(BaseTestClass):
|
||||
lint_app = True
|
||||
|
||||
@classmethod
def get_test_app(cls, config_file, custom_config=None):
    """Build the front-end app under test and a webtest wrapper around it.

    :param config_file: config file name, resolved relative to the
        directory that contains this test module.
    :param custom_config: optional value passed through unchanged to
        ``FrontEndApp`` as ``custom_config``.
    :return: ``(app, testapp)`` tuple, where ``testapp`` is a
        ``webtest.TestApp`` wrapping ``app``.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    config_file = os.path.join(test_dir, config_file)

    app = FrontEndApp(config_file=config_file, custom_config=custom_config)

    # Honour the class-level ``lint_app`` switch so a subclass can turn off
    # webtest's lint wrapper. An earlier unconditional return made this
    # statement unreachable.
    return app, webtest.TestApp(app, lint=cls.lint_app)
|
||||
|
||||
@classmethod
|
||||
def setup_class(cls, config_file, include_non_frame=True, custom_config=None):
|
||||
|
@ -1,14 +1,44 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from .base_config_test import BaseConfigTest, fmod_sl
|
||||
from pywb.warcserver.test.testutils import HttpBinLiveTests
|
||||
|
||||
from pywb.utils.geventserver import GeventServer
|
||||
import pytest
|
||||
import sys
|
||||
import six
|
||||
|
||||
|
||||
# ============================================================================
|
||||
def header_test_server(environ, start_response):
    """Minimal WSGI app that answers with a UTF-8 encoded header value.

    A request for ``/unicode`` is answered with a ``Content-Length`` header
    and an ``x-utf-8`` header carrying the UTF-8 bytes of a snowman
    character. Any other path is answered with an empty header list. The
    response body is always ``b'body'``.

    :param environ: WSGI environ dict; only ``PATH_INFO`` is read.
    :param start_response: WSGI ``start_response`` callable.
    :return: one-item list holding the response body.
    """
    body = b'body'

    # WSGI header values must be native strings. On py2 the utf-8 encoded
    # bytes already are a native ``str``; on py3 they are bytes, so decode
    # them as latin-1, which maps every byte to exactly one code point.
    value = u'⛄'.encode('utf-8')
    if not isinstance(value, str):
        value = value.decode('latin-1')

    headers = []
    # PATH_INFO may legally be omitted from environ when it is empty.
    if environ.get('PATH_INFO', '') == '/unicode':
        headers = [('Content-Length', str(len(body))),
                   ('x-utf-8', value)]

    # PEP 3333 requires start_response to be invoked positionally; a server
    # is free to name its parameters anything, so no keyword is used here.
    start_response('200 OK', headers)
    return [body]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest):
|
||||
@classmethod
def setup_class(cls):
    """Set up the live-rewrite test app and start the header test server."""
    # Switch off the webtest lint wrapper for this class before the
    # inherited setup runs (presumably that is where the test app gets
    # built -- confirm against the base class).
    cls.lint_app = False
    super(TestLiveRewriter, cls).setup_class('config_test.yaml')

    # Local server that emits the utf-8 encoded response header.
    header_server = GeventServer(header_test_server)
    cls.test_serv = header_server
|
||||
|
||||
@classmethod
def teardown_class(cls):
    """Stop the header test server, then run the inherited teardown."""
    try:
        cls.test_serv.stop()
    finally:
        # Run the inherited teardown even when stopping the server raises,
        # so whatever the base classes set up is still released.
        super(TestLiveRewriter, cls).teardown_class()
|
||||
super(TestLiveRewriter, cls).teardown_class()
|
||||
|
||||
def test_live_live_1(self, fmod_sl):
|
||||
headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/live/other.example.com')]
|
||||
@ -58,6 +88,15 @@ class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest):
|
||||
assert resp.headers['Content-Length'] == '90'
|
||||
assert resp.headers['Content-Range'] == 'bytes 0-89/90'
|
||||
|
||||
def test_custom_unicode_header(self, fmod_sl):
    """Check that a utf-8 encoded upstream header comes back unchanged."""
    # Expected value in native-string form: utf-8 bytes on py2, the same
    # bytes decoded as latin-1 on py3.
    expected = u'⛄'.encode('utf-8')
    if six.PY3:
        expected = expected.decode('latin-1')

    # ``%s`` is filled with the local test server port here; the ``{0}``
    # placeholder is left in the url that is handed to ``self.get``.
    url = '/live/{0}http://localhost:%s/unicode' % self.test_serv.port
    resp = self.get(url, fmod_sl)

    assert resp.headers['x-utf-8'] == expected
|
||||
|
||||
def test_live_live_frame(self):
|
||||
resp = self.testapp.get('/live/http://example.com/')
|
||||
assert resp.status_int == 200
|
||||
|
Loading…
x
Reference in New Issue
Block a user