mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
live rewriting/utf-8 headers: fix for sites that have utf-8 in headers despite standard (#402)
- attempt to encode headers as utf-8 first for live web, then fall back to latin-1 (similar to warcio http header parsing)
- only encode headers for py3 (in py2, headers are already bytestrings)
- tests: add tests for utf-8 in header
- bump version to 2.1.1
This commit is contained in:
parent
1b151b74bf
commit
e1e8917bc3
@ -1,4 +1,4 @@
|
|||||||
__version__ = '2.1.0'
|
__version__ = '2.1.1'
|
||||||
|
|
||||||
DEFAULT_CONFIG = 'pywb/default_config.yaml'
|
DEFAULT_CONFIG = 'pywb/default_config.yaml'
|
||||||
|
|
||||||
|
@ -353,6 +353,17 @@ class LiveWebLoader(BaseLoader):
|
|||||||
v = self.unrewrite_header(cdx, v)
|
v = self.unrewrite_header(cdx, v)
|
||||||
|
|
||||||
http_headers_buff += n + ': ' + v + '\r\n'
|
http_headers_buff += n + ': ' + v + '\r\n'
|
||||||
|
|
||||||
|
http_headers_buff += '\r\n'
|
||||||
|
|
||||||
|
try:
|
||||||
|
# http headers could be encoded as utf-8 (though non-standard)
|
||||||
|
# first try utf-8 encoding
|
||||||
|
http_headers_buff = http_headers_buff.encode('utf-8')
|
||||||
|
except:
|
||||||
|
# then, fall back to latin-1
|
||||||
|
http_headers_buff = http_headers_buff.encode('latin-1')
|
||||||
|
|
||||||
except: #pragma: no cover
|
except: #pragma: no cover
|
||||||
#PY 2
|
#PY 2
|
||||||
resp_headers = orig_resp.msg.headers
|
resp_headers = orig_resp.msg.headers
|
||||||
@ -374,8 +385,8 @@ class LiveWebLoader(BaseLoader):
|
|||||||
else:
|
else:
|
||||||
http_headers_buff += line
|
http_headers_buff += line
|
||||||
|
|
||||||
http_headers_buff += '\r\n'
|
# if python2, already byte headers, so leave as is
|
||||||
http_headers_buff = http_headers_buff.encode('latin-1')
|
http_headers_buff += '\r\n'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fp = upstream_res._fp.fp
|
fp = upstream_res._fp.fp
|
||||||
|
@ -22,11 +22,13 @@ def fmod_sl(request):
|
|||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class BaseConfigTest(BaseTestClass):
|
class BaseConfigTest(BaseTestClass):
|
||||||
|
lint_app = True
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_test_app(cls, config_file, custom_config=None):
|
def get_test_app(cls, config_file, custom_config=None):
|
||||||
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
|
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
|
||||||
app = FrontEndApp(config_file=config_file, custom_config=custom_config)
|
app = FrontEndApp(config_file=config_file, custom_config=custom_config)
|
||||||
return app, webtest.TestApp(app)
|
return app, webtest.TestApp(app, lint=cls.lint_app)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setup_class(cls, config_file, include_non_frame=True, custom_config=None):
|
def setup_class(cls, config_file, include_non_frame=True, custom_config=None):
|
||||||
|
@ -1,14 +1,44 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from .base_config_test import BaseConfigTest, fmod_sl
|
from .base_config_test import BaseConfigTest, fmod_sl
|
||||||
from pywb.warcserver.test.testutils import HttpBinLiveTests
|
from pywb.warcserver.test.testutils import HttpBinLiveTests
|
||||||
|
|
||||||
|
from pywb.utils.geventserver import GeventServer
|
||||||
import pytest
|
import pytest
|
||||||
import sys
|
import sys
|
||||||
|
import six
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
def header_test_server(environ, start_response):
|
||||||
|
body = b'body'
|
||||||
|
value = u'⛄'
|
||||||
|
value = value.encode('utf-8')
|
||||||
|
if six.PY3:
|
||||||
|
value = value.decode('latin-1')
|
||||||
|
|
||||||
|
headers = []
|
||||||
|
if environ['PATH_INFO'] == '/unicode':
|
||||||
|
headers = [('Content-Length', str(len(body))),
|
||||||
|
('x-utf-8', value)]
|
||||||
|
|
||||||
|
start_response('200 OK', headers=headers)
|
||||||
|
return [body]
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest):
|
class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
|
cls.lint_app = False
|
||||||
super(TestLiveRewriter, cls).setup_class('config_test.yaml')
|
super(TestLiveRewriter, cls).setup_class('config_test.yaml')
|
||||||
|
cls.test_serv = GeventServer(header_test_server)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def teardown_class(cls):
|
||||||
|
cls.test_serv.stop()
|
||||||
|
super(TestLiveRewriter, cls).teardown_class()
|
||||||
|
|
||||||
def test_live_live_1(self, fmod_sl):
|
def test_live_live_1(self, fmod_sl):
|
||||||
headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/live/other.example.com')]
|
headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/live/other.example.com')]
|
||||||
@ -58,6 +88,15 @@ class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest):
|
|||||||
assert resp.headers['Content-Length'] == '90'
|
assert resp.headers['Content-Length'] == '90'
|
||||||
assert resp.headers['Content-Range'] == 'bytes 0-89/90'
|
assert resp.headers['Content-Range'] == 'bytes 0-89/90'
|
||||||
|
|
||||||
|
def test_custom_unicode_header(self, fmod_sl):
|
||||||
|
value = u'⛄'
|
||||||
|
value = value.encode('utf-8')
|
||||||
|
if six.PY3:
|
||||||
|
value = value.decode('latin-1')
|
||||||
|
|
||||||
|
resp = self.get('/live/{0}http://localhost:%s/unicode' % self.test_serv.port, fmod_sl)
|
||||||
|
assert resp.headers['x-utf-8'] == value
|
||||||
|
|
||||||
def test_live_live_frame(self):
|
def test_live_live_frame(self):
|
||||||
resp = self.testapp.get('/live/http://example.com/')
|
resp = self.testapp.get('/live/http://example.com/')
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
|
Loading…
x
Reference in New Issue
Block a user