From a954a5470fe5f2b5cd5404c1ac532dbbcb12ad5c Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 29 Jan 2018 16:34:25 -0800 Subject: [PATCH] HEAD requests: fix pywb recording & replay of HEAD requests (force payload of 0 instead of content-length if HEAD request from live web) tests: fix socks-proxy test to fast-fail to a random unused port to detect proxy hook is enabled --- pywb/warcserver/resource/responseloader.py | 6 +++- tests/test_live_rewriter.py | 5 ++++ tests/test_record_replay.py | 34 +++++++++++++++++----- tests/test_socks.py | 6 ++-- 4 files changed, 40 insertions(+), 11 deletions(-) diff --git a/pywb/warcserver/resource/responseloader.py b/pywb/warcserver/resource/responseloader.py index ddf615a4..ea64e081 100644 --- a/pywb/warcserver/resource/responseloader.py +++ b/pywb/warcserver/resource/responseloader.py @@ -389,8 +389,12 @@ class LiveWebLoader(BaseLoader): warc_headers['Content-Type'] = 'application/http; msgtype=response' + if method == 'HEAD': + content_len = 0 + else: + content_len = upstream_res.headers.get('Content-Length', -1) - self._set_content_len(upstream_res.headers.get('Content-Length', -1), + self._set_content_len(content_len, warc_headers, len(http_headers_buff)) diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index f4ac365d..7353d808 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -29,6 +29,11 @@ class TestLiveRewriter(BaseConfigTest): assert '"http://httpbin.org/anything/abc##xyz"' in resp.text assert resp.status_int == 200 + def test_live_head(self, fmod_sl): + resp = self.head('/live/{0}httpbin.org/anything/foo', fmod_sl) + #assert '"http://httpbin.org/anything/foo"' in resp.text + assert resp.status_int == 200 + def test_live_live_frame(self): resp = self.testapp.get('/live/http://example.com/') assert resp.status_int == 200 diff --git a/tests/test_record_replay.py b/tests/test_record_replay.py index 6c085383..ca00b2ca 100644 --- a/tests/test_record_replay.py +++ b/tests/test_record_replay.py @@ -32,6 +32,11 @@ class TestRecordReplay(CollsDirMixin, BaseConfigTest): res = self.testapp.get('/test/record/mp_/http://httpbin.org/get?A=B') assert '"A": "B"' in res.text + def test_record_head(self): + res = self.testapp.head('/test/record/mp_/http://httpbin.org/get?A=B') + assert res.status_code == 200 + assert res.text == '' + def test_replay_1(self, fmod): self.ensure_empty() @@ -39,6 +44,13 @@ class TestRecordReplay(CollsDirMixin, BaseConfigTest): res = self.get('/test/{0}http://httpbin.org/get?A=B', fmod_slash) assert '"A": "B"' in res.text + def test_replay_head(self, fmod): + fmod_slash = fmod + '/' if fmod else '' + + res = self.testapp.head('/test/{0}http://httpbin.org/get?A=B'.format(fmod_slash)) + assert res.status_code == 200 + assert res.text == '' + def test_record_2(self): res = self.testapp.get('/test2/record/mp_/http://httpbin.org/get?C=D') assert '"C": "D"' in res.text @@ -87,21 +99,29 @@ class TestRecordReplay(CollsDirMixin, BaseConfigTest): cdxj_lines = [json.loads(line) for line in res.text.rstrip().split('\n')] - assert len(cdxj_lines) == 3 + assert len(cdxj_lines) == 4 assert cdxj_lines[0]['url'] == 'http://httpbin.org/get?A=B' - assert cdxj_lines[1]['url'] == 'http://httpbin.org/get?C=D' + assert cdxj_lines[1]['url'] == 'http://httpbin.org/get?A=B' assert cdxj_lines[2]['url'] == 'http://httpbin.org/get?C=D' + assert cdxj_lines[3]['url'] == 'http://httpbin.org/get?C=D' + + assert cdxj_lines[0]['urlkey'] == 'org,httpbin)/get?__pywb_method=head&a=b' + assert cdxj_lines[1]['urlkey'] == 'org,httpbin)/get?a=b' + assert cdxj_lines[2]['urlkey'] == 'org,httpbin)/get?c=d' + assert cdxj_lines[3]['urlkey'] == 'org,httpbin)/get?c=d' assert cdxj_lines[0]['source'] == to_path('test/indexes/autoindex.cdxj') - assert cdxj_lines[1]['source'] == to_path('test2/indexes/autoindex.cdxj') - assert cdxj_lines[2]['source'] == to_path('test/indexes/autoindex.cdxj') + assert cdxj_lines[1]['source'] == to_path('test/indexes/autoindex.cdxj') + assert cdxj_lines[2]['source'] == to_path('test2/indexes/autoindex.cdxj') + assert cdxj_lines[3]['source'] == to_path('test/indexes/autoindex.cdxj') assert cdxj_lines[0]['source-coll'] == 'test' - assert cdxj_lines[1]['source-coll'] == 'test2' - assert cdxj_lines[2]['source-coll'] == 'test' + assert cdxj_lines[1]['source-coll'] == 'test' + assert cdxj_lines[2]['source-coll'] == 'test2' + assert cdxj_lines[3]['source-coll'] == 'test' - assert cdxj_lines[0]['filename'] == cdxj_lines[2]['filename'] + assert cdxj_lines[1]['filename'] == cdxj_lines[3]['filename'] def test_timemap_all_coll(self): res = self.testapp.get('/all/timemap/link/http://httpbin.org/get?C=D') diff --git a/tests/test_socks.py b/tests/test_socks.py index ba0ba494..31aee3c7 100644 --- a/tests/test_socks.py +++ b/tests/test_socks.py @@ -12,7 +12,7 @@ class TestSOCKSProxy(BaseConfigTest): @classmethod def setup_class(cls): os.environ['SOCKS_HOST'] = 'localhost' - os.environ['SOCKS_PORT'] = '8080' + os.environ['SOCKS_PORT'] = '0' pywb_http.patch_socks() import pywb.warcserver.resource.responseloader @@ -25,8 +25,8 @@ class TestSOCKSProxy(BaseConfigTest): super(TestSOCKSProxy, cls).teardown_class() def test_socks_proxy_set(self): - assert pywb_http.SOCKS_PROXIES == {'http': 'socks5h://localhost:8080', - 'https': 'socks5h://localhost:8080' + assert pywb_http.SOCKS_PROXIES == {'http': 'socks5h://localhost:0', + 'https': 'socks5h://localhost:0' } def test_socks_attempt_connect(self, fmod_sl):