1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

HEAD requests: fix pywb recording & replay of HEAD requests (when the live-web request is a HEAD, force the recorded payload length to 0 instead of using the upstream Content-Length header)

tests: fix socks-proxy test to fail fast by pointing the proxy at an unused port (port 0), so the test detects that the proxy hook is enabled
This commit is contained in:
Ilya Kreymer 2018-01-29 16:34:25 -08:00
parent 273b3eec30
commit a954a5470f
4 changed files with 40 additions and 11 deletions

View File

@ -389,8 +389,12 @@ class LiveWebLoader(BaseLoader):
warc_headers['Content-Type'] = 'application/http; msgtype=response' warc_headers['Content-Type'] = 'application/http; msgtype=response'
if method == 'HEAD':
content_len = 0
else:
content_len = upstream_res.headers.get('Content-Length', -1)
self._set_content_len(upstream_res.headers.get('Content-Length', -1), self._set_content_len(content_len,
warc_headers, warc_headers,
len(http_headers_buff)) len(http_headers_buff))

View File

@ -29,6 +29,11 @@ class TestLiveRewriter(BaseConfigTest):
assert '"http://httpbin.org/anything/abc##xyz"' in resp.text assert '"http://httpbin.org/anything/abc##xyz"' in resp.text
assert resp.status_int == 200 assert resp.status_int == 200
def test_live_head(self, fmod_sl):
resp = self.head('/live/{0}httpbin.org/anything/foo', fmod_sl)
#assert '"http://httpbin.org/anything/foo"' in resp.text
assert resp.status_int == 200
def test_live_live_frame(self): def test_live_live_frame(self):
resp = self.testapp.get('/live/http://example.com/') resp = self.testapp.get('/live/http://example.com/')
assert resp.status_int == 200 assert resp.status_int == 200

View File

@ -32,6 +32,11 @@ class TestRecordReplay(CollsDirMixin, BaseConfigTest):
res = self.testapp.get('/test/record/mp_/http://httpbin.org/get?A=B') res = self.testapp.get('/test/record/mp_/http://httpbin.org/get?A=B')
assert '"A": "B"' in res.text assert '"A": "B"' in res.text
def test_record_head(self):
res = self.testapp.head('/test/record/mp_/http://httpbin.org/get?A=B')
assert res.status_code == 200
assert res.text == ''
def test_replay_1(self, fmod): def test_replay_1(self, fmod):
self.ensure_empty() self.ensure_empty()
@ -39,6 +44,13 @@ class TestRecordReplay(CollsDirMixin, BaseConfigTest):
res = self.get('/test/{0}http://httpbin.org/get?A=B', fmod_slash) res = self.get('/test/{0}http://httpbin.org/get?A=B', fmod_slash)
assert '"A": "B"' in res.text assert '"A": "B"' in res.text
def test_replay_head(self, fmod):
fmod_slash = fmod + '/' if fmod else ''
res = self.testapp.head('/test/{0}http://httpbin.org/get?A=B'.format(fmod_slash))
assert res.status_code == 200
assert res.text == ''
def test_record_2(self): def test_record_2(self):
res = self.testapp.get('/test2/record/mp_/http://httpbin.org/get?C=D') res = self.testapp.get('/test2/record/mp_/http://httpbin.org/get?C=D')
assert '"C": "D"' in res.text assert '"C": "D"' in res.text
@ -87,21 +99,29 @@ class TestRecordReplay(CollsDirMixin, BaseConfigTest):
cdxj_lines = [json.loads(line) for line in res.text.rstrip().split('\n')] cdxj_lines = [json.loads(line) for line in res.text.rstrip().split('\n')]
assert len(cdxj_lines) == 3 assert len(cdxj_lines) == 4
assert cdxj_lines[0]['url'] == 'http://httpbin.org/get?A=B' assert cdxj_lines[0]['url'] == 'http://httpbin.org/get?A=B'
assert cdxj_lines[1]['url'] == 'http://httpbin.org/get?C=D' assert cdxj_lines[1]['url'] == 'http://httpbin.org/get?A=B'
assert cdxj_lines[2]['url'] == 'http://httpbin.org/get?C=D' assert cdxj_lines[2]['url'] == 'http://httpbin.org/get?C=D'
assert cdxj_lines[3]['url'] == 'http://httpbin.org/get?C=D'
assert cdxj_lines[0]['urlkey'] == 'org,httpbin)/get?__pywb_method=head&a=b'
assert cdxj_lines[1]['urlkey'] == 'org,httpbin)/get?a=b'
assert cdxj_lines[2]['urlkey'] == 'org,httpbin)/get?c=d'
assert cdxj_lines[3]['urlkey'] == 'org,httpbin)/get?c=d'
assert cdxj_lines[0]['source'] == to_path('test/indexes/autoindex.cdxj') assert cdxj_lines[0]['source'] == to_path('test/indexes/autoindex.cdxj')
assert cdxj_lines[1]['source'] == to_path('test2/indexes/autoindex.cdxj') assert cdxj_lines[1]['source'] == to_path('test/indexes/autoindex.cdxj')
assert cdxj_lines[2]['source'] == to_path('test/indexes/autoindex.cdxj') assert cdxj_lines[2]['source'] == to_path('test2/indexes/autoindex.cdxj')
assert cdxj_lines[3]['source'] == to_path('test/indexes/autoindex.cdxj')
assert cdxj_lines[0]['source-coll'] == 'test' assert cdxj_lines[0]['source-coll'] == 'test'
assert cdxj_lines[1]['source-coll'] == 'test2' assert cdxj_lines[1]['source-coll'] == 'test'
assert cdxj_lines[2]['source-coll'] == 'test' assert cdxj_lines[2]['source-coll'] == 'test2'
assert cdxj_lines[3]['source-coll'] == 'test'
assert cdxj_lines[0]['filename'] == cdxj_lines[2]['filename'] assert cdxj_lines[1]['filename'] == cdxj_lines[3]['filename']
def test_timemap_all_coll(self): def test_timemap_all_coll(self):
res = self.testapp.get('/all/timemap/link/http://httpbin.org/get?C=D') res = self.testapp.get('/all/timemap/link/http://httpbin.org/get?C=D')

View File

@ -12,7 +12,7 @@ class TestSOCKSProxy(BaseConfigTest):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
os.environ['SOCKS_HOST'] = 'localhost' os.environ['SOCKS_HOST'] = 'localhost'
os.environ['SOCKS_PORT'] = '8080' os.environ['SOCKS_PORT'] = '0'
pywb_http.patch_socks() pywb_http.patch_socks()
import pywb.warcserver.resource.responseloader import pywb.warcserver.resource.responseloader
@ -25,8 +25,8 @@ class TestSOCKSProxy(BaseConfigTest):
super(TestSOCKSProxy, cls).teardown_class() super(TestSOCKSProxy, cls).teardown_class()
def test_socks_proxy_set(self): def test_socks_proxy_set(self):
assert pywb_http.SOCKS_PROXIES == {'http': 'socks5h://localhost:8080', assert pywb_http.SOCKS_PROXIES == {'http': 'socks5h://localhost:0',
'https': 'socks5h://localhost:8080' 'https': 'socks5h://localhost:0'
} }
def test_socks_attempt_connect(self, fmod_sl): def test_socks_attempt_connect(self, fmod_sl):