diff --git a/pywb/indexer/test/test_indexing.py b/pywb/indexer/test/test_indexing.py index b2bd6dfe..de79f2e9 100644 --- a/pywb/indexer/test/test_indexing.py +++ b/pywb/indexer/test/test_indexing.py @@ -152,19 +152,19 @@ StatusAndHeadersParserException: Expected Status Line starting with ['HTTP/1.0', >>> cli_lines(['--sort', '-', TEST_WARC_DIR]) com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz -Total: 211 +Total: 212 # test sort, multiple inputs, recursive, from base test dir >>> cli_lines(['--sort', '-r', '-', get_test_dir()]) com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz -Total: 211 +Total: 212 # test sort, 9-field, multiple inputs, all records + post query >>> cli_lines(['--sort', '-a', '-p', '-9', TEST_WARC_DIR]) com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - 355 example-url-agnostic-revisit.warc.gz urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - 3181 example-wpull.warc.gz -Total: 406 +Total: 407 # test writing to stdout >>> cli_lines(['-', TEST_WARC_DIR + 'example.warc.gz']) @@ -188,7 +188,7 @@ Total: 4 >>> cli_lines(['--sort', '--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR]) com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 ../warcs/example-url-agnostic-revisit.warc.gz urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 ../warcs/example-wpull.warc.gz -Total: 211 +Total: 212 # test writing to temp dir, also use unicode filename >>> cli_lines_with_dir(TEST_WARC_DIR + 'example.warc.gz') diff --git a/pywb/warcserver/resource/responseloader.py b/pywb/warcserver/resource/responseloader.py index d45d291f..6b7cccc9 100644 --- a/pywb/warcserver/resource/responseloader.py +++ b/pywb/warcserver/resource/responseloader.py @@ -195,15 +195,17 @@ class WARCPathLoader(DefaultResolverMixin, BaseLoader): failed_files, local_index_query)) - status = cdx.get('status') - if not status or status.startswith('3'): - http_headers = self.headers_parser.parse(payload.raw_stream) - self.raise_on_self_redirect(params, cdx, - http_headers.get_statuscode(), - http_headers.get_header('Location')) - http_headers_buff = http_headers.to_bytes() - else: - http_headers_buff = None + http_headers_buff = None + if payload.rec_type in ('response', 'revisit'): + status = cdx.get('status') + # status may not be set for 'revisit' + if not status or status.startswith('3'): + http_headers = self.headers_parser.parse(payload.raw_stream) + self.raise_on_self_redirect(params, cdx, + http_headers.get_statuscode(), + http_headers.get_header('Location')) + + http_headers_buff = http_headers.to_bytes() warc_headers = payload.rec_headers diff --git a/sample_archive/cdx/httpbin-resource.cdxj b/sample_archive/cdx/httpbin-resource.cdxj new file mode 100644 index 00000000..6d01f921 --- /dev/null +++ b/sample_archive/cdx/httpbin-resource.cdxj @@ -0,0 +1 @@ +org,httpbin)/anything/resource.json 20171130220904 {"filename":"httpbin-resource.warc.gz","digest":"UQ3W6RIQVJO6ZEL55355BJODG2DMWBPH","length":"465","offset":"0","mime":"application/json","url":"http://httpbin.org/anything/resource.json"} diff --git a/sample_archive/warcs/httpbin-resource.warc.gz b/sample_archive/warcs/httpbin-resource.warc.gz new file mode 100644 index 00000000..af3df8f3 Binary files /dev/null and b/sample_archive/warcs/httpbin-resource.warc.gz differ diff --git a/tests/test_integration.py b/tests/test_integration.py index ee5733f4..666ff98f 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -101,6 +101,10 @@ class TestWbIntegration(BaseConfigTest): csp = "default-src 'unsafe-eval' 'unsafe-inline' 'self' data: blob: mediastream: ws: wss: ; form-action 'self'" assert resp.headers['Content-Security-Policy'] == csp + def test_replay_resource(self, fmod): + resp = self.get('/pywb/20171122230223{0}/http://httpbin.org/anything/resource.json', fmod) + assert resp.headers['Content-Type'] == 'application/json' + def test_replay_fuzzy_1(self, fmod): resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/?_=123', fmod) assert resp.status_int == 200