mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
warcserver: resource load: only read headers for self-redirect for response or revisit records
tests: add test with resource record (new warc/cdxj) to ensure correct read of resource records
This commit is contained in:
parent
8a107b0f6f
commit
9eba59d8b4
@ -152,19 +152,19 @@ StatusAndHeadersParserException: Expected Status Line starting with ['HTTP/1.0',
|
||||
>>> cli_lines(['--sort', '-', TEST_WARC_DIR])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz
|
||||
Total: 211
|
||||
Total: 212
|
||||
|
||||
# test sort, multiple inputs, recursive, from base test dir
|
||||
>>> cli_lines(['--sort', '-r', '-', get_test_dir()])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz
|
||||
Total: 211
|
||||
Total: 212
|
||||
|
||||
# test sort, 9-field, multiple inputs, all records + post query
|
||||
>>> cli_lines(['--sort', '-a', '-p', '-9', TEST_WARC_DIR])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - 355 example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - 3181 example-wpull.warc.gz
|
||||
Total: 406
|
||||
Total: 407
|
||||
|
||||
# test writing to stdout
|
||||
>>> cli_lines(['-', TEST_WARC_DIR + 'example.warc.gz'])
|
||||
@ -188,7 +188,7 @@ Total: 4
|
||||
>>> cli_lines(['--sort', '--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 ../warcs/example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 ../warcs/example-wpull.warc.gz
|
||||
Total: 211
|
||||
Total: 212
|
||||
|
||||
# test writing to temp dir, also use unicode filename
|
||||
>>> cli_lines_with_dir(TEST_WARC_DIR + 'example.warc.gz')
|
||||
|
@ -195,15 +195,17 @@ class WARCPathLoader(DefaultResolverMixin, BaseLoader):
|
||||
failed_files,
|
||||
local_index_query))
|
||||
|
||||
status = cdx.get('status')
|
||||
if not status or status.startswith('3'):
|
||||
http_headers = self.headers_parser.parse(payload.raw_stream)
|
||||
self.raise_on_self_redirect(params, cdx,
|
||||
http_headers.get_statuscode(),
|
||||
http_headers.get_header('Location'))
|
||||
http_headers_buff = http_headers.to_bytes()
|
||||
else:
|
||||
http_headers_buff = None
|
||||
http_headers_buff = None
|
||||
if payload.rec_type in ('response', 'revisit'):
|
||||
status = cdx.get('status')
|
||||
# status may not be set for 'revisit'
|
||||
if not status or status.startswith('3'):
|
||||
http_headers = self.headers_parser.parse(payload.raw_stream)
|
||||
self.raise_on_self_redirect(params, cdx,
|
||||
http_headers.get_statuscode(),
|
||||
http_headers.get_header('Location'))
|
||||
|
||||
http_headers_buff = http_headers.to_bytes()
|
||||
|
||||
warc_headers = payload.rec_headers
|
||||
|
||||
|
1
sample_archive/cdx/httpbin-resource.cdxj
Normal file
1
sample_archive/cdx/httpbin-resource.cdxj
Normal file
@ -0,0 +1 @@
|
||||
org,httpbin)/anything/resource.json 20171130220904 {"filename":"httpbin-resource.warc.gz","digest":"UQ3W6RIQVJO6ZEL55355BJODG2DMWBPH","length":"465","offset":"0","mime":"application/json","url":"http://httpbin.org/anything/resource.json"}
|
BIN
sample_archive/warcs/httpbin-resource.warc.gz
Normal file
BIN
sample_archive/warcs/httpbin-resource.warc.gz
Normal file
Binary file not shown.
@ -101,6 +101,10 @@ class TestWbIntegration(BaseConfigTest):
|
||||
csp = "default-src 'unsafe-eval' 'unsafe-inline' 'self' data: blob: mediastream: ws: wss: ; form-action 'self'"
|
||||
assert resp.headers['Content-Security-Policy'] == csp
|
||||
|
||||
def test_replay_resource(self, fmod):
    """Replay a capture stored as a WARC ``resource`` record and check its MIME type.

    The request targets ``http://httpbin.org/anything/resource.json`` in the
    ``pywb`` collection; the response must carry the ``application/json``
    content type.

    :param fmod: replay modifier handed to ``self.get`` together with the URL
        template (presumably substituted for the ``{0}`` placeholder --
        confirm against the base test class).
    """
    # The timestamp in the URL need not equal the capture's own timestamp.
    replay_url = '/pywb/20171122230223{0}/http://httpbin.org/anything/resource.json'
    response = self.get(replay_url, fmod)

    content_type = response.headers['Content-Type']
    assert content_type == 'application/json'
|
||||
|
||||
def test_replay_fuzzy_1(self, fmod):
|
||||
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/?_=123', fmod)
|
||||
assert resp.status_int == 200
|
||||
|
Loading…
x
Reference in New Issue
Block a user