mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
do not write incorrect warc-payload-digest to request records
see https://github.com/webrecorder/warcio/issues/74#issuecomment-487816378
This commit is contained in:
parent
38d6e4337d
commit
dfc081fff8
2
setup.py
2
setup.py
@@ -42,7 +42,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.4.8',
|
version='2.4.9',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@@ -125,48 +125,59 @@ class WarcRecordBuilder:
|
|||||||
headers.append((warctools.WarcRecord.CONCURRENT_TO, concurrent_to))
|
headers.append((warctools.WarcRecord.CONCURRENT_TO, concurrent_to))
|
||||||
if content_type is not None:
|
if content_type is not None:
|
||||||
headers.append((warctools.WarcRecord.CONTENT_TYPE, content_type))
|
headers.append((warctools.WarcRecord.CONTENT_TYPE, content_type))
|
||||||
if payload_digest is not None:
|
|
||||||
headers.append((warctools.WarcRecord.PAYLOAD_DIGEST, payload_digest))
|
|
||||||
# truncated value may be 'length' or 'time'
|
# truncated value may be 'length' or 'time'
|
||||||
if truncated is not None:
|
if truncated is not None:
|
||||||
headers.append((b'WARC-Truncated', truncated))
|
headers.append((b'WARC-Truncated', truncated))
|
||||||
|
if content_length is not None:
|
||||||
|
headers.append((
|
||||||
|
warctools.WarcRecord.CONTENT_LENGTH,
|
||||||
|
str(content_length).encode('latin1')))
|
||||||
|
|
||||||
if recorder is not None:
|
if recorder is not None:
|
||||||
if content_length is not None:
|
if payload_digest is not None:
|
||||||
headers.append((
|
headers.append(
|
||||||
warctools.WarcRecord.CONTENT_LENGTH,
|
(warctools.WarcRecord.PAYLOAD_DIGEST, payload_digest))
|
||||||
str(content_length).encode('latin1')))
|
if content_length is None:
|
||||||
else:
|
|
||||||
headers.append((
|
headers.append((
|
||||||
warctools.WarcRecord.CONTENT_LENGTH,
|
warctools.WarcRecord.CONTENT_LENGTH,
|
||||||
str(len(recorder)).encode('latin1')))
|
str(len(recorder)).encode('latin1')))
|
||||||
headers.append((warctools.WarcRecord.BLOCK_DIGEST,
|
headers.append((warctools.WarcRecord.BLOCK_DIGEST,
|
||||||
warcprox.digest_str(recorder.block_digest, self.base32)))
|
warcprox.digest_str(recorder.block_digest, self.base32)))
|
||||||
recorder.tempfile.seek(0)
|
recorder.tempfile.seek(0)
|
||||||
record = warctools.WarcRecord(headers=headers, content_file=recorder.tempfile)
|
record = warctools.WarcRecord(
|
||||||
|
headers=headers, content_file=recorder.tempfile)
|
||||||
else:
|
else:
|
||||||
if content_length is not None:
|
if content_length is None:
|
||||||
headers.append((
|
|
||||||
warctools.WarcRecord.CONTENT_LENGTH,
|
|
||||||
str(content_length).encode('latin1')))
|
|
||||||
else:
|
|
||||||
headers.append((
|
headers.append((
|
||||||
warctools.WarcRecord.CONTENT_LENGTH,
|
warctools.WarcRecord.CONTENT_LENGTH,
|
||||||
str(len(data)).encode('latin1')))
|
str(len(data)).encode('latin1')))
|
||||||
# no http headers so block digest == payload digest
|
|
||||||
if not payload_digest:
|
block_digest = None
|
||||||
payload_digest = warcprox.digest_str(
|
if not hasattr(data, 'read'):
|
||||||
|
block_digest = warcprox.digest_str(
|
||||||
hashlib.new(self.digest_algorithm, data), self.base32)
|
hashlib.new(self.digest_algorithm, data), self.base32)
|
||||||
headers.append((
|
|
||||||
warctools.WarcRecord.PAYLOAD_DIGEST, payload_digest))
|
if not content_type.lower().startswith(b'application/http'):
|
||||||
headers.append((warctools.WarcRecord.BLOCK_DIGEST, payload_digest))
|
# no http headers, so block digest == payload digest
|
||||||
|
if payload_digest and not block_digest:
|
||||||
|
block_digest = payload_digest
|
||||||
|
elif block_digest and not payload_digest:
|
||||||
|
payload_digest = block_digest
|
||||||
|
|
||||||
|
if block_digest:
|
||||||
|
headers.append(
|
||||||
|
(warctools.WarcRecord.BLOCK_DIGEST, block_digest))
|
||||||
|
if payload_digest:
|
||||||
|
headers.append(
|
||||||
|
(warctools.WarcRecord.PAYLOAD_DIGEST, payload_digest))
|
||||||
|
|
||||||
if hasattr(data, 'read'):
|
if hasattr(data, 'read'):
|
||||||
record = warctools.WarcRecord(
|
record = warctools.WarcRecord(
|
||||||
headers=headers, content_file=data)
|
headers=headers, content_file=data)
|
||||||
else:
|
else:
|
||||||
content_tuple = content_type, data
|
content_tuple = content_type, data
|
||||||
record = warctools.WarcRecord(
|
record = warctools.WarcRecord(
|
||||||
headers=headers, content=content_tuple)
|
headers=headers, content=(content_type, data))
|
||||||
|
|
||||||
return record
|
return record
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user