mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
add payload digest header to revisit records
This commit is contained in:
parent
0eb2917e50
commit
965853f4ab
@ -75,6 +75,7 @@ class WarcWriter:
|
|||||||
refers_to=dedup_info['i'],
|
refers_to=dedup_info['i'],
|
||||||
refers_to_target_uri=dedup_info['u'],
|
refers_to_target_uri=dedup_info['u'],
|
||||||
refers_to_date=dedup_info['d'],
|
refers_to_date=dedup_info['d'],
|
||||||
|
payload_digest=self.digest_str(recorded_url.response_recorder.payload_digest),
|
||||||
profile=warctools.WarcRecord.PROFILE_IDENTICAL_PAYLOAD_DIGEST,
|
profile=warctools.WarcRecord.PROFILE_IDENTICAL_PAYLOAD_DIGEST,
|
||||||
content_type=hanzo.httptools.ResponseMessage.CONTENT_TYPE,
|
content_type=hanzo.httptools.ResponseMessage.CONTENT_TYPE,
|
||||||
remote_ip=recorded_url.remote_ip)
|
remote_ip=recorded_url.remote_ip)
|
||||||
@ -104,7 +105,7 @@ class WarcWriter:
|
|||||||
def build_warc_record(self, url, warc_date=None, recorder=None, data=None,
|
def build_warc_record(self, url, warc_date=None, recorder=None, data=None,
|
||||||
concurrent_to=None, warc_type=None, content_type=None, remote_ip=None,
|
concurrent_to=None, warc_type=None, content_type=None, remote_ip=None,
|
||||||
profile=None, refers_to=None, refers_to_target_uri=None,
|
profile=None, refers_to=None, refers_to_target_uri=None,
|
||||||
refers_to_date=None):
|
refers_to_date=None, payload_digest=None):
|
||||||
|
|
||||||
if warc_date is None:
|
if warc_date is None:
|
||||||
warc_date = warctools.warc.warc_datetime_str(datetime.utcnow())
|
warc_date = warctools.warc.warc_datetime_str(datetime.utcnow())
|
||||||
@ -131,6 +132,8 @@ class WarcWriter:
|
|||||||
headers.append((warctools.WarcRecord.CONCURRENT_TO, concurrent_to))
|
headers.append((warctools.WarcRecord.CONCURRENT_TO, concurrent_to))
|
||||||
if content_type is not None:
|
if content_type is not None:
|
||||||
headers.append((warctools.WarcRecord.CONTENT_TYPE, content_type))
|
headers.append((warctools.WarcRecord.CONTENT_TYPE, content_type))
|
||||||
|
if payload_digest is not None:
|
||||||
|
headers.append((warctools.WarcRecord.PAYLOAD_DIGEST, payload_digest))
|
||||||
|
|
||||||
if recorder is not None:
|
if recorder is not None:
|
||||||
headers.append((warctools.WarcRecord.CONTENT_LENGTH, str(len(recorder)).encode('latin1')))
|
headers.append((warctools.WarcRecord.CONTENT_LENGTH, str(len(recorder)).encode('latin1')))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user