diff --git a/pywb/recorder/multifilewarcwriter.py b/pywb/recorder/multifilewarcwriter.py index 9c0396ef..06f980bb 100644 --- a/pywb/recorder/multifilewarcwriter.py +++ b/pywb/recorder/multifilewarcwriter.py @@ -44,6 +44,9 @@ class MultiFileWARCWriter(BaseWARCWriter): if not self.dedup_index or record.rec_type != 'response': return record + # ensure payload digest is computed at this point + self.ensure_digest(record, block=False, payload=True) + try: url = record.rec_headers.get_header('WARC-Target-URI') digest = record.rec_headers.get_header('WARC-Payload-Digest') diff --git a/pywb/recorder/recorderapp.py b/pywb/recorder/recorderapp.py index 8641fc61..56d897ef 100644 --- a/pywb/recorder/recorderapp.py +++ b/pywb/recorder/recorderapp.py @@ -2,6 +2,8 @@ from pywb.webagg.utils import StreamIter, BUFF_SIZE from pywb.webagg.utils import ParamFormatter, res_template from pywb.webagg.inputrequest import DirectWSGIInputRequest +from warcio.recordloader import ArcWarcRecordLoader + from pywb.recorder.filters import SkipRangeRequestFilter, CollectionFilter from six.moves.urllib.parse import parse_qsl @@ -69,7 +71,7 @@ class RecorderApp(object): resp_length = resp_pay.tell() resp_pay.seek(0) - resp = self.writer.create_record_from_stream(resp_pay, resp_length) + resp = ArcWarcRecordLoader().parse_record_stream(resp_pay) if resp.rec_type == 'response': uri = resp.rec_headers.get_header('WARC-Target-Uri') diff --git a/requirements.txt b/requirements.txt index 5e6367b9..61b4653a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ six -warcio==1.2 +warcio==1.3 chardet requests redis