mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
update to warcio==1.3
recorder: use ArcWarcRecordLoader() for parsing the response record; multifilewarcwriter: ensure the digest is computed before trying to look up revisits
This commit is contained in:
parent
58f39f0558
commit
147c3217dd
@ -44,6 +44,9 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
||||
if not self.dedup_index or record.rec_type != 'response':
|
||||
return record
|
||||
|
||||
# ensure payload digest is computed at this point
|
||||
self.ensure_digest(record, block=False, payload=True)
|
||||
|
||||
try:
|
||||
url = record.rec_headers.get_header('WARC-Target-URI')
|
||||
digest = record.rec_headers.get_header('WARC-Payload-Digest')
|
||||
|
@ -2,6 +2,8 @@ from pywb.webagg.utils import StreamIter, BUFF_SIZE
|
||||
from pywb.webagg.utils import ParamFormatter, res_template
|
||||
from pywb.webagg.inputrequest import DirectWSGIInputRequest
|
||||
|
||||
from warcio.recordloader import ArcWarcRecordLoader
|
||||
|
||||
from pywb.recorder.filters import SkipRangeRequestFilter, CollectionFilter
|
||||
|
||||
from six.moves.urllib.parse import parse_qsl
|
||||
@ -69,7 +71,7 @@ class RecorderApp(object):
|
||||
|
||||
resp_length = resp_pay.tell()
|
||||
resp_pay.seek(0)
|
||||
resp = self.writer.create_record_from_stream(resp_pay, resp_length)
|
||||
resp = ArcWarcRecordLoader().parse_record_stream(resp_pay)
|
||||
|
||||
if resp.rec_type == 'response':
|
||||
uri = resp.rec_headers.get_header('WARC-Target-Uri')
|
||||
|
@ -1,5 +1,5 @@
|
||||
six
|
||||
warcio==1.2
|
||||
warcio==1.3
|
||||
chardet
|
||||
requests
|
||||
redis
|
||||
|
Loading…
x
Reference in New Issue
Block a user