1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

update to warcio==1.3

recorder: use ArcWarcRecordLoader() for parsing response record
multifilewarcwriter: ensure digest is computed before trying to look up revisits
This commit is contained in:
Ilya Kreymer 2017-05-01 21:50:39 -07:00
parent 58f39f0558
commit 147c3217dd
3 changed files with 7 additions and 2 deletions

View File

@@ -44,6 +44,9 @@ class MultiFileWARCWriter(BaseWARCWriter):
if not self.dedup_index or record.rec_type != 'response':
return record
+# ensure payload digest is computed at this point
+self.ensure_digest(record, block=False, payload=True)
try:
url = record.rec_headers.get_header('WARC-Target-URI')
digest = record.rec_headers.get_header('WARC-Payload-Digest')

View File

@@ -2,6 +2,8 @@ from pywb.webagg.utils import StreamIter, BUFF_SIZE
from pywb.webagg.utils import ParamFormatter, res_template
from pywb.webagg.inputrequest import DirectWSGIInputRequest
+from warcio.recordloader import ArcWarcRecordLoader
from pywb.recorder.filters import SkipRangeRequestFilter, CollectionFilter
from six.moves.urllib.parse import parse_qsl
@@ -69,7 +71,7 @@ class RecorderApp(object):
resp_length = resp_pay.tell()
resp_pay.seek(0)
-resp = self.writer.create_record_from_stream(resp_pay, resp_length)
+resp = ArcWarcRecordLoader().parse_record_stream(resp_pay)
if resp.rec_type == 'response':
uri = resp.rec_headers.get_header('WARC-Target-Uri')

View File

@@ -1,5 +1,5 @@
six
-warcio==1.2
+warcio==1.3
chardet
requests
redis