1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

update to warcio 1.1

archiveindexer: explicitly consume content for each record
This commit is contained in:
Ilya Kreymer 2017-03-10 10:05:39 -08:00
parent af7bbfd6e1
commit 7a8fed2681
2 changed files with 11 additions and 2 deletions

View File

@ -1,6 +1,8 @@
from pywb.utils.canonicalize import canonicalize
from pywb.utils.loaders import extract_post_query, append_post_query
from pywb.webagg.utils import BUFF_SIZE
from warcio.timeutils import iso_date_to_timestamp
from warcio.archiveiterator import ArchiveIterator
@ -188,7 +190,14 @@ class DefaultRecordParser(object):
entry.record = record
self.begin_payload(compute_digest, entry)
raw_iter.read_to_end(record, self.handle_payload)
while True:
buff = record.raw_stream.read(BUFF_SIZE)
if not buff:
break
self.handle_payload(buff)
raw_iter.read_to_end(record)
entry.set_rec_info(*raw_iter.member_info)
self.end_payload(entry)

View File

@ -1,5 +1,5 @@
six
warcio
warcio==1.1
chardet
requests
redis