mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
update to warcio 1.1
archiveindexer: explicitly consume content for each record
This commit is contained in:
parent
af7bbfd6e1
commit
7a8fed2681
@ -1,6 +1,8 @@
|
|||||||
from pywb.utils.canonicalize import canonicalize
|
from pywb.utils.canonicalize import canonicalize
|
||||||
from pywb.utils.loaders import extract_post_query, append_post_query
|
from pywb.utils.loaders import extract_post_query, append_post_query
|
||||||
|
|
||||||
|
from pywb.webagg.utils import BUFF_SIZE
|
||||||
|
|
||||||
from warcio.timeutils import iso_date_to_timestamp
|
from warcio.timeutils import iso_date_to_timestamp
|
||||||
from warcio.archiveiterator import ArchiveIterator
|
from warcio.archiveiterator import ArchiveIterator
|
||||||
|
|
||||||
@ -188,7 +190,14 @@ class DefaultRecordParser(object):
|
|||||||
entry.record = record
|
entry.record = record
|
||||||
|
|
||||||
self.begin_payload(compute_digest, entry)
|
self.begin_payload(compute_digest, entry)
|
||||||
raw_iter.read_to_end(record, self.handle_payload)
|
|
||||||
|
while True:
|
||||||
|
buff = record.raw_stream.read(BUFF_SIZE)
|
||||||
|
if not buff:
|
||||||
|
break
|
||||||
|
self.handle_payload(buff)
|
||||||
|
|
||||||
|
raw_iter.read_to_end(record)
|
||||||
|
|
||||||
entry.set_rec_info(*raw_iter.member_info)
|
entry.set_rec_info(*raw_iter.member_info)
|
||||||
self.end_payload(entry)
|
self.end_payload(entry)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
six
|
six
|
||||||
warcio
|
warcio==1.1
|
||||||
chardet
|
chardet
|
||||||
requests
|
requests
|
||||||
redis
|
redis
|
||||||
|
Loading…
x
Reference in New Issue
Block a user