mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
warc: add ability to set the read block size (default 16384) in ArchiveIterator
This commit is contained in:
parent
5be65f2945
commit
841fd3f7b4
@ -32,12 +32,11 @@ class ArchiveIterator(object):
|
|||||||
|
|
||||||
self.member_info = None
|
self.member_info = None
|
||||||
|
|
||||||
def iter_records(self):
|
def iter_records(self, block_size=16384):
|
||||||
""" iterate over each record
|
""" iterate over each record
|
||||||
"""
|
"""
|
||||||
|
|
||||||
decomp_type = 'gzip'
|
decomp_type = 'gzip'
|
||||||
block_size = 16384
|
|
||||||
|
|
||||||
self.reader = DecompressingBufferedReader(self.fh,
|
self.reader = DecompressingBufferedReader(self.fh,
|
||||||
block_size=block_size)
|
block_size=block_size)
|
||||||
@ -204,8 +203,9 @@ class ArchiveIndexEntry(object):
|
|||||||
def create_record_iter(arcv_iter, options):
|
def create_record_iter(arcv_iter, options):
|
||||||
append_post = options.get('append_post')
|
append_post = options.get('append_post')
|
||||||
include_all = options.get('include_all')
|
include_all = options.get('include_all')
|
||||||
|
block_size = options.get('block_size', 16384)
|
||||||
|
|
||||||
for record in arcv_iter.iter_records():
|
for record in arcv_iter.iter_records(block_size):
|
||||||
entry = None
|
entry = None
|
||||||
|
|
||||||
if not include_all and (record.status_headers.get_statuscode() == '-'):
|
if not include_all and (record.status_headers.get_statuscode() == '-'):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user