mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
recorder: split up _open_file() into get_new_filename() and allow_new_file(), so that recording can be skipped by returning False
from allow_new_file(). create_warcinfo_record(): switch to a dict argument instead of kwargs; update tests.
This commit is contained in:
parent
498f87fb54
commit
1b09015954
@ -471,7 +471,7 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
|
|||||||
'format': 'WARC File Format 1.0',
|
'format': 'WARC File Format 1.0',
|
||||||
'json-metadata': json.dumps({'foo': 'bar'})}
|
'json-metadata': json.dumps({'foo': 'bar'})}
|
||||||
|
|
||||||
record = simplewriter.create_warcinfo_record('testfile.warc.gz', **params)
|
record = simplewriter.create_warcinfo_record('testfile.warc.gz', params)
|
||||||
simplewriter.write_record(record)
|
simplewriter.write_record(record)
|
||||||
buff = simplewriter.get_buffer()
|
buff = simplewriter.get_buffer()
|
||||||
assert isinstance(buff, bytes)
|
assert isinstance(buff, bytes)
|
||||||
|
@ -152,7 +152,7 @@ class BaseWARCWriter(object):
|
|||||||
|
|
||||||
return record_type, record
|
return record_type, record
|
||||||
|
|
||||||
def create_warcinfo_record(self, filename, **kwargs):
|
def create_warcinfo_record(self, filename, info):
|
||||||
warc_headers = {}
|
warc_headers = {}
|
||||||
warc_headers['WARC-Record-ID'] = self._make_warc_id()
|
warc_headers['WARC-Record-ID'] = self._make_warc_id()
|
||||||
warc_headers['WARC-Type'] = 'warcinfo'
|
warc_headers['WARC-Type'] = 'warcinfo'
|
||||||
@ -161,7 +161,7 @@ class BaseWARCWriter(object):
|
|||||||
warc_headers['WARC-Date'] = datetime_to_iso_date(datetime.datetime.utcnow())
|
warc_headers['WARC-Date'] = datetime_to_iso_date(datetime.datetime.utcnow())
|
||||||
|
|
||||||
warcinfo = BytesIO()
|
warcinfo = BytesIO()
|
||||||
for n, v in six.iteritems(kwargs):
|
for n, v in six.iteritems(info):
|
||||||
self._header(warcinfo, n, v)
|
self._header(warcinfo, n, v)
|
||||||
|
|
||||||
warcinfo.seek(0)
|
warcinfo.seek(0)
|
||||||
@ -344,7 +344,7 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
|||||||
|
|
||||||
self.fh_cache = {}
|
self.fh_cache = {}
|
||||||
|
|
||||||
def _open_file(self, dir_, params):
|
def get_new_filename(self, dir_, params):
|
||||||
timestamp = timestamp20_now()
|
timestamp = timestamp20_now()
|
||||||
|
|
||||||
randstr = base64.b32encode(os.urandom(5)).decode('utf-8')
|
randstr = base64.b32encode(os.urandom(5)).decode('utf-8')
|
||||||
@ -354,6 +354,12 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
|||||||
timestamp=timestamp,
|
timestamp=timestamp,
|
||||||
random=randstr)
|
random=randstr)
|
||||||
|
|
||||||
|
return filename
|
||||||
|
|
||||||
|
def allow_new_file(self, filename, params):
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _open_file(self, filename, params):
|
||||||
path, name = os.path.split(filename)
|
path, name = os.path.split(filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -366,7 +372,7 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
|||||||
if self.dedup_index:
|
if self.dedup_index:
|
||||||
self.dedup_index.add_warc_file(filename, params)
|
self.dedup_index.add_warc_file(filename, params)
|
||||||
|
|
||||||
return fh, filename
|
return fh
|
||||||
|
|
||||||
def _close_file(self, fh):
|
def _close_file(self, fh):
|
||||||
fcntl.flock(fh, fcntl.LOCK_UN)
|
fcntl.flock(fh, fcntl.LOCK_UN)
|
||||||
@ -415,7 +421,13 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
|||||||
out, filename = result
|
out, filename = result
|
||||||
is_new = False
|
is_new = False
|
||||||
else:
|
else:
|
||||||
out, filename = self._open_file(full_dir, params)
|
filename = self.get_new_filename(full_dir, params)
|
||||||
|
|
||||||
|
if not self.allow_new_file(filename, params):
|
||||||
|
return
|
||||||
|
|
||||||
|
out = self._open_file(filename, params)
|
||||||
|
|
||||||
is_new = True
|
is_new = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user