1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

recorder: split up _open_file() into get_new_filename() and allow_new_file(), so that recording can be skipped by returning False from allow_new_file()
create_warcinfo_record() - switch to dict args over kwargs, update tests
This commit is contained in:
Ilya Kreymer 2016-07-30 13:11:12 -04:00
parent 498f87fb54
commit 1b09015954
2 changed files with 18 additions and 6 deletions

View File

@ -471,7 +471,7 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
'format': 'WARC File Format 1.0',
'json-metadata': json.dumps({'foo': 'bar'})}
record = simplewriter.create_warcinfo_record('testfile.warc.gz', **params)
record = simplewriter.create_warcinfo_record('testfile.warc.gz', params)
simplewriter.write_record(record)
buff = simplewriter.get_buffer()
assert isinstance(buff, bytes)

View File

@ -152,7 +152,7 @@ class BaseWARCWriter(object):
return record_type, record
def create_warcinfo_record(self, filename, **kwargs):
def create_warcinfo_record(self, filename, info):
warc_headers = {}
warc_headers['WARC-Record-ID'] = self._make_warc_id()
warc_headers['WARC-Type'] = 'warcinfo'
@ -161,7 +161,7 @@ class BaseWARCWriter(object):
warc_headers['WARC-Date'] = datetime_to_iso_date(datetime.datetime.utcnow())
warcinfo = BytesIO()
for n, v in six.iteritems(kwargs):
for n, v in six.iteritems(info):
self._header(warcinfo, n, v)
warcinfo.seek(0)
@ -344,7 +344,7 @@ class MultiFileWARCWriter(BaseWARCWriter):
self.fh_cache = {}
def _open_file(self, dir_, params):
def get_new_filename(self, dir_, params):
timestamp = timestamp20_now()
randstr = base64.b32encode(os.urandom(5)).decode('utf-8')
@ -354,6 +354,12 @@ class MultiFileWARCWriter(BaseWARCWriter):
timestamp=timestamp,
random=randstr)
return filename
# Decide whether a new WARC file may be opened for `filename`.
# This base implementation always allows it. The caller returns early,
# without opening a file, when this returns a false value, so an
# override can return False to skip recording.
def allow_new_file(self, filename, params):
return True
def _open_file(self, filename, params):
path, name = os.path.split(filename)
try:
@ -366,7 +372,7 @@ class MultiFileWARCWriter(BaseWARCWriter):
if self.dedup_index:
self.dedup_index.add_warc_file(filename, params)
return fh, filename
return fh
def _close_file(self, fh):
fcntl.flock(fh, fcntl.LOCK_UN)
@ -415,7 +421,13 @@ class MultiFileWARCWriter(BaseWARCWriter):
out, filename = result
is_new = False
else:
out, filename = self._open_file(full_dir, params)
filename = self.get_new_filename(full_dir, params)
if not self.allow_new_file(filename, params):
return
out = self._open_file(filename, params)
is_new = True
try: