Use gdbm instead of anydbm, since gdbm provides sync() and is hopefully available everywhere (?)

This commit is contained in:
Noah Levitt 2013-11-05 18:39:51 -08:00
parent 41b1db79e5
commit 8b8124503a
2 changed files with 10 additions and 11 deletions

View File

@ -16,7 +16,11 @@ if __name__ == "__main__":
sys.stderr.write("usage: {} DBM_FILE\n".format(sys.argv[0]))
exit(1)
db = anydbm.open(sys.argv[1])
for key in db:
print("{}:{}".format(key, db[key]))
# import whichdb
# which = whichdb.whichdb(sys.argv[1])
# print('{} is a {} db'.format(sys.argv[1], which))
db = anydbm.open(sys.argv[1])
for key in db.keys():
print("{}:{}".format(key, db[key]))

View File

@ -28,9 +28,9 @@ import signal
import time
import tempfile
import base64
import anydbm
import json
import traceback
import gdbm
class CertificateAuthority(object):
@ -552,17 +552,14 @@ class DedupDb:
else:
logging.info('creating new deduplication database {}'.format(dbm_file))
self.db = anydbm.open(dbm_file, 'c')
self.db = gdbm.open(dbm_file, 'c')
# Close the dbm handle stored in self.db (the deduplication database opened above).
def close(self):
self.db.close()
# Call sync() on the dbm handle stored in self.db.
def sync(self):
# XXX depends on db impl?
# NOTE(review): per this commit's message, gdbm provides sync(); if self.db is
# always opened with gdbm.open() the XXX above may be obsolete — confirm.
self.db.sync()
def save(self, key, response_record, offset):
record_id = response_record.get_header(warctools.WarcRecord.ID)
url = response_record.get_header(warctools.WarcRecord.URL)
@ -858,17 +855,15 @@ class PlaybackIndexDb:
else:
logging.info('creating new playback index database {}'.format(dbm_file))
self.db = anydbm.open(dbm_file, 'c')
self.db = gdbm.open(dbm_file, 'c')
# Close the dbm handle stored in self.db (the playback index database opened above).
def close(self):
self.db.close()
# Call sync() on the dbm handle stored in self.db.
def sync(self):
# XXX depends on db impl?
# NOTE(review): per this commit's message, gdbm provides sync(); if self.db is
# always opened with gdbm.open() the XXX above may be obsolete — confirm.
self.db.sync()
def save(self, warcfile, recordset, offset):
response_record = recordset[0]
# XXX canonicalize url?