Use gdbm instead of anydbm, since gdbm has sync() and is hopefully available everywhere(?)

This commit is contained in:
Noah Levitt 2013-11-05 18:39:51 -08:00
parent 41b1db79e5
commit 8b8124503a
2 changed files with 10 additions and 11 deletions

View File

@@ -16,7 +16,11 @@ if __name__ == "__main__":
sys.stderr.write("usage: {} DBM_FILE\n".format(sys.argv[0])) sys.stderr.write("usage: {} DBM_FILE\n".format(sys.argv[0]))
exit(1) exit(1)
db = anydbm.open(sys.argv[1]) # import whichdb
for key in db: # which = whichdb.whichdb(sys.argv[1])
print("{}:{}".format(key, db[key])) # print('{} is a {} db'.format(sys.argv[1], which))
db = anydbm.open(sys.argv[1])
for key in db.keys():
print("{}:{}".format(key, db[key]))

View File

@@ -28,9 +28,9 @@ import signal
import time import time
import tempfile import tempfile
import base64 import base64
import anydbm
import json import json
import traceback import traceback
import gdbm
class CertificateAuthority(object): class CertificateAuthority(object):
@@ -552,17 +552,14 @@ class DedupDb:
else: else:
logging.info('creating new deduplication database {}'.format(dbm_file)) logging.info('creating new deduplication database {}'.format(dbm_file))
self.db = anydbm.open(dbm_file, 'c') self.db = gdbm.open(dbm_file, 'c')
def close(self): def close(self):
self.db.close() self.db.close()
def sync(self): def sync(self):
# XXX depends on db impl?
self.db.sync() self.db.sync()
def save(self, key, response_record, offset): def save(self, key, response_record, offset):
record_id = response_record.get_header(warctools.WarcRecord.ID) record_id = response_record.get_header(warctools.WarcRecord.ID)
url = response_record.get_header(warctools.WarcRecord.URL) url = response_record.get_header(warctools.WarcRecord.URL)
@@ -858,17 +855,15 @@ class PlaybackIndexDb:
else: else:
logging.info('creating new playback index database {}'.format(dbm_file)) logging.info('creating new playback index database {}'.format(dbm_file))
self.db = anydbm.open(dbm_file, 'c') self.db = gdbm.open(dbm_file, 'c')
def close(self): def close(self):
self.db.close() self.db.close()
def sync(self): def sync(self):
# XXX depends on db impl?
self.db.sync() self.db.sync()
def save(self, warcfile, recordset, offset): def save(self, warcfile, recordset, offset):
response_record = recordset[0] response_record = recordset[0]
# XXX canonicalize url? # XXX canonicalize url?