From 8b8124503a974e1ec8984c4db02fe1549a5a529a Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 5 Nov 2013 18:39:51 -0800 Subject: [PATCH] use gdbm instead of anydbm, since gdbm has sync() and hopefully is available everywhere(?) --- dump-anydbm.py | 10 +++++++--- warcprox.py | 11 +++-------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/dump-anydbm.py b/dump-anydbm.py index 44403b6..d0a0405 100755 --- a/dump-anydbm.py +++ b/dump-anydbm.py @@ -16,7 +16,11 @@ if __name__ == "__main__": sys.stderr.write("usage: {} DBM_FILE\n".format(sys.argv[0])) exit(1) - db = anydbm.open(sys.argv[1]) - for key in db: - print("{}:{}".format(key, db[key])) + # import whichdb + # which = whichdb.whichdb(sys.argv[1]) + # print('{} is a {} db'.format(sys.argv[1], which)) + db = anydbm.open(sys.argv[1]) + + for key in db.keys(): + print("{}:{}".format(key, db[key])) diff --git a/warcprox.py b/warcprox.py index 90a38b8..453880c 100755 --- a/warcprox.py +++ b/warcprox.py @@ -28,9 +28,9 @@ import signal import time import tempfile import base64 -import anydbm import json import traceback +import gdbm class CertificateAuthority(object): @@ -552,17 +552,14 @@ class DedupDb: else: logging.info('creating new deduplication database {}'.format(dbm_file)) - self.db = anydbm.open(dbm_file, 'c') - + self.db = gdbm.open(dbm_file, 'c') def close(self): self.db.close() def sync(self): - # XXX depends on db impl? self.db.sync() - def save(self, key, response_record, offset): record_id = response_record.get_header(warctools.WarcRecord.ID) url = response_record.get_header(warctools.WarcRecord.URL) @@ -858,17 +855,15 @@ class PlaybackIndexDb: else: logging.info('creating new playback index database {}'.format(dbm_file)) - self.db = anydbm.open(dbm_file, 'c') + self.db = gdbm.open(dbm_file, 'c') def close(self): self.db.close() def sync(self): - # XXX depends on db impl? self.db.sync() - def save(self, warcfile, recordset, offset): response_record = recordset[0] # XXX canonicalize url?