mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Use gdbm instead of anydbm: gdbm always provides sync(), whereas with anydbm the availability of sync() depends on which backend gets selected; gdbm is hopefully available everywhere(?)
This commit is contained in:
parent
41b1db79e5
commit
8b8124503a
@ -16,7 +16,11 @@ if __name__ == "__main__":
|
|||||||
sys.stderr.write("usage: {} DBM_FILE\n".format(sys.argv[0]))
|
sys.stderr.write("usage: {} DBM_FILE\n".format(sys.argv[0]))
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
db = anydbm.open(sys.argv[1])
|
# import whichdb
|
||||||
for key in db:
|
# which = whichdb.whichdb(sys.argv[1])
|
||||||
print("{}:{}".format(key, db[key]))
|
# print('{} is a {} db'.format(sys.argv[1], which))
|
||||||
|
|
||||||
|
db = anydbm.open(sys.argv[1])
|
||||||
|
|
||||||
|
for key in db.keys():
|
||||||
|
print("{}:{}".format(key, db[key]))
|
||||||
|
11
warcprox.py
11
warcprox.py
@ -28,9 +28,9 @@ import signal
|
|||||||
import time
|
import time
|
||||||
import tempfile
|
import tempfile
|
||||||
import base64
|
import base64
|
||||||
import anydbm
|
|
||||||
import json
|
import json
|
||||||
import traceback
|
import traceback
|
||||||
|
import gdbm
|
||||||
|
|
||||||
class CertificateAuthority(object):
|
class CertificateAuthority(object):
|
||||||
|
|
||||||
@ -552,17 +552,14 @@ class DedupDb:
|
|||||||
else:
|
else:
|
||||||
logging.info('creating new deduplication database {}'.format(dbm_file))
|
logging.info('creating new deduplication database {}'.format(dbm_file))
|
||||||
|
|
||||||
self.db = anydbm.open(dbm_file, 'c')
|
self.db = gdbm.open(dbm_file, 'c')
|
||||||
|
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
self.db.close()
|
self.db.close()
|
||||||
|
|
||||||
def sync(self):
|
def sync(self):
|
||||||
# XXX depends on db impl?
|
|
||||||
self.db.sync()
|
self.db.sync()
|
||||||
|
|
||||||
|
|
||||||
def save(self, key, response_record, offset):
|
def save(self, key, response_record, offset):
|
||||||
record_id = response_record.get_header(warctools.WarcRecord.ID)
|
record_id = response_record.get_header(warctools.WarcRecord.ID)
|
||||||
url = response_record.get_header(warctools.WarcRecord.URL)
|
url = response_record.get_header(warctools.WarcRecord.URL)
|
||||||
@ -858,17 +855,15 @@ class PlaybackIndexDb:
|
|||||||
else:
|
else:
|
||||||
logging.info('creating new playback index database {}'.format(dbm_file))
|
logging.info('creating new playback index database {}'.format(dbm_file))
|
||||||
|
|
||||||
self.db = anydbm.open(dbm_file, 'c')
|
self.db = gdbm.open(dbm_file, 'c')
|
||||||
|
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
self.db.close()
|
self.db.close()
|
||||||
|
|
||||||
def sync(self):
|
def sync(self):
|
||||||
# XXX depends on db impl?
|
|
||||||
self.db.sync()
|
self.db.sync()
|
||||||
|
|
||||||
|
|
||||||
def save(self, warcfile, recordset, offset):
|
def save(self, warcfile, recordset, offset):
|
||||||
response_record = recordset[0]
|
response_record = recordset[0]
|
||||||
# XXX canonicalize url?
|
# XXX canonicalize url?
|
||||||
|
Loading…
x
Reference in New Issue
Block a user