Merge branch 'periodic-commits' of git://github.com/yang/bitrot into yang-periodic-commits
Conflicts: src/bitrot.py
This commit is contained in:
commit
a8faff93e1
@ -30,6 +30,7 @@ import argparse
|
||||
import atexit
|
||||
import datetime
|
||||
import errno
|
||||
import functools
|
||||
import hashlib
|
||||
import os
|
||||
import shutil
|
||||
@ -37,22 +38,28 @@ import sqlite3
|
||||
import stat
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
|
||||
CHUNK_SIZE = 16384
|
||||
DEFAULT_CHUNK_SIZE = 16384
|
||||
DOT_THRESHOLD = 200
|
||||
VERSION = (0, 5, 1)
|
||||
|
||||
|
||||
def sha1(path):
|
||||
def sha1(path, chunk_size):
|
||||
digest = hashlib.sha1()
|
||||
with open(path) as f:
|
||||
d = f.read(CHUNK_SIZE)
|
||||
d = f.read(chunk_size)
|
||||
while d:
|
||||
digest.update(d)
|
||||
d = f.read(CHUNK_SIZE)
|
||||
d = f.read(chunk_size)
|
||||
return digest.hexdigest()
|
||||
|
||||
def throttled_commit(conn, commit_interval, last_commit_time):
|
||||
if time.time() - last_commit_time > commit_interval:
|
||||
conn.commit()
|
||||
return time.time()
|
||||
return last_commit_time
|
||||
|
||||
def get_sqlite3_cursor(path, copy=False):
|
||||
if copy:
|
||||
@ -80,7 +87,7 @@ def get_sqlite3_cursor(path, copy=False):
|
||||
return conn
|
||||
|
||||
|
||||
def run(verbosity=1, test=False):
|
||||
def run(verbosity=1, test=False, commit_interval=300, chunk_size=DEFAULT_CHUNK_SIZE):
|
||||
current_dir = b'.' # sic, relative path
|
||||
bitrot_db = os.path.join(current_dir, b'.bitrot.db')
|
||||
conn = get_sqlite3_cursor(bitrot_db, copy=test)
|
||||
@ -113,6 +120,8 @@ def run(verbosity=1, test=False):
|
||||
paths.append(p)
|
||||
total_size += st.st_size
|
||||
paths.sort()
|
||||
last_commit_time = 0
|
||||
tcommit = functools.partial(throttled_commit, conn, commit_interval)
|
||||
for p in paths:
|
||||
st = os.stat(p)
|
||||
new_mtime = int(st.st_mtime)
|
||||
@ -123,7 +132,7 @@ def run(verbosity=1, test=False):
|
||||
sys.stdout.write(size_fmt)
|
||||
sys.stdout.flush()
|
||||
last_reported_size = size_fmt
|
||||
new_sha1 = sha1(p)
|
||||
new_sha1 = sha1(p, chunk_size)
|
||||
update_ts = datetime.datetime.utcnow().strftime(
|
||||
'%Y-%m-%d %H:%M:%S%z'
|
||||
)
|
||||
@ -144,13 +153,14 @@ def run(verbosity=1, test=False):
|
||||
cur.execute('UPDATE bitrot SET mtime=?, path=?, '
|
||||
'timestamp=? WHERE hash=?',
|
||||
(new_mtime, p_uni, update_ts, new_sha1))
|
||||
conn.commit()
|
||||
|
||||
last_commit_time = tcommit(last_commit_time)
|
||||
break
|
||||
else:
|
||||
new_paths.append(p)
|
||||
cur.execute('INSERT INTO bitrot VALUES (?, ?, ?, ?)',
|
||||
(p_uni, new_mtime, new_sha1, update_ts))
|
||||
conn.commit()
|
||||
last_commit_time = tcommit(last_commit_time)
|
||||
continue
|
||||
stored_mtime, stored_sha1, update_ts = row
|
||||
if int(stored_mtime) != new_mtime:
|
||||
@ -158,7 +168,7 @@ def run(verbosity=1, test=False):
|
||||
cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? '
|
||||
'WHERE path=?',
|
||||
(new_mtime, new_sha1, update_ts, p_uni))
|
||||
conn.commit()
|
||||
last_commit_time = tcommit(last_commit_time)
|
||||
elif stored_sha1 != new_sha1:
|
||||
error_count += 1
|
||||
print('\rerror: SHA1 mismatch for {}: expected {}, got {}.'
|
||||
@ -169,7 +179,8 @@ def run(verbosity=1, test=False):
|
||||
)
|
||||
for path in missing_paths:
|
||||
cur.execute('DELETE FROM bitrot WHERE path=?', (path,))
|
||||
conn.commit()
|
||||
last_commit_time = tcommit(last_commit_time)
|
||||
conn.commit()
|
||||
cur.execute('SELECT COUNT(path) FROM bitrot')
|
||||
all_count = cur.fetchone()[0]
|
||||
if verbosity:
|
||||
@ -238,6 +249,10 @@ def run_from_command_line():
|
||||
help='just test against an existing database, don\'t update anything')
|
||||
parser.add_argument('--version', action='version',
|
||||
version='%(prog)s {}.{}.{}'.format(*VERSION))
|
||||
parser.add_argument('--commit-interval', type=float, default=300,
|
||||
help='min time between commits (0 commits on every operation)')
|
||||
parser.add_argument('--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
||||
help='read files this many bytes at a time')
|
||||
args = parser.parse_args()
|
||||
if args.sum:
|
||||
try:
|
||||
@ -250,7 +265,9 @@ def run_from_command_line():
|
||||
verbosity = 0
|
||||
elif args.verbose:
|
||||
verbosity = 2
|
||||
run(verbosity=verbosity, test=args.test)
|
||||
run(verbosity=verbosity, test=args.test,
|
||||
commit_interval=args.commit_interval,
|
||||
chunk_size=args.chunk_size)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Loading…
x
Reference in New Issue
Block a user