This commit is contained in:
Łukasz Langa 2013-11-11 00:43:22 -08:00
parent 1b8a582e34
commit e5f737b09d

View File

@ -80,11 +80,11 @@ def get_sqlite3_cursor(path, copy=False):
conn = sqlite3.connect(path) conn = sqlite3.connect(path)
atexit.register(conn.close) atexit.register(conn.close)
cur = conn.cursor() cur = conn.cursor()
names = set(name for name, in cur.execute('SELECT name FROM sqlite_master')) tables = set(t for t, in cur.execute('SELECT name FROM sqlite_master'))
if 'bitrot' not in names: if 'bitrot' not in tables:
cur.execute('CREATE TABLE bitrot (path TEXT PRIMARY KEY, ' cur.execute('CREATE TABLE bitrot (path TEXT PRIMARY KEY, '
'mtime INTEGER, hash TEXT, timestamp TEXT)') 'mtime INTEGER, hash TEXT, timestamp TEXT)')
if 'bitrot_hash_idx' not in names: if 'bitrot_hash_idx' not in tables:
cur.execute('CREATE INDEX bitrot_hash_idx ON bitrot (hash)') cur.execute('CREATE INDEX bitrot_hash_idx ON bitrot (hash)')
atexit.register(conn.commit) atexit.register(conn.commit)
return conn return conn
@ -176,8 +176,10 @@ def run(verbosity=1, test=False, follow_links=False, commit_interval=300,
break break
else: else:
new_paths.append(p) new_paths.append(p)
cur.execute('INSERT INTO bitrot VALUES (?, ?, ?, ?)', cur.execute(
(p_uni, new_mtime, new_sha1, update_ts)) 'INSERT INTO bitrot VALUES (?, ?, ?, ?)',
(p_uni, new_mtime, new_sha1, update_ts),
)
last_commit_time = tcommit(last_commit_time) last_commit_time = tcommit(last_commit_time)
continue continue
stored_mtime, stored_sha1, update_ts = row stored_mtime, stored_sha1, update_ts = row
@ -189,11 +191,12 @@ def run(verbosity=1, test=False, follow_links=False, commit_interval=300,
last_commit_time = tcommit(last_commit_time) last_commit_time = tcommit(last_commit_time)
elif stored_sha1 != new_sha1: elif stored_sha1 != new_sha1:
error_count += 1 error_count += 1
print('\rerror: SHA1 mismatch for {}: expected {}, got {}.' print(
' Original info from {}.'.format( '\rerror: SHA1 mismatch for {}: expected {}, got {}.'
p, stored_sha1, new_sha1, update_ts ' Original info from {}.'.format(
), p, stored_sha1, new_sha1, update_ts
file=sys.stderr, ),
file=sys.stderr,
) )
for path in missing_paths: for path in missing_paths:
cur.execute('DELETE FROM bitrot WHERE path=?', (path,)) cur.execute('DELETE FROM bitrot WHERE path=?', (path,))
@ -205,10 +208,13 @@ def run(verbosity=1, test=False, follow_links=False, commit_interval=300,
print('\rFinished. {:.2f} MiB of data read. {} errors found.' print('\rFinished. {:.2f} MiB of data read. {} errors found.'
''.format(total_size/1024/1024, error_count)) ''.format(total_size/1024/1024, error_count))
if verbosity == 1: if verbosity == 1:
print('{} entries in the database, {} new, {} updated, ' print(
'{} renamed, {} missing.'.format(all_count, len(new_paths), '{} entries in the database, {} new, {} updated, '
len(updated_paths), len(renamed_paths), len(missing_paths) '{} renamed, {} missing.'.format(
)) all_count, len(new_paths), len(updated_paths),
len(renamed_paths), len(missing_paths),
),
)
elif verbosity > 1: elif verbosity > 1:
print('{} entries in the database.'.format(all_count), end=' ') print('{} entries in the database.'.format(all_count), end=' ')
if new_paths: if new_paths:
@ -255,29 +261,37 @@ def stable_sum():
def run_from_command_line(): def run_from_command_line():
parser = argparse.ArgumentParser(prog='bitrot') parser = argparse.ArgumentParser(prog='bitrot')
parser.add_argument('-l', '--follow-links', action='store_true', parser.add_argument(
'-l', '--follow-links', action='store_true',
help='follow symbolic links and store target files\' hashes. Once ' help='follow symbolic links and store target files\' hashes. Once '
'a path is present in the database, it will be checked against ' 'a path is present in the database, it will be checked against '
'changes in content even if it becomes a symbolic link. In ' 'changes in content even if it becomes a symbolic link. In '
'other words, if you run `bitrot -l`, on subsequent runs ' 'other words, if you run `bitrot -l`, on subsequent runs '
'symbolic links registered during the first run will be ' 'symbolic links registered during the first run will be '
'properly followed and checked even if you run without `-l`.') 'properly followed and checked even if you run without `-l`.')
parser.add_argument('-q', '--quiet', action='store_true', parser.add_argument(
'-q', '--quiet', action='store_true',
help='don\'t print anything besides checksum errors') help='don\'t print anything besides checksum errors')
parser.add_argument('-s', '--sum', action='store_true', parser.add_argument(
'-s', '--sum', action='store_true',
help='using only the data already gathered, return a SHA-512 sum ' help='using only the data already gathered, return a SHA-512 sum '
'of hashes of all the entries in the database. No timestamps ' 'of hashes of all the entries in the database. No timestamps '
'are used in calculation.') 'are used in calculation.')
parser.add_argument('-v', '--verbose', action='store_true', parser.add_argument(
'-v', '--verbose', action='store_true',
help='list new, updated and missing entries') help='list new, updated and missing entries')
parser.add_argument('-t', '--test', action='store_true', parser.add_argument(
'-t', '--test', action='store_true',
help='just test against an existing database, don\'t update anything') help='just test against an existing database, don\'t update anything')
parser.add_argument('--version', action='version', parser.add_argument(
'--version', action='version',
version='%(prog)s {}.{}.{}'.format(*VERSION)) version='%(prog)s {}.{}.{}'.format(*VERSION))
parser.add_argument('--commit-interval', type=float, default=300, parser.add_argument(
'--commit-interval', type=float, default=300,
help='min time in seconds between commits ' help='min time in seconds between commits '
'(0 commits on every operation)') '(0 commits on every operation)')
parser.add_argument('--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE, parser.add_argument(
'--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
help='read files this many bytes at a time') help='read files this many bytes at a time')
args = parser.parse_args() args = parser.parse_args()
if args.sum: if args.sum:
@ -291,7 +305,8 @@ def run_from_command_line():
verbosity = 0 verbosity = 0
elif args.verbose: elif args.verbose:
verbosity = 2 verbosity = 2
run(verbosity=verbosity, run(
verbosity=verbosity,
test=args.test, test=args.test,
follow_links=args.follow_links, follow_links=args.follow_links,
commit_interval=args.commit_interval, commit_interval=args.commit_interval,