diff --git a/README.rst b/README.rst index 1ba0acf..ae4a4f5 100644 --- a/README.rst +++ b/README.rst @@ -21,6 +21,9 @@ files with a changed modification date. Most importantly however, it will report all errors, e.g. files that changed on the hard drive but still have the same modification date. +All paths stored in ``.bitrot.db`` are relative so it's safe to rescan a folder +after moving it to another drive. + Performance ----------- @@ -33,6 +36,13 @@ under 10 minutes. Both tests on HFS+. Change Log ---------- +0.2.0 +~~~~~ + +* ``--verbose`` and ``--quiet`` command-line arguments + +* if a file is no longer there, its entry is removed from the database + 0.1.0 ~~~~~ diff --git a/bin/bitrot b/bin/bitrot index df097cf..670c601 100644 --- a/bin/bitrot +++ b/bin/bitrot @@ -26,5 +26,5 @@ from __future__ import division from __future__ import print_function from __future__ import unicode_literals -from bitrot import run -run() +from bitrot import run_from_command_line +run_from_command_line() diff --git a/setup.py b/setup.py index 7ccb061..d38ac36 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,8 @@ from setuptools import setup, find_packages reload(sys) sys.setdefaultencoding('utf8') -ld_file = open(os.path.join(os.path.dirname(__file__), 'README.rst')) +current_dir = os.path.abspath(os.path.dirname(__file__)) +ld_file = open(os.path.join(current_dir, 'README.rst')) try: long_description = ld_file.read() finally: @@ -36,9 +37,13 @@ finally: # We let it die a horrible tracebacking death if reading the file fails. # We couldn't sensibly recover anyway: we need the long description. 
+sys.path.insert(0, current_dir + os.sep + 'src') +from bitrot import VERSION +release = ".".join(str(num) for num in VERSION) + setup( name = 'bitrot', - version = '0.1.0', + version = release, author = 'Łukasz Langa', author_email = 'lukasz@langa.pl', description = ("Detects bit rotten files on the hard drive to save your " diff --git a/src/bitrot.py b/src/bitrot.py index 5c9428d..4e62d42 100644 --- a/src/bitrot.py +++ b/src/bitrot.py @@ -26,6 +26,7 @@ from __future__ import division from __future__ import print_function from __future__ import unicode_literals +import argparse import atexit import datetime import hashlib @@ -36,7 +37,7 @@ import sys CHUNK_SIZE = 16384 DOT_THRESHOLD = 200 -VERSION = (0, 1, 0) +VERSION = (0, 2, 0) def sha1(path): @@ -62,21 +63,27 @@ def get_sqlite3_cursor(path): return conn -def run(): +def run(verbosity=1): current_dir = b'.' # sic, relative path bitrot_db = os.path.join(current_dir, b'.bitrot.db') conn = get_sqlite3_cursor(bitrot_db) cur = conn.cursor() - new_count = 0 - update_count = 0 + new_paths = [] + updated_paths = [] error_count = 0 dot_count = 0 + missing_paths = set() + cur.execute('SELECT path FROM bitrot') + row = cur.fetchone() + while row: + missing_paths.add(row[0]) + row = cur.fetchone() for path, _, files in os.walk(current_dir): for f in files: - dot_count = (dot_count + 1) % DOT_THRESHOLD - if not dot_count: + if verbosity and not dot_count: sys.stdout.write('.') sys.stdout.flush() + dot_count = (dot_count + 1) % DOT_THRESHOLD p = os.path.join(path, f) if p == bitrot_db: continue @@ -86,18 +93,19 @@ def run(): "%Y-%m-%d %H:%M:%S%z" ) p_uni = p.decode('utf8') + missing_paths.remove(p_uni) cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE ' 'path=?', (p_uni,)) row = cur.fetchone() if not row: - new_count += 1 + new_paths.append(p) cur.execute('INSERT INTO bitrot VALUES (?, ?, ?, ?)', (p_uni, new_mtime, new_sha1, update_ts)) conn.commit() continue stored_mtime, stored_sha1, update_ts = row if 
int(stored_mtime) != new_mtime: - update_count += 1 + updated_paths.append(p) cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? ' 'WHERE path=?', (new_mtime, new_sha1, update_ts, p_uni)) @@ -110,15 +118,56 @@ def run(): ), file=sys.stderr, ) + for path in missing_paths: + cur.execute('DELETE FROM bitrot WHERE path=?', (path,)) + conn.commit() cur.execute('SELECT COUNT(path) FROM bitrot') all_count = cur.fetchone()[0] - print("\nFinished. {} errors found.".format(error_count)) - print("{} entries in the database, {} new, {} updated.".format( - all_count, new_count, update_count - )) + if verbosity: + print("\rFinished. {} errors found.".format(error_count)) + if verbosity == 1: + print("{} entries in the database, {} new, {} updated, {} missing." + "".format(all_count, len(new_paths), len(updated_paths), + len(missing_paths))) + elif verbosity > 1: + print("{} entries in the database.".format(all_count), end=' ') + if new_paths: + print("{} entries new:".format(len(new_paths))) + new_paths.sort() + for path in new_paths: + print(" ", path) + if updated_paths: + print("{} entries updated:".format(len(updated_paths))) + updated_paths.sort() + for path in updated_paths: + print(" ", path) + if missing_paths: + print("{} entries missing:".format(len(missing_paths))) + missing_paths = sorted(missing_paths) + for path in missing_paths: + print(" ", path) + if not any((new_paths, updated_paths, missing_paths)): + print() if error_count: sys.exit(1) +def run_from_command_line(): + parser = argparse.ArgumentParser(prog='bitrot') + parser.add_argument('-q', '--quiet', action='store_true', + help='don\'t print anything besides checksum errors') + parser.add_argument('-v', '--verbose', action='store_true', + help='list new, updated and missing entries') + parser.add_argument('--version', action='version', + version='%(prog)s {}.{}.{}'.format(*VERSION)) + args = parser.parse_args() + verbosity = 1 + if args.quiet: + verbosity = 0 + elif args.verbose: + verbosity = 2 + 
run(verbosity=verbosity) + + if __name__ == '__main__': - run() + run_from_command_line()