diff --git a/README.rst b/README.rst index e496875..167f0bd 100644 --- a/README.rst +++ b/README.rst @@ -2,8 +2,8 @@ bitrot ====== -Detects bit rotten files on the hard drive to save your precious photo and -music collection from slow decay. +Detects bit rotten files on the hard drive to save your precious photo +and music collection from slow decay. Usage ----- @@ -12,26 +12,26 @@ Go to the desired directory and simply invoke:: $ bitrot -This will start digging through your directory structure recursively indexing -all files found. The index is stored in a ``.bitrot.db`` file which is a SQLite -3 database. +This will start digging through your directory structure recursively +indexing all files found. The index is stored in a ``.bitrot.db`` file +which is a SQLite 3 database. -Next time you run ``bitrot`` it will add new files and update the index for -files with a changed modification date. Most importantly however, it will -report all errors, e.g. files that changed on the hard drive but still have the -same modification date. +Next time you run ``bitrot`` it will add new files and update the index +for files with a changed modification date. Most importantly however, it +will report all errors, e.g. files that changed on the hard drive but +still have the same modification date. -All paths stored in ``.bitrot.db`` are relative so it's safe to rescan a folder -after moving it to another drive. +All paths stored in ``.bitrot.db`` are relative so it's safe to rescan +a folder after moving it to another drive. Performance ----------- -Obviously depends on how fast the underlying drive is. No rigorous performance -tests have been done. For informational purposes, on my typical 5400 RPM laptop -hard drive scanning a 60+ GB music library takes around 15 minutes. On an OCZ -Vertex 3 SSD drive ``bitrot`` is able to scan a 100 GB Aperture library in -under 10 minutes. Both tests on HFS+. +Obviously depends on how fast the underlying drive is. 
No rigorous +performance tests have been done. For informational purposes, a typical +5400 RPM laptop hard drive scanning a 60+ GB music library takes around +15 minutes. On an OCZ Vertex 3 SSD drive ``bitrot`` is able to scan +a 100 GB Aperture library in under 10 minutes. Both tests on HFS+. Change Log ---------- @@ -42,7 +42,10 @@ Change Log * more control over performance with ``--commit-interval`` and ``--chunk-size`` command-line arguments -* bugfix: symbolic links are now properly skipped +* bugfix: symbolic links are now properly skipped (or can be followed if + ``--follow-links`` is passed) + +* bugfix: files that cannot be opened are now gracefully skipped * bugfix: fixed a rare division by zero when run in an empty directory @@ -54,8 +57,9 @@ Change Log 0.5.0 ~~~~~ -* ``--test`` command-line argument for testing the state without updating the - database on disk (works for testing databases you don't have write access to) +* ``--test`` command-line argument for testing the state without + updating the database on disk (works for testing databases you don't + have write access to) * size of the data read is reported upon finish @@ -66,19 +70,22 @@ Change Log * renames are now reported as such -* all non-regular files (e.g. symbolic links, pipes, sockets) are now skipped +* all non-regular files (e.g. 
symbolic links, pipes, sockets) are now + skipped * progress presented in percentage 0.3.0 ~~~~~ -* ``--sum`` command-line argument for easy comparison of multiple databases +* ``--sum`` command-line argument for easy comparison of multiple + databases 0.2.1 ~~~~~ -* fixed regression from 0.2.0 where new files caused a ``KeyError`` exception +* fixed regression from 0.2.0 where new files caused a ``KeyError`` + exception 0.2.0 ~~~~~ diff --git a/src/bitrot.py b/src/bitrot.py index 57041d8..97c8ec4 100644 --- a/src/bitrot.py +++ b/src/bitrot.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2013 by Łukasz Langa -# + # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights @@ -90,7 +90,7 @@ def get_sqlite3_cursor(path, copy=False): return conn -def run(verbosity=1, test=False, commit_interval=300, +def run(verbosity=1, test=False, follow_links=False, commit_interval=300, chunk_size=DEFAULT_CHUNK_SIZE): current_dir = b'.' 
# sic, relative path bitrot_db = os.path.join(current_dir, b'.bitrot.db') @@ -113,8 +113,12 @@ def run(verbosity=1, test=False, commit_interval=300, for path, _, files in os.walk(current_dir): for f in files: p = os.path.join(path, f) + p_uni = p.decode('utf8') try: - st = os.lstat(p) + if follow_links or p_uni in missing_paths: + st = os.stat(p) + else: + st = os.lstat(p) except OSError as ex: if ex.errno != errno.ENOENT: raise @@ -136,12 +140,22 @@ def run(verbosity=1, test=False, commit_interval=300, sys.stdout.write(size_fmt) sys.stdout.flush() last_reported_size = size_fmt - new_sha1 = sha1(p, chunk_size) + p_uni = p.decode('utf8') + missing_paths.discard(p_uni) + try: + new_sha1 = sha1(p, chunk_size) + except (IOError, OSError) as e: + if verbosity: + print( + '\rwarning: cannot compute hash of {} [{}]'.format( + p, errno.errorcode[e.args[0]], + ), + file=sys.stderr, + ) + continue update_ts = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S%z' ) - p_uni = p.decode('utf8') - missing_paths.discard(p_uni) cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE ' 'path=?', (p_uni,)) row = cur.fetchone() @@ -241,6 +255,13 @@ def stable_sum(): def run_from_command_line(): parser = argparse.ArgumentParser(prog='bitrot') + parser.add_argument('-l', '--follow-links', action='store_true', + help='follow symbolic links and store target files\' hashes. Once ' + 'a path is present in the database, it will be checked against ' + 'changes in content even if it becomes a symbolic link. 
In ' + 'other words, if you run `bitrot -l`, on subsequent runs ' + 'symbolic links registered during the first run will be ' + 'properly followed and checked even if you run without `-l`.') parser.add_argument('-q', '--quiet', action='store_true', help='don\'t print anything besides checksum errors') parser.add_argument('-s', '--sum', action='store_true', @@ -254,7 +275,8 @@ def run_from_command_line(): parser.add_argument('--version', action='version', version='%(prog)s {}.{}.{}'.format(*VERSION)) parser.add_argument('--commit-interval', type=float, default=300, - help='min time between commits (0 commits on every operation)') + help='min time in seconds between commits ' + '(0 commits on every operation)') parser.add_argument('--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE, help='read files this many bytes at a time') args = parser.parse_args() @@ -269,9 +291,12 @@ def run_from_command_line(): verbosity = 0 elif args.verbose: verbosity = 2 - run(verbosity=verbosity, test=args.test, + run(verbosity=verbosity, + test=args.test, + follow_links=args.follow_links, commit_interval=args.commit_interval, - chunk_size=args.chunk_size) + chunk_size=args.chunk_size, + ) if __name__ == '__main__':