Add --follow-links, skip files with EACCES et al.
This commit is contained in:
parent
1f94944f87
commit
1b8a582e34
51
README.rst
51
README.rst
@ -2,8 +2,8 @@
|
||||
bitrot
|
||||
======
|
||||
|
||||
Detects bit rotten files on the hard drive to save your precious photo and
|
||||
music collection from slow decay.
|
||||
Detects bit rotten files on the hard drive to save your precious photo
|
||||
and music collection from slow decay.
|
||||
|
||||
Usage
|
||||
-----
|
||||
@ -12,26 +12,26 @@ Go to the desired directory and simply invoke::
|
||||
|
||||
$ bitrot
|
||||
|
||||
This will start digging through your directory structure recursively indexing
|
||||
all files found. The index is stored in a ``.bitrot.db`` file which is a SQLite
|
||||
3 database.
|
||||
This will start digging through your directory structure recursively
|
||||
indexing all files found. The index is stored in a ``.bitrot.db`` file
|
||||
which is a SQLite 3 database.
|
||||
|
||||
Next time you run ``bitrot`` it will add new files and update the index for
|
||||
files with a changed modification date. Most importantly however, it will
|
||||
report all errors, e.g. files that changed on the hard drive but still have the
|
||||
same modification date.
|
||||
Next time you run ``bitrot`` it will add new files and update the index
|
||||
for files with a changed modification date. Most importantly however, it
|
||||
will report all errors, e.g. files that changed on the hard drive but
|
||||
still have the same modification date.
|
||||
|
||||
All paths stored in ``.bitrot.db`` are relative so it's safe to rescan a folder
|
||||
after moving it to another drive.
|
||||
All paths stored in ``.bitrot.db`` are relative so it's safe to rescan
|
||||
a folder after moving it to another drive.
|
||||
|
||||
Performance
|
||||
-----------
|
||||
|
||||
Obviously depends on how fast the underlying drive is. No rigorous performance
|
||||
tests have been done. For informational purposes, on my typical 5400 RPM laptop
|
||||
hard drive scanning a 60+ GB music library takes around 15 minutes. On an OCZ
|
||||
Vertex 3 SSD drive ``bitrot`` is able to scan a 100 GB Aperture library in
|
||||
under 10 minutes. Both tests on HFS+.
|
||||
Obviously depends on how fast the underlying drive is. No rigorous
|
||||
performance tests have been done. For informational purposes, on a typical
|
||||
5400 RPM laptop hard drive scanning a 60+ GB music library takes around
|
||||
15 minutes. On an OCZ Vertex 3 SSD drive ``bitrot`` is able to scan
|
||||
a 100 GB Aperture library in under 10 minutes. Both tests on HFS+.
|
||||
|
||||
Change Log
|
||||
----------
|
||||
@ -42,7 +42,10 @@ Change Log
|
||||
* more control over performance with ``--commit-interval`` and
|
||||
``--chunk-size`` command-line arguments
|
||||
|
||||
* bugfix: symbolic links are now properly skipped
|
||||
* bugfix: symbolic links are now properly skipped (or can be followed if
|
||||
``--follow-links`` is passed)
|
||||
|
||||
* bugfix: files that cannot be opened are now gracefully skipped
|
||||
|
||||
* bugfix: fixed a rare division by zero when run in an empty directory
|
||||
|
||||
@ -54,8 +57,9 @@ Change Log
|
||||
0.5.0
|
||||
~~~~~
|
||||
|
||||
* ``--test`` command-line argument for testing the state without updating the
|
||||
database on disk (works for testing databases you don't have write access to)
|
||||
* ``--test`` command-line argument for testing the state without
|
||||
updating the database on disk (works for testing databases you don't
|
||||
have write access to)
|
||||
|
||||
* size of the data read is reported upon finish
|
||||
|
||||
@ -66,19 +70,22 @@ Change Log
|
||||
|
||||
* renames are now reported as such
|
||||
|
||||
* all non-regular files (e.g. symbolic links, pipes, sockets) are now skipped
|
||||
* all non-regular files (e.g. symbolic links, pipes, sockets) are now
|
||||
skipped
|
||||
|
||||
* progress presented in percentage
|
||||
|
||||
0.3.0
|
||||
~~~~~
|
||||
|
||||
* ``--sum`` command-line argument for easy comparison of multiple databases
|
||||
* ``--sum`` command-line argument for easy comparison of multiple
|
||||
databases
|
||||
|
||||
0.2.1
|
||||
~~~~~
|
||||
|
||||
* fixed regression from 0.2.0 where new files caused a ``KeyError`` exception
|
||||
* fixed regression from 0.2.0 where new files caused a ``KeyError``
|
||||
exception
|
||||
|
||||
0.2.0
|
||||
~~~~~
|
||||
|
@ -2,7 +2,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (C) 2013 by Łukasz Langa
|
||||
#
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
@ -90,7 +90,7 @@ def get_sqlite3_cursor(path, copy=False):
|
||||
return conn
|
||||
|
||||
|
||||
def run(verbosity=1, test=False, commit_interval=300,
|
||||
def run(verbosity=1, test=False, follow_links=False, commit_interval=300,
|
||||
chunk_size=DEFAULT_CHUNK_SIZE):
|
||||
current_dir = b'.' # sic, relative path
|
||||
bitrot_db = os.path.join(current_dir, b'.bitrot.db')
|
||||
@ -113,8 +113,12 @@ def run(verbosity=1, test=False, commit_interval=300,
|
||||
for path, _, files in os.walk(current_dir):
|
||||
for f in files:
|
||||
p = os.path.join(path, f)
|
||||
p_uni = p.decode('utf8')
|
||||
try:
|
||||
st = os.lstat(p)
|
||||
if follow_links or p_uni in missing_paths:
|
||||
st = os.stat(p)
|
||||
else:
|
||||
st = os.lstat(p)
|
||||
except OSError as ex:
|
||||
if ex.errno != errno.ENOENT:
|
||||
raise
|
||||
@ -136,12 +140,22 @@ def run(verbosity=1, test=False, commit_interval=300,
|
||||
sys.stdout.write(size_fmt)
|
||||
sys.stdout.flush()
|
||||
last_reported_size = size_fmt
|
||||
new_sha1 = sha1(p, chunk_size)
|
||||
p_uni = p.decode('utf8')
|
||||
missing_paths.discard(p_uni)
|
||||
try:
|
||||
new_sha1 = sha1(p, chunk_size)
|
||||
except (IOError, OSError) as e:
|
||||
if verbosity:
|
||||
print(
|
||||
'\rwarning: cannot compute hash of {} [{}]'.format(
|
||||
p, errno.errorcode[e.args[0]],
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
continue
|
||||
update_ts = datetime.datetime.utcnow().strftime(
|
||||
'%Y-%m-%d %H:%M:%S%z'
|
||||
)
|
||||
p_uni = p.decode('utf8')
|
||||
missing_paths.discard(p_uni)
|
||||
cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE '
|
||||
'path=?', (p_uni,))
|
||||
row = cur.fetchone()
|
||||
@ -241,6 +255,13 @@ def stable_sum():
|
||||
|
||||
def run_from_command_line():
|
||||
parser = argparse.ArgumentParser(prog='bitrot')
|
||||
parser.add_argument('-l', '--follow-links', action='store_true',
|
||||
help='follow symbolic links and store target files\' hashes. Once '
|
||||
'a path is present in the database, it will be checked against '
|
||||
'changes in content even if it becomes a symbolic link. In '
|
||||
'other words, if you run `bitrot -l`, on subsequent runs '
|
||||
'symbolic links registered during the first run will be '
|
||||
'properly followed and checked even if you run without `-l`.')
|
||||
parser.add_argument('-q', '--quiet', action='store_true',
|
||||
help='don\'t print anything besides checksum errors')
|
||||
parser.add_argument('-s', '--sum', action='store_true',
|
||||
@ -254,7 +275,8 @@ def run_from_command_line():
|
||||
parser.add_argument('--version', action='version',
|
||||
version='%(prog)s {}.{}.{}'.format(*VERSION))
|
||||
parser.add_argument('--commit-interval', type=float, default=300,
|
||||
help='min time between commits (0 commits on every operation)')
|
||||
help='min time in seconds between commits '
|
||||
'(0 commits on every operation)')
|
||||
parser.add_argument('--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
||||
help='read files this many bytes at a time')
|
||||
args = parser.parse_args()
|
||||
@ -269,9 +291,12 @@ def run_from_command_line():
|
||||
verbosity = 0
|
||||
elif args.verbose:
|
||||
verbosity = 2
|
||||
run(verbosity=verbosity, test=args.test,
|
||||
run(verbosity=verbosity,
|
||||
test=args.test,
|
||||
follow_links=args.follow_links,
|
||||
commit_interval=args.commit_interval,
|
||||
chunk_size=args.chunk_size)
|
||||
chunk_size=args.chunk_size,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Loading…
x
Reference in New Issue
Block a user