Add --follow-links, skip files with EACCES et al.
This commit is contained in:
parent
1f94944f87
commit
1b8a582e34
51
README.rst
51
README.rst
@ -2,8 +2,8 @@
|
|||||||
bitrot
|
bitrot
|
||||||
======
|
======
|
||||||
|
|
||||||
Detects bit rotten files on the hard drive to save your precious photo and
|
Detects bit rotten files on the hard drive to save your precious photo
|
||||||
music collection from slow decay.
|
and music collection from slow decay.
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
-----
|
-----
|
||||||
@ -12,26 +12,26 @@ Go to the desired directory and simply invoke::
|
|||||||
|
|
||||||
$ bitrot
|
$ bitrot
|
||||||
|
|
||||||
This will start digging through your directory structure recursively indexing
|
This will start digging through your directory structure recursively
|
||||||
all files found. The index is stored in a ``.bitrot.db`` file which is a SQLite
|
indexing all files found. The index is stored in a ``.bitrot.db`` file
|
||||||
3 database.
|
which is a SQLite 3 database.
|
||||||
|
|
||||||
Next time you run ``bitrot`` it will add new files and update the index for
|
Next time you run ``bitrot`` it will add new files and update the index
|
||||||
files with a changed modification date. Most importantly however, it will
|
for files with a changed modification date. Most importantly however, it
|
||||||
report all errors, e.g. files that changed on the hard drive but still have the
|
will report all errors, e.g. files that changed on the hard drive but
|
||||||
same modification date.
|
still have the same modification date.
|
||||||
|
|
||||||
All paths stored in ``.bitrot.db`` are relative so it's safe to rescan a folder
|
All paths stored in ``.bitrot.db`` are relative so it's safe to rescan
|
||||||
after moving it to another drive.
|
a folder after moving it to another drive.
|
||||||
|
|
||||||
Performance
|
Performance
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
Obviously depends on how fast the underlying drive is. No rigorous performance
|
Obviously depends on how fast the underlying drive is. No rigorous
|
||||||
tests have been done. For informational purposes, on my typical 5400 RPM laptop
|
performance tests have been done. For informational purposes, a typical
|
||||||
hard drive scanning a 60+ GB music library takes around 15 minutes. On an OCZ
|
5400 RPM laptop hard drive scanning a 60+ GB music library takes around
|
||||||
Vertex 3 SSD drive ``bitrot`` is able to scan a 100 GB Aperture library in
|
15 minutes. On an OCZ Vertex 3 SSD drive ``bitrot`` is able to scan
|
||||||
under 10 minutes. Both tests on HFS+.
|
a 100 GB Aperture library in under 10 minutes. Both tests on HFS+.
|
||||||
|
|
||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
@ -42,7 +42,10 @@ Change Log
|
|||||||
* more control over performance with ``--commit-interval`` and
|
* more control over performance with ``--commit-interval`` and
|
||||||
``--chunk-size`` command-line arguments
|
``--chunk-size`` command-line arguments
|
||||||
|
|
||||||
* bugfix: symbolic links are now properly skipped
|
* bugfix: symbolic links are now properly skipped (or can be followed if
|
||||||
|
``--follow-links`` is passed)
|
||||||
|
|
||||||
|
* bugfix: files that cannot be opened are now gracefully skipped
|
||||||
|
|
||||||
* bugfix: fixed a rare division by zero when run in an empty directory
|
* bugfix: fixed a rare division by zero when run in an empty directory
|
||||||
|
|
||||||
@ -54,8 +57,9 @@ Change Log
|
|||||||
0.5.0
|
0.5.0
|
||||||
~~~~~
|
~~~~~
|
||||||
|
|
||||||
* ``--test`` command-line argument for testing the state without updating the
|
* ``--test`` command-line argument for testing the state without
|
||||||
database on disk (works for testing databases you don't have write access to)
|
updating the database on disk (works for testing databases you don't
|
||||||
|
have write access to)
|
||||||
|
|
||||||
* size of the data read is reported upon finish
|
* size of the data read is reported upon finish
|
||||||
|
|
||||||
@ -66,19 +70,22 @@ Change Log
|
|||||||
|
|
||||||
* renames are now reported as such
|
* renames are now reported as such
|
||||||
|
|
||||||
* all non-regular files (e.g. symbolic links, pipes, sockets) are now skipped
|
* all non-regular files (e.g. symbolic links, pipes, sockets) are now
|
||||||
|
skipped
|
||||||
|
|
||||||
* progress presented in percentage
|
* progress presented in percentage
|
||||||
|
|
||||||
0.3.0
|
0.3.0
|
||||||
~~~~~
|
~~~~~
|
||||||
|
|
||||||
* ``--sum`` command-line argument for easy comparison of multiple databases
|
* ``--sum`` command-line argument for easy comparison of multiple
|
||||||
|
databases
|
||||||
|
|
||||||
0.2.1
|
0.2.1
|
||||||
~~~~~
|
~~~~~
|
||||||
|
|
||||||
* fixed regression from 0.2.0 where new files caused a ``KeyError`` exception
|
* fixed regression from 0.2.0 where new files caused a ``KeyError``
|
||||||
|
exception
|
||||||
|
|
||||||
0.2.0
|
0.2.0
|
||||||
~~~~~
|
~~~~~
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright (C) 2013 by Łukasz Langa
|
# Copyright (C) 2013 by Łukasz Langa
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
# in the Software without restriction, including without limitation the rights
|
# in the Software without restriction, including without limitation the rights
|
||||||
@ -90,7 +90,7 @@ def get_sqlite3_cursor(path, copy=False):
|
|||||||
return conn
|
return conn
|
||||||
|
|
||||||
|
|
||||||
def run(verbosity=1, test=False, commit_interval=300,
|
def run(verbosity=1, test=False, follow_links=False, commit_interval=300,
|
||||||
chunk_size=DEFAULT_CHUNK_SIZE):
|
chunk_size=DEFAULT_CHUNK_SIZE):
|
||||||
current_dir = b'.' # sic, relative path
|
current_dir = b'.' # sic, relative path
|
||||||
bitrot_db = os.path.join(current_dir, b'.bitrot.db')
|
bitrot_db = os.path.join(current_dir, b'.bitrot.db')
|
||||||
@ -113,8 +113,12 @@ def run(verbosity=1, test=False, commit_interval=300,
|
|||||||
for path, _, files in os.walk(current_dir):
|
for path, _, files in os.walk(current_dir):
|
||||||
for f in files:
|
for f in files:
|
||||||
p = os.path.join(path, f)
|
p = os.path.join(path, f)
|
||||||
|
p_uni = p.decode('utf8')
|
||||||
try:
|
try:
|
||||||
st = os.lstat(p)
|
if follow_links or p_uni in missing_paths:
|
||||||
|
st = os.stat(p)
|
||||||
|
else:
|
||||||
|
st = os.lstat(p)
|
||||||
except OSError as ex:
|
except OSError as ex:
|
||||||
if ex.errno != errno.ENOENT:
|
if ex.errno != errno.ENOENT:
|
||||||
raise
|
raise
|
||||||
@ -136,12 +140,22 @@ def run(verbosity=1, test=False, commit_interval=300,
|
|||||||
sys.stdout.write(size_fmt)
|
sys.stdout.write(size_fmt)
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
last_reported_size = size_fmt
|
last_reported_size = size_fmt
|
||||||
new_sha1 = sha1(p, chunk_size)
|
p_uni = p.decode('utf8')
|
||||||
|
missing_paths.discard(p_uni)
|
||||||
|
try:
|
||||||
|
new_sha1 = sha1(p, chunk_size)
|
||||||
|
except (IOError, OSError) as e:
|
||||||
|
if verbosity:
|
||||||
|
print(
|
||||||
|
'\rwarning: cannot compute hash of {} [{}]'.format(
|
||||||
|
p, errno.errorcode[e.args[0]],
|
||||||
|
),
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
continue
|
||||||
update_ts = datetime.datetime.utcnow().strftime(
|
update_ts = datetime.datetime.utcnow().strftime(
|
||||||
'%Y-%m-%d %H:%M:%S%z'
|
'%Y-%m-%d %H:%M:%S%z'
|
||||||
)
|
)
|
||||||
p_uni = p.decode('utf8')
|
|
||||||
missing_paths.discard(p_uni)
|
|
||||||
cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE '
|
cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE '
|
||||||
'path=?', (p_uni,))
|
'path=?', (p_uni,))
|
||||||
row = cur.fetchone()
|
row = cur.fetchone()
|
||||||
@ -241,6 +255,13 @@ def stable_sum():
|
|||||||
|
|
||||||
def run_from_command_line():
|
def run_from_command_line():
|
||||||
parser = argparse.ArgumentParser(prog='bitrot')
|
parser = argparse.ArgumentParser(prog='bitrot')
|
||||||
|
parser.add_argument('-l', '--follow-links', action='store_true',
|
||||||
|
help='follow symbolic links and store target files\' hashes. Once '
|
||||||
|
'a path is present in the database, it will be checked against '
|
||||||
|
'changes in content even if it becomes a symbolic link. In '
|
||||||
|
'other words, if you run `bitrot -l`, on subsequent runs '
|
||||||
|
'symbolic links registered during the first run will be '
|
||||||
|
'properly followed and checked even if you run without `-l`.')
|
||||||
parser.add_argument('-q', '--quiet', action='store_true',
|
parser.add_argument('-q', '--quiet', action='store_true',
|
||||||
help='don\'t print anything besides checksum errors')
|
help='don\'t print anything besides checksum errors')
|
||||||
parser.add_argument('-s', '--sum', action='store_true',
|
parser.add_argument('-s', '--sum', action='store_true',
|
||||||
@ -254,7 +275,8 @@ def run_from_command_line():
|
|||||||
parser.add_argument('--version', action='version',
|
parser.add_argument('--version', action='version',
|
||||||
version='%(prog)s {}.{}.{}'.format(*VERSION))
|
version='%(prog)s {}.{}.{}'.format(*VERSION))
|
||||||
parser.add_argument('--commit-interval', type=float, default=300,
|
parser.add_argument('--commit-interval', type=float, default=300,
|
||||||
help='min time between commits (0 commits on every operation)')
|
help='min time in seconds between commits '
|
||||||
|
'(0 commits on every operation)')
|
||||||
parser.add_argument('--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
parser.add_argument('--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
||||||
help='read files this many bytes at a time')
|
help='read files this many bytes at a time')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@ -269,9 +291,12 @@ def run_from_command_line():
|
|||||||
verbosity = 0
|
verbosity = 0
|
||||||
elif args.verbose:
|
elif args.verbose:
|
||||||
verbosity = 2
|
verbosity = 2
|
||||||
run(verbosity=verbosity, test=args.test,
|
run(verbosity=verbosity,
|
||||||
|
test=args.test,
|
||||||
|
follow_links=args.follow_links,
|
||||||
commit_interval=args.commit_interval,
|
commit_interval=args.commit_interval,
|
||||||
chunk_size=args.chunk_size)
|
chunk_size=args.chunk_size,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Reference in New Issue
Block a user