bitrot 0.8.0, fsencoding and self-integrity check
This commit is contained in:
parent
a09f0b0ad6
commit
e4efbc290c
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
.bitrot.db
|
||||||
|
.bitrot.sha512
|
10
README.rst
10
README.rst
@ -36,6 +36,16 @@ a 100 GB Aperture library in under 10 minutes. Both tests on HFS+.
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
0.8.0
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* bitrot now keeps track of its own database's bitrot by storing
|
||||||
|
a checksum of .bitrot.db in .bitrot.sha512
|
||||||
|
|
||||||
|
* bugfix: now properly uses the filesystem encoding to decode file names
|
||||||
|
for use with the .bitrot.db database. Report and original patch by
|
||||||
|
pallinger.
|
||||||
|
|
||||||
0.7.1
|
0.7.1
|
||||||
~~~~~
|
~~~~~
|
||||||
|
|
||||||
|
102
src/bitrot.py
102
src/bitrot.py
@ -42,8 +42,9 @@ import time
|
|||||||
|
|
||||||
DEFAULT_CHUNK_SIZE = 16384
|
DEFAULT_CHUNK_SIZE = 16384
|
||||||
DOT_THRESHOLD = 200
|
DOT_THRESHOLD = 200
|
||||||
VERSION = (0, 7, 1)
|
VERSION = (0, 8, 0)
|
||||||
IGNORED_FILE_SYSTEM_ERRORS = {errno.ENOENT, errno.EACCES}
|
IGNORED_FILE_SYSTEM_ERRORS = {errno.ENOENT, errno.EACCES}
|
||||||
|
FSENCODING = sys.getfilesystemencoding()
|
||||||
|
|
||||||
|
|
||||||
def sha1(path, chunk_size):
|
def sha1(path, chunk_size):
|
||||||
@ -102,7 +103,22 @@ def list_existing_paths(directory, expected=(), ignored=(), follow_links=False):
|
|||||||
for path, _, files in os.walk(directory):
|
for path, _, files in os.walk(directory):
|
||||||
for f in files:
|
for f in files:
|
||||||
p = os.path.join(path, f)
|
p = os.path.join(path, f)
|
||||||
p_uni = p.decode('utf8')
|
try:
|
||||||
|
p_uni = p.decode(FSENCODING)
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
try:
|
||||||
|
print(
|
||||||
|
"warning: cannot decode file name:",
|
||||||
|
path,
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
# yup, even printing the filename might fail in certain
|
||||||
|
# occasions
|
||||||
|
pass
|
||||||
|
|
||||||
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if follow_links or p_uni in expected:
|
if follow_links or p_uni in expected:
|
||||||
st = os.stat(p)
|
st = os.stat(p)
|
||||||
@ -146,8 +162,10 @@ class Bitrot(object):
|
|||||||
self._last_commit_ts = time.time()
|
self._last_commit_ts = time.time()
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
current_dir = b'.' # sic, relative path
|
check_sha512_integrity()
|
||||||
bitrot_db = os.path.join(current_dir, b'.bitrot.db')
|
|
||||||
|
bitrot_db = get_path()
|
||||||
|
bitrot_sha512 = get_path(ext=b'sha512')
|
||||||
try:
|
try:
|
||||||
conn = get_sqlite3_cursor(bitrot_db, copy=self.test)
|
conn = get_sqlite3_cursor(bitrot_db, copy=self.test)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@ -164,7 +182,7 @@ class Bitrot(object):
|
|||||||
current_size = 0
|
current_size = 0
|
||||||
missing_paths = self.select_all_paths(cur)
|
missing_paths = self.select_all_paths(cur)
|
||||||
paths, total_size = list_existing_paths(
|
paths, total_size = list_existing_paths(
|
||||||
current_dir, expected=missing_paths, ignored={bitrot_db},
|
b'.', expected=missing_paths, ignored={bitrot_db, bitrot_sha512},
|
||||||
follow_links=self.follow_links,
|
follow_links=self.follow_links,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -260,6 +278,8 @@ class Bitrot(object):
|
|||||||
missing_paths,
|
missing_paths,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
update_sha512_integrity()
|
||||||
|
|
||||||
if error_count:
|
if error_count:
|
||||||
raise BitrotException(
|
raise BitrotException(
|
||||||
1, 'There were {} errors found.'.format(error_count),
|
1, 'There were {} errors found.'.format(error_count),
|
||||||
@ -355,9 +375,16 @@ class Bitrot(object):
|
|||||||
return new_path
|
return new_path
|
||||||
|
|
||||||
|
|
||||||
def stable_sum():
|
def get_path(directory=b'.', ext=b'db'):
|
||||||
current_dir = b'.' # sic, relative path
|
"""Compose the path to the selected bitrot file."""
|
||||||
bitrot_db = os.path.join(current_dir, b'.bitrot.db')
|
return os.path.join(directory, b'.bitrot.' + ext)
|
||||||
|
|
||||||
|
|
||||||
|
def stable_sum(bitrot_db):
|
||||||
|
"""Calculates a stable SHA512 of all entries in the database.
|
||||||
|
|
||||||
|
Useful for comparing if two directories hold the same data, as it ignores
|
||||||
|
timing information."""
|
||||||
digest = hashlib.sha512()
|
digest = hashlib.sha512()
|
||||||
conn = get_sqlite3_cursor(bitrot_db)
|
conn = get_sqlite3_cursor(bitrot_db)
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
@ -369,7 +396,60 @@ def stable_sum():
|
|||||||
return digest.hexdigest()
|
return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _sha512_of_file(path):
    """Return the hexadecimal SHA512 of the file at `path` as ASCII bytes.

    The file is read DEFAULT_CHUNK_SIZE bytes at a time so that a large
    database is never held in memory in one piece.
    """
    digest = hashlib.sha512()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(DEFAULT_CHUNK_SIZE), b''):
            digest.update(chunk)
    # Encoded so that the value has the same type as the raw contents of
    # .bitrot.sha512, which is always opened in binary mode.
    return digest.hexdigest().encode('ascii')


def check_sha512_integrity():
    """Compare .bitrot.db with the checksum stored in .bitrot.sha512.

    Returns silently when .bitrot.sha512 does not exist. When the stored
    checksum differs from the one computed for .bitrot.db, prints a
    diagnostic and raises BitrotException with 3 as its first argument.
    """
    sha512_path = get_path(ext=b'sha512')
    if not os.path.exists(sha512_path):
        return

    print('Checking bitrot.db integrity... ', end='')
    with open(sha512_path, 'rb') as f:
        old_sha512 = f.read().strip()
    new_sha512 = _sha512_of_file(get_path())
    if new_sha512 != old_sha512:
        # A SHA512 hex digest is 128 characters long; any other length means
        # the checksum file itself is the more likely victim.
        if len(old_sha512) == 128:
            print(
                "error: SHA512 of the file is different, bitrot.db might be "
                "corrupt."
            )
        else:
            print(
                "error: SHA512 of the file is different but bitrot.sha512 has "
                "a suspicious length. It might be corrupt."
            )
        print(
            "If you'd like to continue anyway, delete the .bitrot.sha512 "
            "file and try again."
        )
        raise BitrotException(
            3, 'bitrot.db integrity check failed, cannot continue.',
        )

    print('ok.')


def update_sha512_integrity():
    """Store the current SHA512 of .bitrot.db in .bitrot.sha512.

    The checksum file is only rewritten when it is missing or when its
    contents differ from the freshly computed checksum.
    """
    old_sha512 = None  # no checksum stored yet
    sha512_path = get_path(ext=b'sha512')
    if os.path.exists(sha512_path):
        with open(sha512_path, 'rb') as f:
            old_sha512 = f.read().strip()
    new_sha512 = _sha512_of_file(get_path())
    if new_sha512 != old_sha512:
        print('Updating bitrot.sha512... ', end='')
        with open(sha512_path, 'wb') as f:
            f.write(new_sha512)
        print('done.')
|
||||||
|
|
||||||
def run_from_command_line():
|
def run_from_command_line():
|
||||||
|
global FSENCODING
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(prog='bitrot')
|
parser = argparse.ArgumentParser(prog='bitrot')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-l', '--follow-links', action='store_true',
|
'-l', '--follow-links', action='store_true',
|
||||||
@ -403,6 +483,10 @@ def run_from_command_line():
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
'--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
||||||
help='read files this many bytes at a time')
|
help='read files this many bytes at a time')
|
||||||
|
parser.add_argument(
|
||||||
|
'--fsencoding', default='',
|
||||||
|
help='override the codec to decode filenames, otherwise taken from '
|
||||||
|
'the LANG environment variables')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.sum:
|
if args.sum:
|
||||||
try:
|
try:
|
||||||
@ -422,6 +506,8 @@ def run_from_command_line():
|
|||||||
commit_interval=args.commit_interval,
|
commit_interval=args.commit_interval,
|
||||||
chunk_size=args.chunk_size,
|
chunk_size=args.chunk_size,
|
||||||
)
|
)
|
||||||
|
if args.fsencoding:
|
||||||
|
FSENCODING = args.fsencoding
|
||||||
try:
|
try:
|
||||||
bt.run()
|
bt.run()
|
||||||
except BitrotException as bre:
|
except BitrotException as bre:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user