0.4.0: rename support, progress as percentage, skipping symlinks
This commit is contained in:
parent
0201bcd853
commit
ebcf6a5926
@ -36,6 +36,15 @@ under 10 minutes. Both tests on HFS+.
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
0.4.0
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* renames are now reported as such
|
||||||
|
|
||||||
|
* all non-regular files (e.g. symbolic links, pipes, sockets) are now skipped
|
||||||
|
|
||||||
|
* progress is now presented as a percentage
|
||||||
|
|
||||||
0.3.0
|
0.3.0
|
||||||
~~~~~
|
~~~~~
|
||||||
|
|
||||||
|
104
src/bitrot.py
104
src/bitrot.py
@ -32,12 +32,13 @@ import datetime
|
|||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import stat
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
CHUNK_SIZE = 16384
|
CHUNK_SIZE = 16384
|
||||||
DOT_THRESHOLD = 200
|
DOT_THRESHOLD = 200
|
||||||
VERSION = (0, 3, 0)
|
VERSION = (0, 4, 0)
|
||||||
|
|
||||||
|
|
||||||
def sha1(path):
|
def sha1(path):
|
||||||
@ -70,54 +71,77 @@ def run(verbosity=1):
|
|||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
new_paths = []
|
new_paths = []
|
||||||
updated_paths = []
|
updated_paths = []
|
||||||
|
renamed_paths = []
|
||||||
error_count = 0
|
error_count = 0
|
||||||
dot_count = 0
|
total_size = 0
|
||||||
|
current_size = 0
|
||||||
missing_paths = set()
|
missing_paths = set()
|
||||||
cur.execute('SELECT path FROM bitrot')
|
cur.execute('SELECT path FROM bitrot')
|
||||||
row = cur.fetchone()
|
row = cur.fetchone()
|
||||||
while row:
|
while row:
|
||||||
missing_paths.add(row[0])
|
missing_paths.add(row[0])
|
||||||
row = cur.fetchone()
|
row = cur.fetchone()
|
||||||
|
paths = []
|
||||||
for path, _, files in os.walk(current_dir):
|
for path, _, files in os.walk(current_dir):
|
||||||
for f in files:
|
for f in files:
|
||||||
if verbosity and not dot_count:
|
|
||||||
sys.stdout.write('.')
|
|
||||||
sys.stdout.flush()
|
|
||||||
dot_count = (dot_count + 1) % DOT_THRESHOLD
|
|
||||||
p = os.path.join(path, f)
|
p = os.path.join(path, f)
|
||||||
if p == bitrot_db:
|
st = os.stat(p)
|
||||||
|
if not stat.S_ISREG(st.st_mode) or p == bitrot_db:
|
||||||
continue
|
continue
|
||||||
new_mtime = int(os.stat(p).st_mtime)
|
paths.append(p)
|
||||||
new_sha1 = sha1(p)
|
total_size += st.st_size
|
||||||
update_ts = datetime.datetime.utcnow().strftime(
|
paths.sort()
|
||||||
"%Y-%m-%d %H:%M:%S%z"
|
for p in paths:
|
||||||
)
|
st = os.stat(p)
|
||||||
p_uni = p.decode('utf8')
|
new_mtime = int(st.st_mtime)
|
||||||
missing_paths.discard(p_uni)
|
current_size += st.st_size
|
||||||
cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE '
|
if verbosity:
|
||||||
'path=?', (p_uni,))
|
sys.stdout.write('\r{:>6.1%}'.format(current_size/total_size))
|
||||||
row = cur.fetchone()
|
sys.stdout.flush()
|
||||||
if not row:
|
new_sha1 = sha1(p)
|
||||||
|
update_ts = datetime.datetime.utcnow().strftime(
|
||||||
|
"%Y-%m-%d %H:%M:%S%z"
|
||||||
|
)
|
||||||
|
p_uni = p.decode('utf8')
|
||||||
|
missing_paths.discard(p_uni)
|
||||||
|
cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE '
|
||||||
|
'path=?', (p_uni,))
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
cur.execute('SELECT mtime, path, timestamp FROM bitrot WHERE '
|
||||||
|
'hash=?', (new_sha1,))
|
||||||
|
rows = cur.fetchall()
|
||||||
|
for row in rows:
|
||||||
|
stored_mtime, stored_path, update_ts = row
|
||||||
|
if not os.path.exists(stored_path):
|
||||||
|
renamed_paths.append((stored_path, p_uni))
|
||||||
|
missing_paths.discard(stored_path)
|
||||||
|
cur.execute('UPDATE bitrot SET mtime=?, path=?, '
|
||||||
|
'timestamp=? WHERE hash=?',
|
||||||
|
(new_mtime, p_uni, update_ts, new_sha1))
|
||||||
|
conn.commit()
|
||||||
|
break
|
||||||
|
else:
|
||||||
new_paths.append(p)
|
new_paths.append(p)
|
||||||
cur.execute('INSERT INTO bitrot VALUES (?, ?, ?, ?)',
|
cur.execute('INSERT INTO bitrot VALUES (?, ?, ?, ?)',
|
||||||
(p_uni, new_mtime, new_sha1, update_ts))
|
(p_uni, new_mtime, new_sha1, update_ts))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
continue
|
continue
|
||||||
stored_mtime, stored_sha1, update_ts = row
|
stored_mtime, stored_sha1, update_ts = row
|
||||||
if int(stored_mtime) != new_mtime:
|
if int(stored_mtime) != new_mtime:
|
||||||
updated_paths.append(p)
|
updated_paths.append(p)
|
||||||
cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? '
|
cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? '
|
||||||
'WHERE path=?',
|
'WHERE path=?',
|
||||||
(new_mtime, new_sha1, update_ts, p_uni))
|
(new_mtime, new_sha1, update_ts, p_uni))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
elif stored_sha1 != new_sha1:
|
elif stored_sha1 != new_sha1:
|
||||||
error_count += 1
|
error_count += 1
|
||||||
print("\rerror: SHA1 mismatch for {}: expected {}, got {}."
|
print("\rerror: SHA1 mismatch for {}: expected {}, got {}."
|
||||||
" Original info from {}.".format(
|
" Original info from {}.".format(
|
||||||
p, stored_sha1, new_sha1, update_ts
|
p, stored_sha1, new_sha1, update_ts
|
||||||
),
|
),
|
||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
)
|
)
|
||||||
for path in missing_paths:
|
for path in missing_paths:
|
||||||
cur.execute('DELETE FROM bitrot WHERE path=?', (path,))
|
cur.execute('DELETE FROM bitrot WHERE path=?', (path,))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
@ -126,9 +150,10 @@ def run(verbosity=1):
|
|||||||
if verbosity:
|
if verbosity:
|
||||||
print("\rFinished. {} errors found.".format(error_count))
|
print("\rFinished. {} errors found.".format(error_count))
|
||||||
if verbosity == 1:
|
if verbosity == 1:
|
||||||
print("{} entries in the database, {} new, {} updated, {} missing."
|
print("{} entries in the database, {} new, {} updated, "
|
||||||
"".format(all_count, len(new_paths), len(updated_paths),
|
"{} renamed, {} missing.".format(all_count, len(new_paths),
|
||||||
len(missing_paths)))
|
len(updated_paths), len(renamed_paths), len(missing_paths)
|
||||||
|
))
|
||||||
elif verbosity > 1:
|
elif verbosity > 1:
|
||||||
print("{} entries in the database.".format(all_count), end=' ')
|
print("{} entries in the database.".format(all_count), end=' ')
|
||||||
if new_paths:
|
if new_paths:
|
||||||
@ -141,6 +166,11 @@ def run(verbosity=1):
|
|||||||
updated_paths.sort()
|
updated_paths.sort()
|
||||||
for path in updated_paths:
|
for path in updated_paths:
|
||||||
print(" ", path)
|
print(" ", path)
|
||||||
|
if renamed_paths:
|
||||||
|
print("{} entries renamed:".format(len(renamed_paths)))
|
||||||
|
renamed_paths.sort()
|
||||||
|
for path in renamed_paths:
|
||||||
|
print(" from", path[0], "to", path[1])
|
||||||
if missing_paths:
|
if missing_paths:
|
||||||
print("{} entries missing:".format(len(missing_paths)))
|
print("{} entries missing:".format(len(missing_paths)))
|
||||||
missing_paths = sorted(missing_paths)
|
missing_paths = sorted(missing_paths)
|
||||||
|
Reference in New Issue
Block a user