0.4.0: rename support, progress as percentage, skipping symlinks

This commit is contained in:
Łukasz Langa 2013-03-04 00:49:42 +01:00
parent 0201bcd853
commit ebcf6a5926
2 changed files with 76 additions and 37 deletions

View File

@ -36,6 +36,15 @@ under 10 minutes. Both tests on HFS+.
Change Log Change Log
---------- ----------
0.4.0
~~~~~
* renames are now reported as such
* all non-regular files (e.g. symbolic links, pipes, sockets) are now skipped
* progress is now presented as a percentage
0.3.0 0.3.0
~~~~~ ~~~~~

View File

@ -32,12 +32,13 @@ import datetime
import hashlib import hashlib
import os import os
import sqlite3 import sqlite3
import stat
import sys import sys
CHUNK_SIZE = 16384 CHUNK_SIZE = 16384
DOT_THRESHOLD = 200 DOT_THRESHOLD = 200
VERSION = (0, 3, 0) VERSION = (0, 4, 0)
def sha1(path): def sha1(path):
@ -70,54 +71,77 @@ def run(verbosity=1):
cur = conn.cursor() cur = conn.cursor()
new_paths = [] new_paths = []
updated_paths = [] updated_paths = []
renamed_paths = []
error_count = 0 error_count = 0
dot_count = 0 total_size = 0
current_size = 0
missing_paths = set() missing_paths = set()
cur.execute('SELECT path FROM bitrot') cur.execute('SELECT path FROM bitrot')
row = cur.fetchone() row = cur.fetchone()
while row: while row:
missing_paths.add(row[0]) missing_paths.add(row[0])
row = cur.fetchone() row = cur.fetchone()
paths = []
for path, _, files in os.walk(current_dir): for path, _, files in os.walk(current_dir):
for f in files: for f in files:
if verbosity and not dot_count:
sys.stdout.write('.')
sys.stdout.flush()
dot_count = (dot_count + 1) % DOT_THRESHOLD
p = os.path.join(path, f) p = os.path.join(path, f)
if p == bitrot_db: st = os.stat(p)
if not stat.S_ISREG(st.st_mode) or p == bitrot_db:
continue continue
new_mtime = int(os.stat(p).st_mtime) paths.append(p)
new_sha1 = sha1(p) total_size += st.st_size
update_ts = datetime.datetime.utcnow().strftime( paths.sort()
"%Y-%m-%d %H:%M:%S%z" for p in paths:
) st = os.stat(p)
p_uni = p.decode('utf8') new_mtime = int(st.st_mtime)
missing_paths.discard(p_uni) current_size += st.st_size
cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE ' if verbosity:
'path=?', (p_uni,)) sys.stdout.write('\r{:>6.1%}'.format(current_size/total_size))
row = cur.fetchone() sys.stdout.flush()
if not row: new_sha1 = sha1(p)
update_ts = datetime.datetime.utcnow().strftime(
"%Y-%m-%d %H:%M:%S%z"
)
p_uni = p.decode('utf8')
missing_paths.discard(p_uni)
cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE '
'path=?', (p_uni,))
row = cur.fetchone()
if not row:
cur.execute('SELECT mtime, path, timestamp FROM bitrot WHERE '
'hash=?', (new_sha1,))
rows = cur.fetchall()
for row in rows:
stored_mtime, stored_path, update_ts = row
if not os.path.exists(stored_path):
renamed_paths.append((stored_path, p_uni))
missing_paths.discard(stored_path)
cur.execute('UPDATE bitrot SET mtime=?, path=?, '
'timestamp=? WHERE hash=?',
(new_mtime, p_uni, update_ts, new_sha1))
conn.commit()
break
else:
new_paths.append(p) new_paths.append(p)
cur.execute('INSERT INTO bitrot VALUES (?, ?, ?, ?)', cur.execute('INSERT INTO bitrot VALUES (?, ?, ?, ?)',
(p_uni, new_mtime, new_sha1, update_ts)) (p_uni, new_mtime, new_sha1, update_ts))
conn.commit() conn.commit()
continue continue
stored_mtime, stored_sha1, update_ts = row stored_mtime, stored_sha1, update_ts = row
if int(stored_mtime) != new_mtime: if int(stored_mtime) != new_mtime:
updated_paths.append(p) updated_paths.append(p)
cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? ' cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? '
'WHERE path=?', 'WHERE path=?',
(new_mtime, new_sha1, update_ts, p_uni)) (new_mtime, new_sha1, update_ts, p_uni))
conn.commit() conn.commit()
elif stored_sha1 != new_sha1: elif stored_sha1 != new_sha1:
error_count += 1 error_count += 1
print("\rerror: SHA1 mismatch for {}: expected {}, got {}." print("\rerror: SHA1 mismatch for {}: expected {}, got {}."
" Original info from {}.".format( " Original info from {}.".format(
p, stored_sha1, new_sha1, update_ts p, stored_sha1, new_sha1, update_ts
), ),
file=sys.stderr, file=sys.stderr,
) )
for path in missing_paths: for path in missing_paths:
cur.execute('DELETE FROM bitrot WHERE path=?', (path,)) cur.execute('DELETE FROM bitrot WHERE path=?', (path,))
conn.commit() conn.commit()
@ -126,9 +150,10 @@ def run(verbosity=1):
if verbosity: if verbosity:
print("\rFinished. {} errors found.".format(error_count)) print("\rFinished. {} errors found.".format(error_count))
if verbosity == 1: if verbosity == 1:
print("{} entries in the database, {} new, {} updated, {} missing." print("{} entries in the database, {} new, {} updated, "
"".format(all_count, len(new_paths), len(updated_paths), "{} renamed, {} missing.".format(all_count, len(new_paths),
len(missing_paths))) len(updated_paths), len(renamed_paths), len(missing_paths)
))
elif verbosity > 1: elif verbosity > 1:
print("{} entries in the database.".format(all_count), end=' ') print("{} entries in the database.".format(all_count), end=' ')
if new_paths: if new_paths:
@ -141,6 +166,11 @@ def run(verbosity=1):
updated_paths.sort() updated_paths.sort()
for path in updated_paths: for path in updated_paths:
print(" ", path) print(" ", path)
if renamed_paths:
print("{} entries renamed:".format(len(renamed_paths)))
renamed_paths.sort()
for path in renamed_paths:
print(" from", path[0], "to", path[1])
if missing_paths: if missing_paths:
print("{} entries missing:".format(len(missing_paths))) print("{} entries missing:".format(len(missing_paths)))
missing_paths = sorted(missing_paths) missing_paths = sorted(missing_paths)