Clean up throttling and sha1 from feedback
This commit is contained in:
parent
b6faaf94fa
commit
11e94f663c
@ -29,6 +29,7 @@ from __future__ import unicode_literals
|
|||||||
import argparse
|
import argparse
|
||||||
import atexit
|
import atexit
|
||||||
import datetime
|
import datetime
|
||||||
|
import functools
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
@ -44,6 +45,20 @@ DOT_THRESHOLD = 200
|
|||||||
VERSION = (0, 5, 1)
|
VERSION = (0, 5, 1)
|
||||||
|
|
||||||
|
|
||||||
|
def sha1(path, chunk_size):
    """Return the hexadecimal SHA-1 digest of the file at `path`.

    The file is read in pieces of at most `chunk_size` bytes, so the whole
    file is never held in memory at once.
    """
    digest = hashlib.sha1()
    # Binary mode is required: the hash must cover the raw bytes on disk.
    # Text mode would decode the data (and fail on non-text files) and
    # hand `str` objects to `digest.update()` on Python 3.
    with open(path, 'rb') as f:
        # Keep reading until `read()` returns an empty bytes object (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
|
||||||
|
|
||||||
|
def throttled_commit(conn, commit_interval, last_commit_time):
    """Commit on `conn` only if the last commit is old enough.

    When more than `commit_interval` seconds have elapsed since
    `last_commit_time`, `conn.commit()` is called and the current time is
    returned. Otherwise nothing is committed and `last_commit_time` is
    returned unchanged. Callers are expected to store the returned value
    and pass it back in on the next call.
    """
    commit_is_due = time.time() - last_commit_time > commit_interval
    if not commit_is_due:
        return last_commit_time

    conn.commit()
    return time.time()
|
||||||
|
|
||||||
def get_sqlite3_cursor(path, copy=False):
|
def get_sqlite3_cursor(path, copy=False):
|
||||||
if copy:
|
if copy:
|
||||||
@ -99,19 +114,8 @@ def run(verbosity=1, test=False, commit_interval=300, chunk_size=DEFAULT_CHUNK_S
|
|||||||
paths.append(p)
|
paths.append(p)
|
||||||
total_size += st.st_size
|
total_size += st.st_size
|
||||||
paths.sort()
|
paths.sort()
|
||||||
last_commit_time = [time.time()]
|
last_commit_time = 0
|
||||||
def throttled_commit():
|
tcommit = functools.partial(throttled_commit, conn, commit_interval)
|
||||||
if time.time() - last_commit_time[0] > commit_interval:
|
|
||||||
conn.commit()
|
|
||||||
last_commit_time[0] = time.time()
|
|
||||||
def sha1(path):
|
|
||||||
digest = hashlib.sha1()
|
|
||||||
with open(path) as f:
|
|
||||||
d = f.read(chunk_size)
|
|
||||||
while d:
|
|
||||||
digest.update(d)
|
|
||||||
d = f.read(chunk_size)
|
|
||||||
return digest.hexdigest()
|
|
||||||
for p in paths:
|
for p in paths:
|
||||||
st = os.stat(p)
|
st = os.stat(p)
|
||||||
new_mtime = int(st.st_mtime)
|
new_mtime = int(st.st_mtime)
|
||||||
@ -122,7 +126,7 @@ def run(verbosity=1, test=False, commit_interval=300, chunk_size=DEFAULT_CHUNK_S
|
|||||||
sys.stdout.write(size_fmt)
|
sys.stdout.write(size_fmt)
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
last_reported_size = size_fmt
|
last_reported_size = size_fmt
|
||||||
new_sha1 = sha1(p)
|
new_sha1 = sha1(p, chunk_size)
|
||||||
update_ts = datetime.datetime.utcnow().strftime(
|
update_ts = datetime.datetime.utcnow().strftime(
|
||||||
'%Y-%m-%d %H:%M:%S%z'
|
'%Y-%m-%d %H:%M:%S%z'
|
||||||
)
|
)
|
||||||
@ -143,13 +147,14 @@ def run(verbosity=1, test=False, commit_interval=300, chunk_size=DEFAULT_CHUNK_S
|
|||||||
cur.execute('UPDATE bitrot SET mtime=?, path=?, '
|
cur.execute('UPDATE bitrot SET mtime=?, path=?, '
|
||||||
'timestamp=? WHERE hash=?',
|
'timestamp=? WHERE hash=?',
|
||||||
(new_mtime, p_uni, update_ts, new_sha1))
|
(new_mtime, p_uni, update_ts, new_sha1))
|
||||||
throttled_commit()
|
|
||||||
|
last_commit_time = tcommit(last_commit_time)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
new_paths.append(p)
|
new_paths.append(p)
|
||||||
cur.execute('INSERT INTO bitrot VALUES (?, ?, ?, ?)',
|
cur.execute('INSERT INTO bitrot VALUES (?, ?, ?, ?)',
|
||||||
(p_uni, new_mtime, new_sha1, update_ts))
|
(p_uni, new_mtime, new_sha1, update_ts))
|
||||||
throttled_commit()
|
last_commit_time = tcommit(last_commit_time)
|
||||||
continue
|
continue
|
||||||
stored_mtime, stored_sha1, update_ts = row
|
stored_mtime, stored_sha1, update_ts = row
|
||||||
if int(stored_mtime) != new_mtime:
|
if int(stored_mtime) != new_mtime:
|
||||||
@ -157,7 +162,7 @@ def run(verbosity=1, test=False, commit_interval=300, chunk_size=DEFAULT_CHUNK_S
|
|||||||
cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? '
|
cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? '
|
||||||
'WHERE path=?',
|
'WHERE path=?',
|
||||||
(new_mtime, new_sha1, update_ts, p_uni))
|
(new_mtime, new_sha1, update_ts, p_uni))
|
||||||
throttled_commit()
|
last_commit_time = tcommit(last_commit_time)
|
||||||
elif stored_sha1 != new_sha1:
|
elif stored_sha1 != new_sha1:
|
||||||
error_count += 1
|
error_count += 1
|
||||||
print('\rerror: SHA1 mismatch for {}: expected {}, got {}.'
|
print('\rerror: SHA1 mismatch for {}: expected {}, got {}.'
|
||||||
@ -168,7 +173,7 @@ def run(verbosity=1, test=False, commit_interval=300, chunk_size=DEFAULT_CHUNK_S
|
|||||||
)
|
)
|
||||||
for path in missing_paths:
|
for path in missing_paths:
|
||||||
cur.execute('DELETE FROM bitrot WHERE path=?', (path,))
|
cur.execute('DELETE FROM bitrot WHERE path=?', (path,))
|
||||||
throttled_commit()
|
last_commit_time = tcommit(last_commit_time)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
cur.execute('SELECT COUNT(path) FROM bitrot')
|
cur.execute('SELECT COUNT(path) FROM bitrot')
|
||||||
all_count = cur.fetchone()[0]
|
all_count = cur.fetchone()[0]
|
||||||
|
Reference in New Issue
Block a user