Make chunk size configurable
This commit is contained in:
parent
fc46cb7c53
commit
b6faaf94fa
@ -39,20 +39,11 @@ import tempfile
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
|
|
||||||
CHUNK_SIZE = 16384
|
DEFAULT_CHUNK_SIZE = 16384
|
||||||
DOT_THRESHOLD = 200
|
DOT_THRESHOLD = 200
|
||||||
VERSION = (0, 5, 1)
|
VERSION = (0, 5, 1)
|
||||||
|
|
||||||
|
|
||||||
def sha1(path):
|
|
||||||
digest = hashlib.sha1()
|
|
||||||
with open(path) as f:
|
|
||||||
d = f.read(CHUNK_SIZE)
|
|
||||||
while d:
|
|
||||||
digest.update(d)
|
|
||||||
d = f.read(CHUNK_SIZE)
|
|
||||||
return digest.hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
def get_sqlite3_cursor(path, copy=False):
|
def get_sqlite3_cursor(path, copy=False):
|
||||||
if copy:
|
if copy:
|
||||||
@ -80,7 +71,7 @@ def get_sqlite3_cursor(path, copy=False):
|
|||||||
return conn
|
return conn
|
||||||
|
|
||||||
|
|
||||||
def run(verbosity=1, test=False, commit_interval=300):
|
def run(verbosity=1, test=False, commit_interval=300, chunk_size=DEFAULT_CHUNK_SIZE):
|
||||||
current_dir = b'.' # sic, relative path
|
current_dir = b'.' # sic, relative path
|
||||||
bitrot_db = os.path.join(current_dir, b'.bitrot.db')
|
bitrot_db = os.path.join(current_dir, b'.bitrot.db')
|
||||||
conn = get_sqlite3_cursor(bitrot_db, copy=test)
|
conn = get_sqlite3_cursor(bitrot_db, copy=test)
|
||||||
@ -113,6 +104,14 @@ def run(verbosity=1, test=False, commit_interval=300):
|
|||||||
if time.time() - last_commit_time[0] > commit_interval:
|
if time.time() - last_commit_time[0] > commit_interval:
|
||||||
conn.commit()
|
conn.commit()
|
||||||
last_commit_time[0] = time.time()
|
last_commit_time[0] = time.time()
|
||||||
|
def sha1(path):
|
||||||
|
digest = hashlib.sha1()
|
||||||
|
with open(path) as f:
|
||||||
|
d = f.read(chunk_size)
|
||||||
|
while d:
|
||||||
|
digest.update(d)
|
||||||
|
d = f.read(chunk_size)
|
||||||
|
return digest.hexdigest()
|
||||||
for p in paths:
|
for p in paths:
|
||||||
st = os.stat(p)
|
st = os.stat(p)
|
||||||
new_mtime = int(st.st_mtime)
|
new_mtime = int(st.st_mtime)
|
||||||
@ -241,6 +240,8 @@ def run_from_command_line():
|
|||||||
version='%(prog)s {}.{}.{}'.format(*VERSION))
|
version='%(prog)s {}.{}.{}'.format(*VERSION))
|
||||||
parser.add_argument('--commit-interval', type=float, default=300,
|
parser.add_argument('--commit-interval', type=float, default=300,
|
||||||
help='min time between commits (0 commits on every operation)')
|
help='min time between commits (0 commits on every operation)')
|
||||||
|
parser.add_argument('--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
||||||
|
help='read files this many bytes at a time')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.sum:
|
if args.sum:
|
||||||
try:
|
try:
|
||||||
@ -253,7 +254,9 @@ def run_from_command_line():
|
|||||||
verbosity = 0
|
verbosity = 0
|
||||||
elif args.verbose:
|
elif args.verbose:
|
||||||
verbosity = 2
|
verbosity = 2
|
||||||
run(verbosity=verbosity, test=args.test, commit_interval=args.commit_interval)
|
run(verbosity=verbosity, test=args.test,
|
||||||
|
commit_interval=args.commit_interval,
|
||||||
|
chunk_size=args.chunk_size)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Reference in New Issue
Block a user