parent
104e07b66b
commit
0dc3390b7f
10
bin/bitrot
10
bin/bitrot
@ -2,7 +2,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (C) 2013 by Łukasz Langa
|
||||
#
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
@ -26,5 +26,11 @@ from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from multiprocessing import freeze_support
|
||||
|
||||
from bitrot import run_from_command_line
|
||||
run_from_command_line()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
freeze_support()
|
||||
run_from_command_line()
|
||||
|
2
setup.py
2
setup.py
@ -58,8 +58,8 @@ setup(
|
||||
include_package_data = True,
|
||||
zip_safe = False, # if only because of the readme file
|
||||
install_requires = [
|
||||
'futures; python_version == "2.7"'
|
||||
],
|
||||
|
||||
classifiers = [
|
||||
'Development Status :: 4 - Beta',
|
||||
'License :: OSI Approved :: MIT License',
|
||||
|
@ -40,6 +40,8 @@ import tempfile
|
||||
import time
|
||||
import unicodedata
|
||||
|
||||
from concurrent.futures import ProcessPoolExecutor, wait, as_completed
|
||||
|
||||
|
||||
DEFAULT_CHUNK_SIZE = 16384 # block size in HFS+; 4X the block size in ext4
|
||||
DOT_THRESHOLD = 200
|
||||
@ -144,6 +146,43 @@ def list_existing_paths(directory, expected=(), ignored=(), follow_links=False):
|
||||
return paths, total_size
|
||||
|
||||
|
||||
def compute_one(path, chunk_size):
    """Return a tuple with (unicode path, size, mtime, sha1). Takes a binary path.

    The file is stat'ed first and then hashed with `sha1(path, chunk_size)`.
    The mtime in the result is truncated to whole seconds.

    Raises BitrotException (after printing a warning to stderr) when the
    file cannot be stat'ed because of one of IGNORED_FILE_SYSTEM_ERRORS or
    when hashing fails with an I/O error.  Any other OSError raised by
    `os.stat` is propagated unchanged.
    """
    p_uni = normalize_path(path)
    try:
        st = os.stat(path)
    except OSError as ex:
        if ex.errno in IGNORED_FILE_SYSTEM_ERRORS:
            # The file disappeared between listing existing paths and
            # this run or is (temporarily?) locked with different
            # permissions. We'll just skip it for now.
            print(
                '\rwarning: `{}` is currently unavailable for '
                'reading: {}'.format(
                    p_uni, ex,
                ),
                file=sys.stderr,
            )
            raise BitrotException

        raise   # Not expected? https://github.com/ambv/bitrot/issues/

    new_mtime = int(st.st_mtime)

    try:
        new_sha1 = sha1(path, chunk_size)
    except (IOError, OSError) as e:
        # Not every I/O error carries an errno that is present in
        # errno.errorcode; fall back to the exception itself so that
        # reporting the problem never raises a KeyError/IndexError.
        reason = errno.errorcode.get(e.errno, e)
        print(
            '\rwarning: cannot compute hash of {} [{}]'.format(
                p_uni, reason,
            ),
            file=sys.stderr,
        )
        raise BitrotException

    return p_uni, st.st_size, new_mtime, new_sha1
|
||||
|
||||
|
||||
class BitrotException(Exception):
    """Signal that a single file could not be processed.

    `compute_one` raises it after printing a warning when a file cannot be
    stat'ed or hashed.
    """
|
||||
|
||||
@ -151,7 +190,7 @@ class BitrotException(Exception):
|
||||
class Bitrot(object):
|
||||
def __init__(
|
||||
self, verbosity=1, test=False, follow_links=False, commit_interval=300,
|
||||
chunk_size=DEFAULT_CHUNK_SIZE,
|
||||
chunk_size=DEFAULT_CHUNK_SIZE, workers=os.cpu_count(),
|
||||
):
|
||||
self.verbosity = verbosity
|
||||
self.test = test
|
||||
@ -160,6 +199,7 @@ class Bitrot(object):
|
||||
self.chunk_size = chunk_size
|
||||
self._last_reported_size = ''
|
||||
self._last_commit_ts = 0
|
||||
self.pool = ProcessPoolExecutor(max_workers=workers)
|
||||
|
||||
def maybe_commit(self, conn):
|
||||
if time.time() < self._last_commit_ts + self.commit_interval:
|
||||
@ -195,44 +235,18 @@ class Bitrot(object):
|
||||
follow_links=self.follow_links,
|
||||
)
|
||||
paths_uni = set(normalize_path(p) for p in paths)
|
||||
futures = [self.pool.submit(compute_one, p, self.chunk_size) for p in paths]
|
||||
|
||||
for p in sorted(paths):
|
||||
p_uni = normalize_path(p)
|
||||
for future in as_completed(futures):
|
||||
try:
|
||||
st = os.stat(p)
|
||||
except OSError as ex:
|
||||
if ex.errno in IGNORED_FILE_SYSTEM_ERRORS:
|
||||
# The file disappeared between listing existing paths and
|
||||
# this run or is (temporarily?) locked with different
|
||||
# permissions. We'll just skip it for now.
|
||||
print(
|
||||
'\rwarning: `{}` is currently unavailable for '
|
||||
'reading: {}'.format(
|
||||
p_uni, ex,
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
continue
|
||||
p_uni, new_size, new_mtime, new_sha1 = future.result()
|
||||
except BitrotException:
|
||||
continue
|
||||
|
||||
raise # Not expected? https://github.com/ambv/bitrot/issues/
|
||||
|
||||
new_mtime = int(st.st_mtime)
|
||||
current_size += st.st_size
|
||||
current_size += new_size
|
||||
if self.verbosity:
|
||||
self.report_progress(current_size, total_size)
|
||||
|
||||
try:
|
||||
new_sha1 = sha1(p, self.chunk_size)
|
||||
except (IOError, OSError) as e:
|
||||
print(
|
||||
'\rwarning: cannot compute hash of {} [{}]'.format(
|
||||
p, errno.errorcode[e.args[0]],
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
missing_paths.discard(p_uni)
|
||||
continue
|
||||
|
||||
if p_uni not in missing_paths:
|
||||
# We are not expecting this path, it wasn't in the database yet.
|
||||
# It's either new or a rename. Let's handle that.
|
||||
@ -271,11 +285,11 @@ class Bitrot(object):
|
||||
continue
|
||||
|
||||
if stored_sha1 != new_sha1:
|
||||
errors.append(p)
|
||||
errors.append(p_uni)
|
||||
print(
|
||||
'\rerror: SHA1 mismatch for {}: expected {}, got {}.'
|
||||
' Last good hash checked on {}.'.format(
|
||||
p.decode(FSENCODING), stored_sha1, new_sha1, stored_ts
|
||||
p_uni, stored_sha1, new_sha1, stored_ts
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
@ -538,6 +552,9 @@ def run_from_command_line():
|
||||
'--commit-interval', type=float, default=300,
|
||||
help='min time in seconds between commits '
|
||||
'(0 commits on every operation)')
|
||||
parser.add_argument(
|
||||
'-w', '--workers', type=int, default=os.cpu_count(),
|
||||
help='run this many workers (use -w1 for slow magnetic disks)')
|
||||
parser.add_argument(
|
||||
'--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
||||
help='read files this many bytes at a time')
|
||||
@ -563,6 +580,7 @@ def run_from_command_line():
|
||||
follow_links=args.follow_links,
|
||||
commit_interval=args.commit_interval,
|
||||
chunk_size=args.chunk_size,
|
||||
workers=args.workers,
|
||||
)
|
||||
if args.fsencoding:
|
||||
FSENCODING = args.fsencoding
|
||||
|
Loading…
x
Reference in New Issue
Block a user