parent
104e07b66b
commit
0dc3390b7f
10
bin/bitrot
10
bin/bitrot
@ -2,7 +2,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright (C) 2013 by Łukasz Langa
|
# Copyright (C) 2013 by Łukasz Langa
|
||||||
#
|
#
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
# in the Software without restriction, including without limitation the rights
|
# in the Software without restriction, including without limitation the rights
|
||||||
@ -26,5 +26,11 @@ from __future__ import division
|
|||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from multiprocessing import freeze_support
|
||||||
|
|
||||||
from bitrot import run_from_command_line
|
from bitrot import run_from_command_line
|
||||||
run_from_command_line()
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
freeze_support()
|
||||||
|
run_from_command_line()
|
||||||
|
2
setup.py
2
setup.py
@ -58,8 +58,8 @@ setup(
|
|||||||
include_package_data = True,
|
include_package_data = True,
|
||||||
zip_safe = False, # if only because of the readme file
|
zip_safe = False, # if only because of the readme file
|
||||||
install_requires = [
|
install_requires = [
|
||||||
|
'futures; python_version == "2.7"'
|
||||||
],
|
],
|
||||||
|
|
||||||
classifiers = [
|
classifiers = [
|
||||||
'Development Status :: 4 - Beta',
|
'Development Status :: 4 - Beta',
|
||||||
'License :: OSI Approved :: MIT License',
|
'License :: OSI Approved :: MIT License',
|
||||||
|
@ -40,6 +40,8 @@ import tempfile
|
|||||||
import time
|
import time
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
|
from concurrent.futures import ProcessPoolExecutor, wait, as_completed
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_CHUNK_SIZE = 16384 # block size in HFS+; 4X the block size in ext4
|
DEFAULT_CHUNK_SIZE = 16384 # block size in HFS+; 4X the block size in ext4
|
||||||
DOT_THRESHOLD = 200
|
DOT_THRESHOLD = 200
|
||||||
@ -144,6 +146,43 @@ def list_existing_paths(directory, expected=(), ignored=(), follow_links=False):
|
|||||||
return paths, total_size
|
return paths, total_size
|
||||||
|
|
||||||
|
|
||||||
|
def compute_one(path, chunk_size):
    """Return a tuple with (unicode path, size, mtime, sha1) for `path`.

    Takes a binary path. `chunk_size` is passed through unchanged to
    `sha1()`.

    Raises BitrotException, after printing a warning to stderr, when:
    * `os.stat()` fails with an errno listed in IGNORED_FILE_SYSTEM_ERRORS;
    * computing the hash fails with IOError or OSError.

    Any other OSError raised by `os.stat()` is re-raised unchanged.
    """
    p_uni = normalize_path(path)
    try:
        st = os.stat(path)
    except OSError as ex:
        if ex.errno in IGNORED_FILE_SYSTEM_ERRORS:
            # The file disappeared between listing existing paths and
            # this run or is (temporarily?) locked with different
            # permissions. We'll just skip it for now.
            print(
                '\rwarning: `{}` is currently unavailable for '
                'reading: {}'.format(
                    p_uni, ex,
                ),
                file=sys.stderr,
            )
            raise BitrotException

        raise   # Not expected? https://github.com/ambv/bitrot/issues/

    try:
        new_sha1 = sha1(path, chunk_size)
    except (IOError, OSError) as e:
        # Not every I/O error carries an errno, and not every errno is in
        # the symbolic table. Fall back to the exception itself so that
        # building the warning can never raise IndexError/KeyError and mask
        # the BitrotException the caller is expecting.
        reason = errno.errorcode.get(e.errno, e)
        print(
            '\rwarning: cannot compute hash of {} [{}]'.format(
                p_uni, reason,
            ),
            file=sys.stderr,
        )
        raise BitrotException

    return p_uni, st.st_size, int(st.st_mtime), new_sha1
|
||||||
|
|
||||||
|
|
||||||
class BitrotException(Exception):
|
class BitrotException(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -151,7 +190,7 @@ class BitrotException(Exception):
|
|||||||
class Bitrot(object):
|
class Bitrot(object):
|
||||||
def __init__(
|
def __init__(
|
||||||
self, verbosity=1, test=False, follow_links=False, commit_interval=300,
|
self, verbosity=1, test=False, follow_links=False, commit_interval=300,
|
||||||
chunk_size=DEFAULT_CHUNK_SIZE,
|
chunk_size=DEFAULT_CHUNK_SIZE, workers=os.cpu_count(),
|
||||||
):
|
):
|
||||||
self.verbosity = verbosity
|
self.verbosity = verbosity
|
||||||
self.test = test
|
self.test = test
|
||||||
@ -160,6 +199,7 @@ class Bitrot(object):
|
|||||||
self.chunk_size = chunk_size
|
self.chunk_size = chunk_size
|
||||||
self._last_reported_size = ''
|
self._last_reported_size = ''
|
||||||
self._last_commit_ts = 0
|
self._last_commit_ts = 0
|
||||||
|
self.pool = ProcessPoolExecutor(max_workers=workers)
|
||||||
|
|
||||||
def maybe_commit(self, conn):
|
def maybe_commit(self, conn):
|
||||||
if time.time() < self._last_commit_ts + self.commit_interval:
|
if time.time() < self._last_commit_ts + self.commit_interval:
|
||||||
@ -195,44 +235,18 @@ class Bitrot(object):
|
|||||||
follow_links=self.follow_links,
|
follow_links=self.follow_links,
|
||||||
)
|
)
|
||||||
paths_uni = set(normalize_path(p) for p in paths)
|
paths_uni = set(normalize_path(p) for p in paths)
|
||||||
|
futures = [self.pool.submit(compute_one, p, self.chunk_size) for p in paths]
|
||||||
|
|
||||||
for p in sorted(paths):
|
for future in as_completed(futures):
|
||||||
p_uni = normalize_path(p)
|
|
||||||
try:
|
try:
|
||||||
st = os.stat(p)
|
p_uni, new_size, new_mtime, new_sha1 = future.result()
|
||||||
except OSError as ex:
|
except BitrotException:
|
||||||
if ex.errno in IGNORED_FILE_SYSTEM_ERRORS:
|
continue
|
||||||
# The file disappeared between listing existing paths and
|
|
||||||
# this run or is (temporarily?) locked with different
|
|
||||||
# permissions. We'll just skip it for now.
|
|
||||||
print(
|
|
||||||
'\rwarning: `{}` is currently unavailable for '
|
|
||||||
'reading: {}'.format(
|
|
||||||
p_uni, ex,
|
|
||||||
),
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
raise # Not expected? https://github.com/ambv/bitrot/issues/
|
current_size += new_size
|
||||||
|
|
||||||
new_mtime = int(st.st_mtime)
|
|
||||||
current_size += st.st_size
|
|
||||||
if self.verbosity:
|
if self.verbosity:
|
||||||
self.report_progress(current_size, total_size)
|
self.report_progress(current_size, total_size)
|
||||||
|
|
||||||
try:
|
|
||||||
new_sha1 = sha1(p, self.chunk_size)
|
|
||||||
except (IOError, OSError) as e:
|
|
||||||
print(
|
|
||||||
'\rwarning: cannot compute hash of {} [{}]'.format(
|
|
||||||
p, errno.errorcode[e.args[0]],
|
|
||||||
),
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
missing_paths.discard(p_uni)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if p_uni not in missing_paths:
|
if p_uni not in missing_paths:
|
||||||
# We are not expecting this path, it wasn't in the database yet.
|
# We are not expecting this path, it wasn't in the database yet.
|
||||||
# It's either new or a rename. Let's handle that.
|
# It's either new or a rename. Let's handle that.
|
||||||
@ -271,11 +285,11 @@ class Bitrot(object):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if stored_sha1 != new_sha1:
|
if stored_sha1 != new_sha1:
|
||||||
errors.append(p)
|
errors.append(p_uni)
|
||||||
print(
|
print(
|
||||||
'\rerror: SHA1 mismatch for {}: expected {}, got {}.'
|
'\rerror: SHA1 mismatch for {}: expected {}, got {}.'
|
||||||
' Last good hash checked on {}.'.format(
|
' Last good hash checked on {}.'.format(
|
||||||
p.decode(FSENCODING), stored_sha1, new_sha1, stored_ts
|
p_uni, stored_sha1, new_sha1, stored_ts
|
||||||
),
|
),
|
||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
)
|
)
|
||||||
@ -538,6 +552,9 @@ def run_from_command_line():
|
|||||||
'--commit-interval', type=float, default=300,
|
'--commit-interval', type=float, default=300,
|
||||||
help='min time in seconds between commits '
|
help='min time in seconds between commits '
|
||||||
'(0 commits on every operation)')
|
'(0 commits on every operation)')
|
||||||
|
parser.add_argument(
|
||||||
|
'-w', '--workers', type=int, default=os.cpu_count(),
|
||||||
|
help='run this many workers (use -w1 for slow magnetic disks)')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
'--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
||||||
help='read files this many bytes at a time')
|
help='read files this many bytes at a time')
|
||||||
@ -563,6 +580,7 @@ def run_from_command_line():
|
|||||||
follow_links=args.follow_links,
|
follow_links=args.follow_links,
|
||||||
commit_interval=args.commit_interval,
|
commit_interval=args.commit_interval,
|
||||||
chunk_size=args.chunk_size,
|
chunk_size=args.chunk_size,
|
||||||
|
workers=args.workers,
|
||||||
)
|
)
|
||||||
if args.fsencoding:
|
if args.fsencoding:
|
||||||
FSENCODING = args.fsencoding
|
FSENCODING = args.fsencoding
|
||||||
|
Loading…
x
Reference in New Issue
Block a user