diff --git a/README.rst b/README.rst index 07975de..dfbaa8b 100644 --- a/README.rst +++ b/README.rst @@ -5,10 +5,38 @@ bitrot Detects bit rotten files on the hard drive to save your precious photo and music collection from slow decay. +Usage +----- + +Go to the desired directory and simply invoke:: + + $ bitrot + +This will start digging through your directory structure, recursively indexing +all files found. The index is stored in a ``.bitrot.db`` file, which is a DBM +database. + +Next time you run ``bitrot`` it will add new files and update the index for +files with a changed modification date. Most importantly, however, it will +report all errors, e.g. files that changed on the hard drive but still have the +same modification date. + +Performance +----------- + +Scanning speed obviously depends on how fast the underlying drive is. No rigorous performance +tests have been done. For informational purposes, on my typical 5400 RPM laptop +hard drive scanning a 60+ GB music library takes around 20 minutes. On an OCZ +Vertex 3 SSD ``bitrot`` is able to scan a 100 GB Aperture library in +under 10 minutes. Both tests were run on HFS+. + Change Log ---------- -No versions tagged yet. +0.1.0 +~~~~~ + +* First published version. 
Authors ------- diff --git a/bin/bitrot b/bin/bitrot new file mode 100644 index 0000000..df097cf --- /dev/null +++ b/bin/bitrot @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright (C) 2013 by Łukasz Langa +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from bitrot import run +run() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..7ccb061 --- /dev/null +++ b/setup.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright (C) 2013 by Łukasz Langa +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import os +import sys +from setuptools import setup, find_packages + +reload(sys) +sys.setdefaultencoding('utf8') + +ld_file = open(os.path.join(os.path.dirname(__file__), 'README.rst')) +try: + long_description = ld_file.read() +finally: + ld_file.close() +# We let it die a horrible tracebacking death if reading the file fails. +# We couldn't sensibly recover anyway: we need the long description. 
+ +setup( + name = 'bitrot', + version = '0.1.0', + author = 'Łukasz Langa', + author_email = 'lukasz@langa.pl', + description = ("Detects bit rotten files on the hard drive to save your " + "precious photo and music collection from slow decay."), + long_description = long_description, + url = 'https://github.com/ambv/bitrot/', + keywords = 'file checksum database', + platforms = ['any'], + license = 'MIT', + package_dir = {'': 'src'}, + packages = find_packages('src'), + py_modules = ['bitrot'], + scripts = ['bin/bitrot'], + include_package_data = True, + zip_safe = False, # if only because of the readme file + install_requires = [ + ], + + classifiers = [ + 'Development Status :: 3 - Alpha', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 2 :: Only', + 'Programming Language :: Python', + 'Topic :: System :: Filesystems', + 'Topic :: System :: Monitoring', + 'Topic :: Software Development :: Libraries :: Python Modules', + ] + ) diff --git a/src/bitrot.py b/src/bitrot.py new file mode 100644 index 0000000..8e88f78 --- /dev/null +++ b/src/bitrot.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright (C) 2013 by Łukasz Langa +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import atexit
import datetime
import dbm
import hashlib
import os
import stat
import sys


CHUNK_SIZE = 16384
DOT_THRESHOLD = 200
VERSION = (0, 1, 0)

# File name suffixes the various dbm backends may append to the base name
# given to ``dbm.open()``.  The empty suffix covers backends that use the name
# unchanged.  Every one of these files belongs to the index itself and must
# never be indexed.
_DB_SUFFIXES = (b'', b'.db', b'.dat', b'.dir', b'.bak', b'.pag')


def sha1(path):
    """Return the hexadecimal SHA1 digest of the contents of `path`.

    The file is read in binary mode, CHUNK_SIZE bytes at a time, so the digest
    covers the exact bytes on disk (no newline translation or decoding) and
    memory use stays constant regardless of file size.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    digest = hashlib.sha1()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _display(value):
    """Return `value` as text that is safe to interpolate into a message.

    Paths are handled as byte strings throughout; bytes that cannot be decoded
    are replaced instead of raising.
    """
    if isinstance(value, bytes):
        return value.decode(sys.getfilesystemencoding() or 'utf8', 'replace')
    return value


def _report(message):
    """Print `message` on stderr without ever failing on encoding.

    Characters that stderr cannot encode (e.g. when it is a pipe without a
    declared encoding) are replaced, so reporting a problem can never itself
    abort the scan.
    """
    encoding = getattr(sys.stderr, 'encoding', None) or 'ascii'
    print(message.encode(encoding, 'replace').decode(encoding),
          file=sys.stderr)


def _timestamp():
    """Return the current UTC time as text, e.g. ``2013-01-21\\u00a012:00:00``.

    Date and time are joined with a no-break space rather than a plain space
    so that a stored record can still be split on plain spaces.
    """
    now = datetime.datetime.utcnow()
    # str() keeps the format a native string, which strftime() requires on
    # Python 2 even though unicode_literals is in effect.
    return '\u00a0'.join((
        now.strftime(str('%Y-%m-%d')),
        now.strftime(str('%H:%M:%S%z')),
    ))


def run():
    """Index all files below the current directory and report bit rot.

    For every regular file the modification time and SHA1 digest are compared
    with the record stored in the ``.bitrot`` dbm database (created on first
    use in the current directory):

    * no usable record      -> the file is stored and counted as new;
    * different mtime       -> the record is refreshed and counted as updated;
    * same mtime, new SHA1  -> an error is reported on stderr and the stored
                               record is left untouched.

    Files that cannot be read are reported and counted as errors as well,
    without interrupting the scan.  A dot is written to stdout every
    DOT_THRESHOLD files as a progress indicator.

    Exits the process with status 1 if any error was found.
    """
    current_dir = b'.'   # sic, relative path
    bitrot_db = os.path.join(current_dir, b'.bitrot')
    db = dbm.open(bitrot_db, 'c')
    atexit.register(db.close)
    # Which file(s) the database lives in depends on the dbm backend.
    own_files = frozenset(bitrot_db + suffix for suffix in _DB_SUFFIXES)
    new_count = 0
    update_count = 0
    error_count = 0
    dot_count = 0
    for path, _, files in os.walk(current_dir):
        for f in files:
            dot_count = (dot_count + 1) % DOT_THRESHOLD
            if not dot_count:
                sys.stdout.write('.')
                sys.stdout.flush()
            p = os.path.join(path, f)
            if p in own_files:
                continue
            try:
                info = os.stat(p)
                if not stat.S_ISREG(info.st_mode):
                    # FIFOs, sockets and devices have no stable content and
                    # opening them may block forever.
                    continue
                new_mtime = int(info.st_mtime)
                new_sha1 = sha1(p)
            except (IOError, OSError) as e:
                # A vanished or unreadable file must not abort the whole scan,
                # but it still has to make the run fail.
                error_count += 1
                _report("\rerror: cannot read {}: {}.".format(
                    _display(p), _display(e.strerror or 'unknown error')
                ))
                continue
            try:
                stored_mtime, stored_sha1, update_ts = (
                    db[p].decode('utf8').split(' ')
                )
                stored_mtime = int(stored_mtime)
            except (KeyError, ValueError):
                # Missing or malformed record: treat the file as new.
                new_count += 1
            else:
                if stored_mtime == new_mtime:
                    if stored_sha1 != new_sha1:
                        # Same mtime but different content: bit rot.  The
                        # stored record is kept so the error is reported
                        # again on the next run.
                        error_count += 1
                        _report(
                            "\rerror: SHA1 mismatch for {}: expected {}, "
                            "got {}. Original info from {}.".format(
                                _display(p), stored_sha1, new_sha1, update_ts
                            )
                        )
                    continue
                update_count += 1
            db[p] = '{} {} {}'.format(
                new_mtime, new_sha1, _timestamp()
            ).encode('utf8')
    print("\nFinished. {} errors found.".format(error_count))
    print("{} entries in the database, {} new, {} updated.".format(
        len(db), new_count, update_count
    ))
    if error_count:
        sys.exit(1)


if __name__ == '__main__':
    run()