first usable version backed by DBM
This commit is contained in:
parent
7a3545b55a
commit
b244bdd4d9
30
README.rst
30
README.rst
@ -5,10 +5,38 @@ bitrot
|
||||
Detects bit rotten files on the hard drive to save your precious photo and
|
||||
music collection from slow decay.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
Go to the desired directory and simply invoke::
|
||||
|
||||
$ bitrot
|
||||
|
||||
This will start digging through your directory structure, recursively indexing
|
||||
all files found. The index is stored in a ``.bitrot.db`` file which is a DBM
|
||||
database.
|
||||
|
||||
Next time you run ``bitrot`` it will add new files and update the index for
|
||||
files with a changed modification date. Most importantly however, it will
|
||||
report all errors, e.g. files that changed on the hard drive but still have the
|
||||
same modification date.
|
||||
|
||||
Performance
|
||||
-----------
|
||||
|
||||
Performance obviously depends on how fast the underlying drive is. No rigorous performance
|
||||
tests have been done. For informational purposes, on my typical 5400 RPM laptop
|
||||
hard drive scanning a 60+ GB music library takes around 20 minutes. On an OCZ
|
||||
Vertex 3 SSD, ``bitrot`` is able to scan a 100 GB Aperture library in
|
||||
under 10 minutes. Both tests on HFS+.
|
||||
|
||||
Change Log
|
||||
----------
|
||||
|
||||
No versions tagged yet.
|
||||
0.1.0
|
||||
~~~~~
|
||||
|
||||
* First published version.
|
||||
|
||||
Authors
|
||||
-------
|
||||
|
30
bin/bitrot
Normal file
30
bin/bitrot
Normal file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (C) 2013 by Łukasz Langa
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from bitrot import run

# This script is only a launcher: all work happens in bitrot.run().
run()
|
71
setup.py
Normal file
71
setup.py
Normal file
@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (C) 2013 by Łukasz Langa
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
import os
|
||||
import sys
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Python 2 idiom: switch the interpreter-wide default encoding to UTF-8
# before any of the non-ASCII metadata below is handled.
reload(sys)
sys.setdefaultencoding('utf8')

# We let it die a horrible tracebacking death if reading the file fails.
# We couldn't sensibly recover anyway: we need the long description.
readme_path = os.path.join(os.path.dirname(__file__), 'README.rst')
with open(readme_path) as readme:
    long_description = readme.read()

setup(
    name='bitrot',
    version='0.1.0',
    author='Łukasz Langa',
    author_email='lukasz@langa.pl',
    description=("Detects bit rotten files on the hard drive to save your "
                 "precious photo and music collection from slow decay."),
    long_description=long_description,
    url='https://github.com/ambv/bitrot/',
    keywords='file checksum database',
    platforms=['any'],
    license='MIT',
    package_dir={'': 'src'},
    packages=find_packages('src'),
    py_modules=['bitrot'],
    scripts=['bin/bitrot'],
    include_package_data=True,
    zip_safe=False,  # if only because of the readme file
    install_requires=[
    ],

    classifiers=[
        'Development Status :: 3 - Alpha',
        'License :: OSI Approved :: MIT License',
        'Natural Language :: English',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 2 :: Only',
        'Programming Language :: Python',
        'Topic :: System :: Filesystems',
        'Topic :: System :: Monitoring',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ]
)
|
102
src/bitrot.py
Normal file
102
src/bitrot.py
Normal file
@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (C) 2013 by Łukasz Langa
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import atexit
|
||||
import datetime
|
||||
import dbm
|
||||
import hashlib
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
# Size passed to each read() call while sha1() hashes a file.
CHUNK_SIZE = 16384
# run() writes one progress dot to stdout for every DOT_THRESHOLD files seen.
DOT_THRESHOLD = 200
# Version of this module as a tuple of integers.
VERSION = (0, 1, 0)
|
||||
|
||||
def sha1(path, chunk_size=None):
    """Return the hexadecimal SHA-1 digest of the file at *path*.

    The file is read in fixed-size chunks, so files of any size can be
    hashed without loading them into memory at once.

    :param path: path of the file to hash.
    :param chunk_size: number of bytes to read per iteration; when omitted
        the module-level ``CHUNK_SIZE`` is used.
    :returns: the digest as a string of hexadecimal digits.
    :raises IOError: (``OSError`` on Python 3) if the file cannot be opened
        or read.
    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    digest = hashlib.sha1()
    # Binary mode matters: text mode may translate line endings (Windows) or
    # try to decode the contents (Python 3), and then the checksum would no
    # longer describe the bytes actually stored on disk.
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def run():
    """Index every file below the current directory and report bit rot.

    Opens the DBM database ``.bitrot`` in the current directory (flag
    ``'c'``) and walks the tree with ``os.walk``.  For each file the
    modification time and SHA-1 are computed and compared with the stored
    record:

    * no usable record -- the file is counted as new and recorded;
    * record with a different mtime -- counted as updated and re-recorded;
    * record with the same mtime but a different SHA-1 -- reported on
      stderr and counted as an error.

    A summary is printed at the end, and the process exits with status 1
    when at least one error was found.
    """
    root = b'.'  # sic, relative path
    index_path = os.path.join(root, b'.bitrot')
    index = dbm.open(index_path, 'c')
    # From here on the name is only compared against walked paths, so that
    # the ``.bitrot.db`` file is not indexed itself.
    index_path += b'.db'
    atexit.register(index.close)

    added = refreshed = mismatches = 0
    ticks = 0
    for dirpath, _, filenames in os.walk(root):
        for filename in filenames:
            # Progress indicator: one dot per DOT_THRESHOLD files seen.
            ticks = (ticks + 1) % DOT_THRESHOLD
            if ticks == 0:
                sys.stdout.write('.')
                sys.stdout.flush()

            full_path = os.path.join(dirpath, filename)
            if full_path == index_path:
                continue

            mtime_now = int(os.stat(full_path).st_mtime)
            sha1_now = sha1(full_path)
            try:
                stored_mtime, stored_sha1, update_ts = (
                    index[full_path].split(b' ')
                )
                unchanged = int(stored_mtime) == mtime_now
            except (KeyError, ValueError):
                # Missing or malformed record: treat the file as new.
                added += 1
                unchanged = False
            else:
                if not unchanged:
                    refreshed += 1

            if not unchanged:
                # The date and time are joined with a non-breaking space so
                # the record still splits into exactly three fields on b' '.
                update_ts = datetime.datetime.utcnow().strftime(
                    "%Y-%m-%d\u00a0%H:%M:%S%z".encode('utf8')
                )
                index[full_path] = b'{} {} {}'.format(
                    mtime_now, sha1_now, update_ts
                )
                continue

            if stored_sha1 != sha1_now:
                mismatches += 1
                print(
                    "\rerror: SHA1 mismatch for {}: expected {}, got {}."
                    " Original info from {}.".format(
                        full_path, stored_sha1, sha1_now, update_ts
                    ),
                    file=sys.stderr,
                )

    print("\nFinished. {} errors found.".format(mismatches))
    print("{} entries in the database, {} new, {} updated.".format(
        len(index), added, refreshed
    ))
    if mismatches:
        sys.exit(1)
|
||||
|
||||
|
||||
# Allow running this module directly as a script.
if __name__ == '__main__':
    run()
|
Loading…
x
Reference in New Issue
Block a user