#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2013 by Łukasz Langa
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
2013-02-10 19:16:10 +01:00
import argparse
2013-01-17 15:01:22 +01:00
import atexit
import datetime
2013-10-17 11:42:23 -07:00
import errno
2013-01-17 15:01:22 +01:00
import hashlib
import os
2013-03-15 17:12:04 +01:00
import shutil
2013-01-17 15:59:29 +01:00
import sqlite3
2013-03-04 00:49:42 +01:00
import stat
2013-01-17 15:01:22 +01:00
import sys
2013-03-15 17:12:04 +01:00
import tempfile
2013-08-29 15:39:33 -07:00
import time
2020-05-18 00:27:05 +08:00
import unicodedata
2013-01-17 15:01:22 +01:00
2016-11-01 12:02:34 -07:00
DEFAULT_CHUNK_SIZE = 16384  # block size in HFS+; 4X the block size in ext4
DOT_THRESHOLD = 200
VERSION = (0, 9, 2)
# errno values that are tolerated (the file is skipped) instead of re-raised.
IGNORED_FILE_SYSTEM_ERRORS = {errno.ENOENT, errno.EACCES}
# Codec used to decode file names; `--fsencoding` may override it at runtime.
FSENCODING = sys.getfilesystemencoding()
2013-01-17 15:01:22 +01:00
2013-01-17 15:59:29 +01:00
2016-08-09 14:51:57 -07:00
if sys.version_info[0] == 2:
    # Python 2 only: rebind `str` to the unicode text type for this module.
    str = type(u'text')
    # use `bytes` for bytestrings
2020-05-18 00:27:05 +08:00
def normalize_path(path):
    """Return `path` in the form used as a database key.

    When the file system encoding is UTF-8 the text is NFKD-normalized, so
    that differently composed spellings of the same name compare equal.
    For any other encoding `path` is returned unchanged.

    `path` must be text (already decoded), not a bytestring.
    """
    if FSENCODING in ('utf-8', 'UTF-8'):
        return unicodedata.normalize('NFKD', path)
    return path
2013-10-17 11:40:01 -07:00
def sha1(path, chunk_size):
    """Return the hexadecimal SHA-1 digest of the file at `path`.

    The file is opened in binary mode and consumed `chunk_size` bytes at a
    time, so the whole file is never held in memory at once.  Errors from
    opening or reading the file propagate to the caller.
    """
    digest = hashlib.sha1()
    with open(path, 'rb') as f:
        # `read()` returns b'' at end of file, which terminates the iterator.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
2013-10-27 06:45:25 +01:00
2015-06-22 18:08:26 -07:00
def ts():
    """Return the current UTC time as ``YYYY-MM-DD HH:MM:SS``.

    The previous implementation formatted a naive `utcnow()` value with a
    trailing ``%z``, which always expanded to an empty string.  The output
    here is identical, but it avoids `datetime.utcnow()`, which is
    deprecated since Python 3.12.
    """
    return time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
2013-01-17 15:01:22 +01:00
2013-10-27 06:45:25 +01:00
2013-03-15 17:12:04 +01:00
def get_sqlite3_cursor(path, copy=False):
    """Open the bitrot database at `path` and return the connection.

    Despite the name, the returned object is a `sqlite3.Connection`.

    `path` may be a bytestring (decoded with `FSENCODING`) or text.  With
    `copy=True` the database is first copied to a temporary file and the
    connection is opened on that copy, so the original is never modified;
    the copy is deleted at interpreter exit.

    The `bitrot` table and the `bitrot_hash_idx` index are created when
    absent.  A commit and a close of the connection are scheduled with
    `atexit`.

    Raises ValueError if `copy` is true and no file exists at `path`.
    """
    if isinstance(path, bytes):
        path = path.decode(FSENCODING)
    if copy:
        if not os.path.exists(path):
            raise ValueError(
                "error: bitrot database at {} does not exist.".format(path)
            )
        db_copy = tempfile.NamedTemporaryFile(
            prefix='bitrot_', suffix='.db', delete=False,
        )
        # Register removal right away so the temporary file is cleaned up
        # even if copying the original fails below.
        atexit.register(os.unlink, db_copy.name)
        try:
            with open(path, 'rb') as db_orig:
                shutil.copyfileobj(db_orig, db_copy)
        finally:
            db_copy.close()
        path = db_copy.name
    conn = sqlite3.connect(path)
    # atexit runs handlers in reverse registration order: the commit
    # registered at the end of this function runs before this close, and
    # the temporary copy (if any) is unlinked last.
    atexit.register(conn.close)
    cur = conn.cursor()
    tables = set(t for t, in cur.execute('SELECT name FROM sqlite_master'))
    if 'bitrot' not in tables:
        cur.execute('CREATE TABLE bitrot (path TEXT PRIMARY KEY, '
                    'mtime INTEGER, hash TEXT, timestamp TEXT)')
    if 'bitrot_hash_idx' not in tables:
        cur.execute('CREATE INDEX bitrot_hash_idx ON bitrot (hash)')
    atexit.register(conn.commit)
    return conn
2015-06-22 18:08:26 -07:00
def list_existing_paths(directory, expected=(), ignored=(), follow_links=False):
    """list_existing_paths(b'/dir') -> (paths, total_size, normalized_paths)

    Walks `directory` and returns a 3-tuple:

    * `paths` - set of the regular files found, as produced by `os.walk`
      (bytestrings when `directory` is a bytestring);
    * `total_size` - sum of `st_size` over those files;
    * `normalized_paths` - the same files, decoded with `FSENCODING` and
      passed through `normalize_path`.

    Doesn't add entries listed in `ignored` (compared against the raw,
    undecoded path).  Doesn't add symlinks if `follow_links` is False (the
    default), unless the normalized path is present in `expected`; such
    entries are stat'ed through the link.

    File names that cannot be decoded are reported on stderr and skipped.
    OSErrors whose errno is in `IGNORED_FILE_SYSTEM_ERRORS` are skipped
    silently; any other OSError is re-raised.
    """
    paths = set()
    paths_decoded_and_normalized = set()
    total_size = 0
    for path, _, files in os.walk(directory):
        for f in files:
            p = os.path.join(path, f)
            try:
                p_uni = p.decode(FSENCODING)
            except UnicodeDecodeError:
                # Write raw bytes: the name cannot be represented as text.
                binary_stderr = getattr(sys.stderr, 'buffer', sys.stderr)
                binary_stderr.write(b"warning: cannot decode file name: ")
                binary_stderr.write(p)
                binary_stderr.write(b"\n")
                continue

            p_norm = normalize_path(p_uni)
            try:
                if follow_links or p_norm in expected:
                    st = os.stat(p)
                else:
                    st = os.lstat(p)
            except OSError as ex:
                if ex.errno not in IGNORED_FILE_SYSTEM_ERRORS:
                    raise
            else:
                if not stat.S_ISREG(st.st_mode) or p in ignored:
                    continue
                paths.add(p)
                paths_decoded_and_normalized.add(p_norm)
                total_size += st.st_size
    return paths, total_size, paths_decoded_and_normalized
2015-06-22 18:08:26 -07:00
class BitrotException(Exception):
    """Fatal error raised by bitrot.

    Raised as ``BitrotException(exit_code, message[, paths])``: the command
    line entry point prints ``args[1]`` and exits with ``args[0]``.
    """
class Bitrot(object):
    """Checks the files under the current directory against stored hashes.

    `run()` walks ``b'.'``, computes a SHA-1 for each regular file and
    compares it with the entry recorded in the bitrot database: unknown
    files are added (or recognized as renames), files with a changed mtime
    are re-recorded, and files whose content differs while the mtime is
    unchanged are reported as errors.
    """

    def __init__(
        self, verbosity=1, test=False, follow_links=False, commit_interval=300,
        chunk_size=DEFAULT_CHUNK_SIZE,
    ):
        """Store the run configuration.

        verbosity: 0 prints only errors, 1 a summary, 2 per-path listings.
        test: operate on a temporary copy of the database.
        follow_links: hash the targets of symbolic links.
        commit_interval: minimum number of seconds between commits.
        chunk_size: number of bytes read from a file at a time.
        """
        self.verbosity = verbosity
        self.test = test
        self.follow_links = follow_links
        self.commit_interval = commit_interval
        self.chunk_size = chunk_size
        self._last_reported_size = ''
        self._last_commit_ts = 0

    def maybe_commit(self, conn):
        """Commit `conn` unless the last commit was under `commit_interval` seconds ago."""
        if time.time() < self._last_commit_ts + self.commit_interval:
            # no time for commit yet!
            return

        conn.commit()
        self._last_commit_ts = time.time()

    def run(self):
        """Scan the current directory and bring the database up to date.

        Raises BitrotException with code 2 when `test` is set and no
        database exists, and with code 1 (plus the list of affected paths)
        when at least one hash mismatch was found.  The integrity check
        performed first may raise BitrotException as well.
        """
        check_sha512_integrity(verbosity=self.verbosity)

        bitrot_db = get_path()
        bitrot_sha512 = get_path(ext=b'sha512')
        try:
            conn = get_sqlite3_cursor(bitrot_db, copy=self.test)
        except ValueError:
            raise BitrotException(
                2,
                'No database exists so cannot test. Run the tool once first.',
            )

        cur = conn.cursor()
        new_paths = []
        updated_paths = []
        renamed_paths = []
        errors = []
        current_size = 0
        # Starts as every path in the database; entries are discarded as
        # the corresponding files are found on disk.
        missing_paths = self.select_all_paths(cur)
        hashes = self.select_all_hashes(cur)
        paths, total_size, paths_decoded_and_normalized = list_existing_paths(
            b'.', expected=missing_paths, ignored={bitrot_db, bitrot_sha512},
            follow_links=self.follow_links,
        )

        for p in sorted(paths):
            p_uni = p.decode(FSENCODING)
            p_norm = normalize_path(p_uni)
            try:
                st = os.stat(p)
            except OSError as ex:
                if ex.errno in IGNORED_FILE_SYSTEM_ERRORS:
                    # The file disappeared between listing existing paths and
                    # this run or is (temporarily?) locked with different
                    # permissions. We'll just skip it for now.
                    print(
                        '\rwarning: `{}` is currently unavailable for '
                        'reading: {}'.format(
                            p_uni, ex,
                        ),
                        file=sys.stderr,
                    )
                    continue

                raise   # Not expected? https://github.com/ambv/bitrot/issues/

            new_mtime = int(st.st_mtime)
            current_size += st.st_size
            if self.verbosity:
                self.report_progress(current_size, total_size)

            missing_paths.discard(p_norm)
            try:
                new_sha1 = sha1(p, self.chunk_size)
            except (IOError, OSError) as e:
                # Report the decoded path; fall back to the raw errno when
                # it has no symbolic name.
                print(
                    '\rwarning: cannot compute hash of {} [{}]'.format(
                        p_uni, errno.errorcode.get(e.errno, e.errno),
                    ),
                    file=sys.stderr,
                )
                continue

            cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE '
                        'path=?', (p_norm,))
            row = cur.fetchone()
            if not row:
                stored_path = self.handle_unknown_path(
                    cur, p_uni, new_mtime, new_sha1,
                    paths_decoded_and_normalized, hashes,
                )
                self.maybe_commit(conn)

                if p_uni == stored_path:
                    new_paths.append(p)   # FIXME: shouldn't that be p_uni?
                else:
                    renamed_paths.append((stored_path, p_uni))
                    missing_paths.discard(normalize_path(stored_path))
                continue

            stored_mtime, stored_sha1, stored_ts = row
            if int(stored_mtime) != new_mtime:
                # A changed mtime means a deliberate modification: record
                # the new hash instead of reporting an error.
                updated_paths.append(p)
                cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? '
                            'WHERE path=?',
                            (new_mtime, new_sha1, ts(), p_norm))
                self.maybe_commit(conn)
                continue

            if stored_sha1 != new_sha1:
                errors.append(p)
                print(
                    '\rerror: SHA1 mismatch for {}: expected {}, got {}.'
                    ' Last good hash checked on {}.'.format(
                        p_uni, stored_sha1, new_sha1, stored_ts
                    ),
                    file=sys.stderr,
                )

        for path in missing_paths:
            # The content of missing_paths is expected to be normalized
            # already; normalizing again is just to be sure.
            cur.execute(
                'DELETE FROM bitrot WHERE path=?', (normalize_path(path),),
            )

        conn.commit()

        if not self.test:
            cur.execute('vacuum')

        if self.verbosity:
            cur.execute('SELECT COUNT(path) FROM bitrot')
            all_count = cur.fetchone()[0]
            self.report_done(
                total_size,
                all_count,
                len(errors),
                new_paths,
                updated_paths,
                renamed_paths,
                missing_paths,
            )

        update_sha512_integrity(verbosity=self.verbosity)

        if errors:
            raise BitrotException(
                1, 'There were {} errors found.'.format(len(errors)), errors,
            )

    def select_all_paths(self, cur):
        """Return the set of all paths stored in the database."""
        return set(path for path, in cur.execute('SELECT path FROM bitrot'))

    def select_all_hashes(self, cur):
        """Return a dict mapping each stored hash to the set of its paths."""
        result = {}
        for rhash, rpath in cur.execute('SELECT hash, path FROM bitrot'):
            result.setdefault(rhash, set()).add(rpath)
        return result

    def report_progress(self, current_size, total_size):
        """Write the percentage of bytes processed to stdout when it changes."""
        size_fmt = '\r{:>6.1%}'.format(current_size / (total_size or 1))
        if size_fmt == self._last_reported_size:
            return

        sys.stdout.write(size_fmt)
        sys.stdout.flush()
        self._last_reported_size = size_fmt

    def report_done(
        self, total_size, all_count, error_count, new_paths, updated_paths,
        renamed_paths, missing_paths):
        """Print the end-of-run summary.

        With verbosity 1 only counts are printed; with verbosity above 1
        every new, updated, renamed and missing path is listed.
        `new_paths` and `updated_paths` hold bytestrings and are sorted in
        place; `renamed_paths` holds ``(old, new)`` text pairs.
        """
        print('\rFinished. {:.2f} MiB of data read. {} errors found.'
              ''.format(total_size / 1024 / 1024, error_count))
        if self.verbosity == 1:
            print(
                '{} entries in the database, {} new, {} updated, '
                '{} renamed, {} missing.'.format(
                    all_count, len(new_paths), len(updated_paths),
                    len(renamed_paths), len(missing_paths),
                ),
            )
        elif self.verbosity > 1:
            print('{} entries in the database.'.format(all_count), end=' ')
            if new_paths:
                print('{} entries new:'.format(len(new_paths)))
                new_paths.sort()
                for path in new_paths:
                    print(' ', path.decode(FSENCODING))
            if updated_paths:
                print('{} entries updated:'.format(len(updated_paths)))
                updated_paths.sort()
                for path in updated_paths:
                    print(' ', path.decode(FSENCODING))
            if renamed_paths:
                print('{} entries renamed:'.format(len(renamed_paths)))
                renamed_paths.sort()
                for path in renamed_paths:
                    print(
                        ' from',
                        path[0],
                        'to',
                        path[1],
                    )
            if missing_paths:
                print('{} entries missing:'.format(len(missing_paths)))
                missing_paths = sorted(missing_paths)
                for path in missing_paths:
                    print(' ', path)
            # Terminate the "entries in the database." line only when no
            # section (including renames) has already done so.
            if not any(
                (new_paths, updated_paths, renamed_paths, missing_paths)
            ):
                print()
        if self.test and self.verbosity:
            print('warning: database file not updated on disk (test mode).')

    def handle_unknown_path(
        self, cur, new_path, new_mtime, new_sha1,
        paths_decoded_and_normalized, hashes,
    ):
        """Either add a new entry to the database or update the existing entry
        on rename.

        A rename is assumed when `hashes` lists a stored path with the same
        hash that is not among `paths_decoded_and_normalized` (i.e. it no
        longer exists on disk).  That path is then removed from `hashes` so
        that another new file with identical content is inserted as a new
        entry instead of being matched against the same stored row again.

        Returns `new_path` if the entry was indeed new or the `stored_path`
        (e.g. outdated path) if there was a rename.
        """
        candidates = [
            path for path in hashes.get(new_sha1, ())
            if path not in paths_decoded_and_normalized
        ]
        if candidates:
            renamed = candidates.pop()
            # update the path in the database
            cur.execute(
                'UPDATE bitrot SET mtime=?, path=?, timestamp=? WHERE path=?',
                (
                    new_mtime, normalize_path(new_path), ts(),
                    normalize_path(renamed),
                ),
            )
            hashes[new_sha1].discard(renamed)
            return renamed

        # No stored entry with this hash is missing from disk: a new file.
        cur.execute(
            'INSERT INTO bitrot VALUES (?, ?, ?, ?)',
            (normalize_path(new_path), new_mtime, new_sha1, ts()),
        )
        return new_path
2013-01-17 15:01:22 +01:00
2016-05-02 17:49:25 -07:00
def get_path(directory=b'.', ext=b'db'):
    """Compose the path to the selected bitrot file.

    Returns ``<directory>/.bitrot.<ext>`` as a bytestring; both arguments
    must be bytestrings.
    """
    return os.path.join(directory, b'.bitrot.' + ext)
2016-10-29 19:27:18 -07:00
def stable_sum(bitrot_db=None):
    """Calculates a stable SHA512 of all entries in the database.

    Useful for comparing if two directories hold the same data, as it ignores
    timing information.

    `bitrot_db` is the database path; it defaults to `get_path()`.  Only the
    stored hashes, ordered by path, contribute to the result.
    """
    if bitrot_db is None:
        bitrot_db = get_path()
    digest = hashlib.sha512()
    conn = get_sqlite3_cursor(bitrot_db)
    cur = conn.cursor()
    for stored_hash, in cur.execute('SELECT hash FROM bitrot ORDER BY path'):
        digest.update(stored_hash.encode('ascii'))
    return digest.hexdigest()
2016-08-09 14:51:57 -07:00
def check_sha512_integrity(verbosity=1):
    """Verify the database file against the checksum stored next to it.

    Does nothing when the ``.bitrot.sha512`` file does not exist.
    Otherwise the SHA-512 of the database file is compared with the stored
    value.  Progress and diagnostics are printed only when `verbosity` is
    truthy.

    Raises BitrotException (code 3) when the checksums differ.
    """
    sha512_path = get_path(ext=b'sha512')
    if not os.path.exists(sha512_path):
        return

    if verbosity:
        print('Checking bitrot.db integrity... ', end='')
        sys.stdout.flush()
    with open(sha512_path, 'rb') as f:
        old_sha512 = f.read().strip()
    bitrot_db = get_path()
    digest = hashlib.sha512()
    with open(bitrot_db, 'rb') as f:
        digest.update(f.read())
    new_sha512 = digest.hexdigest().encode('ascii')
    if new_sha512 != old_sha512:
        if verbosity:
            # A SHA-512 hex digest is 128 characters long; any other length
            # suggests that the checksum file itself is damaged.
            if len(old_sha512) == 128:
                print(
                    "error: SHA512 of the file is different, bitrot.db might "
                    "be corrupt.",
                )
            else:
                print(
                    "error: SHA512 of the file is different but bitrot.sha512 "
                    "has a suspicious length. It might be corrupt.",
                )
            print(
                "If you'd like to continue anyway, delete the .bitrot.sha512 "
                "file and try again.",
                file=sys.stderr,
            )
        raise BitrotException(
            3, 'bitrot.db integrity check failed, cannot continue.',
        )

    if verbosity:
        print('ok.')
2016-05-02 17:49:25 -07:00
2016-08-09 14:51:57 -07:00
def update_sha512_integrity(verbosity=1):
    """Rewrite ``.bitrot.sha512`` if it no longer matches the database file.

    The SHA-512 of the database file is computed and compared with the
    stored value (if any); the checksum file is written only when they
    differ.  Progress is printed when `verbosity` is truthy.
    """
    old_sha512 = None
    sha512_path = get_path(ext=b'sha512')
    if os.path.exists(sha512_path):
        with open(sha512_path, 'rb') as f:
            old_sha512 = f.read().strip()
    bitrot_db = get_path()
    digest = hashlib.sha512()
    with open(bitrot_db, 'rb') as f:
        digest.update(f.read())
    new_sha512 = digest.hexdigest().encode('ascii')
    if new_sha512 != old_sha512:
        if verbosity:
            print('Updating bitrot.sha512... ', end='')
            sys.stdout.flush()
        with open(sha512_path, 'wb') as f:
            f.write(new_sha512)
        if verbosity:
            print('done.')
2016-05-02 17:49:25 -07:00
2013-02-10 19:16:10 +01:00
def run_from_command_line():
    """Parse the command line and run the requested bitrot operation.

    With ``--sum`` the stable checksum of the database is printed.
    Otherwise a `Bitrot` run is performed; on BitrotException the message
    is printed to stderr and the process exits with the exception's code.
    """
    global FSENCODING

    parser = argparse.ArgumentParser(prog='bitrot')
    parser.add_argument(
        '-l', '--follow-links', action='store_true',
        help='follow symbolic links and store target files\' hashes. Once '
             'a path is present in the database, it will be checked against '
             'changes in content even if it becomes a symbolic link. In '
             'other words, if you run `bitrot -l`, on subsequent runs '
             'symbolic links registered during the first run will be '
             'properly followed and checked even if you run without `-l`.')
    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help='don\'t print anything besides checksum errors')
    parser.add_argument(
        '-s', '--sum', action='store_true',
        help='using only the data already gathered, return a SHA-512 sum '
             'of hashes of all the entries in the database. No timestamps '
             'are used in calculation.')
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='list new, updated and missing entries')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='just test against an existing database, don\'t update anything')
    parser.add_argument(
        '--version', action='version',
        version='%(prog)s {}.{}.{}'.format(*VERSION))
    parser.add_argument(
        '--commit-interval', type=float, default=300,
        help='min time in seconds between commits '
             '(0 commits on every operation)')
    parser.add_argument(
        '--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
        help='read files this many bytes at a time')
    parser.add_argument(
        '--fsencoding', default='',
        help='override the codec to decode filenames, otherwise taken from '
             'the LANG environment variables')
    args = parser.parse_args()

    # Apply the override before any operation: the `--sum` path decodes the
    # database path with FSENCODING as well.
    if args.fsencoding:
        FSENCODING = args.fsencoding

    if args.sum:
        try:
            print(stable_sum())
        except RuntimeError as e:
            print(str(e).encode('utf8'), file=sys.stderr)
    else:
        verbosity = 1
        if args.quiet:
            verbosity = 0
        elif args.verbose:
            verbosity = 2
        bt = Bitrot(
            verbosity=verbosity,
            test=args.test,
            follow_links=args.follow_links,
            commit_interval=args.commit_interval,
            chunk_size=args.chunk_size,
        )
        try:
            bt.run()
        except BitrotException as bre:
            print('error:', bre.args[1], file=sys.stderr)
            sys.exit(bre.args[0])
2013-02-10 19:16:10 +01:00
2013-01-17 15:01:22 +01:00
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    run_from_command_line()