Swap sqlite cursor with dictionary and set data structures (#24)
1. Use two new data structures:
   - paths (set): contains all the files present in the actual filesystem
   - hashes (dictionary): replaces the per-file sqlite query with dict[hash] = set(db paths)
2. Add minimal unit tests written with bats (a Bash test framework).
See https://github.com/ambv/bitrot/issues/23 for details.
This commit is contained in:
parent
6b4a1fd46a
commit
a8e52626ef
51
src/bitrot.py
Normal file → Executable file
51
src/bitrot.py
Normal file → Executable file
@ -104,7 +104,7 @@ def list_existing_paths(directory, expected=(), ignored=(), follow_links=False):
|
||||
`follow_links` is False (the default). All entries present in `expected`
|
||||
must be files (can't be directories or symlinks).
|
||||
"""
|
||||
paths = []
|
||||
paths = set()
|
||||
total_size = 0
|
||||
for path, _, files in os.walk(directory):
|
||||
for f in files:
|
||||
@ -129,9 +129,8 @@ def list_existing_paths(directory, expected=(), ignored=(), follow_links=False):
|
||||
else:
|
||||
if not stat.S_ISREG(st.st_mode) or p in ignored:
|
||||
continue
|
||||
paths.append(p)
|
||||
paths.add(p)
|
||||
total_size += st.st_size
|
||||
paths.sort()
|
||||
return paths, total_size
|
||||
|
||||
|
||||
@ -180,12 +179,13 @@ class Bitrot(object):
|
||||
errors = []
|
||||
current_size = 0
|
||||
missing_paths = self.select_all_paths(cur)
|
||||
hashes = self.select_all_hashes(cur)
|
||||
paths, total_size = list_existing_paths(
|
||||
b'.', expected=missing_paths, ignored={bitrot_db, bitrot_sha512},
|
||||
follow_links=self.follow_links,
|
||||
)
|
||||
|
||||
for p in paths:
|
||||
for p in sorted(paths):
|
||||
p_uni = p.decode(FSENCODING)
|
||||
try:
|
||||
st = os.stat(p)
|
||||
@ -227,7 +227,7 @@ class Bitrot(object):
|
||||
row = cur.fetchone()
|
||||
if not row:
|
||||
stored_path = self.handle_unknown_path(
|
||||
cur, p_uni, new_mtime, new_sha1,
|
||||
cur, p_uni, new_mtime, new_sha1, paths, hashes
|
||||
)
|
||||
self.maybe_commit(conn)
|
||||
|
||||
@ -291,6 +291,16 @@ class Bitrot(object):
|
||||
row = cur.fetchone()
|
||||
return result
|
||||
|
||||
def select_all_hashes(self, cur):
    """Group every database path by the hash stored alongside it.

    Executes one query over the `bitrot` table through `cur` and returns a
    dictionary mapping each hash to the set of paths recorded with it.
    """
    paths_by_hash = {}
    cur.execute('SELECT hash, path FROM bitrot')
    # Fetch row by row; `fetchone()` yields None once the cursor is drained.
    for stored_hash, stored_path in iter(cur.fetchone, None):
        if stored_hash not in paths_by_hash:
            paths_by_hash[stored_hash] = set()
        paths_by_hash[stored_hash].add(stored_path)
    return paths_by_hash
|
||||
|
||||
def report_progress(self, current_size, total_size):
|
||||
size_fmt = '\r{:>6.1%}'.format(current_size/(total_size or 1))
|
||||
if size_fmt == self._last_reported_size:
|
||||
@ -345,37 +355,32 @@ class Bitrot(object):
|
||||
if self.test and self.verbosity:
|
||||
print('warning: database file not updated on disk (test mode).')
|
||||
|
||||
def handle_unknown_path(self, cur, new_path, new_mtime, new_sha1, paths, hashes):
    """Either add a new entry to the database or update the existing entry
    on rename.

    `paths` is the set of paths currently present on disk and `hashes` maps
    each stored hash to the set of database paths recorded with it.
    `hashes` is updated in place so it keeps mirroring what this method
    writes to the database.

    Returns `new_path` if the entry was indeed new or the stored path (e.g.
    outdated path) if there was a rename.
    """

    def on_disk(stored_path):
        # The caller walks the tree with a bytes directory and decodes each
        # entry with FSENCODING, while the database hands back text, so
        # test both spellings of the same path.
        if stored_path in paths:
            return True
        try:
            return stored_path.encode(FSENCODING) in paths
        except (AttributeError, UnicodeError):
            # Already bytes, or not representable in the filesystem
            # encoding: the direct membership test above was the answer.
            return False

    same_hash = hashes.setdefault(new_sha1, set())
    vanished = [path for path in same_hash if not on_disk(path)]

    if vanished:
        # A file with this content used to exist elsewhere and is gone now:
        # treat `new_path` as its new name.  `min` keeps the choice stable
        # when several stored paths qualify.
        renamed = min(vanished)
        # update the path in the database
        cur.execute(
            'UPDATE bitrot SET mtime=?, path=?, timestamp=? WHERE path=?',
            (new_mtime, new_path, ts(), renamed),
        )
        # Keep the cache in step with the database; otherwise the next new
        # file with this hash would be matched to the same vanished path,
        # its UPDATE would touch no row and it would never be stored.
        same_hash.discard(renamed)
        same_hash.add(new_path)
        return renamed

    # no rename, just a new file (possibly sharing its hash with others)
    cur.execute(
        'INSERT INTO bitrot VALUES (?, ?, ?, ?)',
        (new_path, new_mtime, new_sha1, ts()),
    )
    same_hash.add(new_path)
    return new_path
|
||||
|
||||
def get_path(directory=b'.', ext=b'db'):
|
||||
"""Compose the path to the selected bitrot file."""
|
||||
|
220
tests/test-bitrot.bats
Executable file
220
tests/test-bitrot.bats
Executable file
@ -0,0 +1,220 @@
|
||||
#!/usr/bin/env bats
|
||||
|
||||
load test_helper
|
||||
|
||||
|
||||
#change it to your testing bitrot
|
||||
cmd=~/Clones/bitrot/src/bitrot.py
|
||||
|
||||
# cmd=bitrot
|
||||
|
||||
test_dir=/tmp/bitrot_dir-$USER
|
||||
mkdir -p $test_dir
|
||||
cd $test_dir || exit
|
||||
|
||||
###########
|
||||
# BASIC #
|
||||
###########
|
||||
|
||||
@test "bitrot detects new files in a tree dir" {
|
||||
mkdir -p notemptydirs/dir2/
|
||||
touch notemptydirs/dir2/new-file-{a,b}.txt
|
||||
echo $RANDOM >> notemptydirs/dir2/new-file-b.txt
|
||||
run $cmd -v
|
||||
# check_fail "${lines[@]}"
|
||||
|
||||
(( $status == 0 ))
|
||||
# [[ ${lines[0]} = "Finished. 0.00 MiB of data read. 0 errors found." ]]
|
||||
[[ ${lines[1]} = "2 entries in the database. 2 entries new:" ]]
|
||||
[[ ${lines[2]} = " ./notemptydirs/dir2/new-file-a.txt" ]]
|
||||
[[ ${lines[3]} = " ./notemptydirs/dir2/new-file-b.txt" ]]
|
||||
[[ ${lines[4]} = "Updating bitrot.sha512... done." ]]
|
||||
|
||||
}
|
||||
|
||||
|
||||
@test "bitrot detects modified files in a tree dir" {
|
||||
sleep 1
|
||||
echo $RANDOM >> notemptydirs/dir2/new-file-a.txt
|
||||
run $cmd -v
|
||||
# check_fail "${lines[@]}"
|
||||
|
||||
(( $status == 0 ))
|
||||
[[ ${lines[0]} = "Checking bitrot.db integrity... ok." ]]
|
||||
# [[ ${lines[1]} = "Finished. 0.00 MiB of data read. 0 errors found." ]]
|
||||
[[ ${lines[2]} = "2 entries in the database. 1 entries updated:" ]]
|
||||
[[ ${lines[3]} = " ./notemptydirs/dir2/new-file-a.txt" ]]
|
||||
[[ ${lines[4]} = "Updating bitrot.sha512... done." ]]
|
||||
|
||||
}
|
||||
|
||||
@test "bitrot detects renamed files in a tree dir" {
|
||||
sleep 1
|
||||
mv notemptydirs/dir2/new-file-a.txt notemptydirs/dir2/new-file-a.txt2
|
||||
run $cmd -v
|
||||
# check_fail "${lines[@]}"
|
||||
|
||||
(( $status == 0 ))
|
||||
[[ ${lines[0]} = "Checking bitrot.db integrity... ok." ]]
|
||||
# [[ ${lines[1]} = "Finished. 0.00 MiB of data read. 0 errors found." ]]
|
||||
[[ ${lines[2]} = "2 entries in the database. 1 entries renamed:" ]]
|
||||
[[ ${lines[3]} = " from ./notemptydirs/dir2/new-file-a.txt to ./notemptydirs/dir2/new-file-a.txt2" ]]
|
||||
[[ ${lines[4]} = "Updating bitrot.sha512... done." ]]
|
||||
|
||||
}
|
||||
|
||||
@test "bitrot detects delete files in a tree dir" {
|
||||
sleep 1
|
||||
rm notemptydirs/dir2/new-file-a.txt2
|
||||
run $cmd -v
|
||||
# check_fail "${lines[@]}"
|
||||
|
||||
(( $status == 0 ))
|
||||
[[ ${lines[0]} = "Checking bitrot.db integrity... ok." ]]
|
||||
# [[ ${lines[1]} = "Finished. 0.00 MiB of data read. 0 errors found." ]]
|
||||
[[ ${lines[2]} = "1 entries in the database. 1 entries missing:" ]]
|
||||
[[ ${lines[3]} = " ./notemptydirs/dir2/new-file-a.txt2" ]]
|
||||
[[ ${lines[4]} = "Updating bitrot.sha512... done." ]]
|
||||
|
||||
}
|
||||
|
||||
|
||||
@test "bitrot detects new files and modified in a tree dir " {
|
||||
sleep 1
|
||||
touch more-files-{a,b,c,d,e,f,g}.txt
|
||||
echo $RANDOM >> notemptydirs/dir2/new-file-b.txt
|
||||
run $cmd -v
|
||||
#check_fail "${lines[@]}"
|
||||
|
||||
(( $status == 0 ))
|
||||
|
||||
# [[ ${lines[1]} = "Finished. 0.00 MiB of data read. 0 errors found." ]]
|
||||
[[ ${lines[2]} = "8 entries in the database. 7 entries new:" ]]
|
||||
[[ ${lines[3]} = " ./more-files-a.txt" ]]
|
||||
[[ ${lines[4]} = " ./more-files-b.txt" ]]
|
||||
[[ ${lines[5]} = " ./more-files-c.txt" ]]
|
||||
[[ ${lines[6]} = " ./more-files-d.txt" ]]
|
||||
[[ ${lines[7]} = " ./more-files-e.txt" ]]
|
||||
[[ ${lines[8]} = " ./more-files-f.txt" ]]
|
||||
[[ ${lines[9]} = " ./more-files-g.txt" ]]
|
||||
[[ ${lines[10]} = "1 entries updated:" ]]
|
||||
[[ ${lines[11]} = " ./notemptydirs/dir2/new-file-b.txt" ]]
|
||||
[[ ${lines[12]} = "Updating bitrot.sha512... done." ]]
|
||||
}
|
||||
|
||||
@test "bitrot detects new files, modified, deleted and moved in a tree dir " {
|
||||
sleep 1
|
||||
for fil in {a,b,c,d,e,f,g}; do
|
||||
echo $RANDOM >> notemptydirs/pl-more-files-$fil.txt
|
||||
done
|
||||
echo $RANDOM >> notemptydirs/dir2/new-file-b.txt
|
||||
mv more-files-a.txt more-files-a.txt2
|
||||
rm more-files-g.txt
|
||||
run $cmd -v
|
||||
|
||||
(( $status == 0 ))
|
||||
|
||||
# [[ ${lines[1]} = "Finished. 0.00 MiB of data read. 0 errors found." ]]
|
||||
[[ ${lines[2]} = "14 entries in the database. 7 entries new:" ]]
|
||||
[[ ${lines[3]} = " ./notemptydirs/pl-more-files-a.txt" ]]
|
||||
[[ ${lines[4]} = " ./notemptydirs/pl-more-files-b.txt" ]]
|
||||
[[ ${lines[5]} = " ./notemptydirs/pl-more-files-c.txt" ]]
|
||||
[[ ${lines[6]} = " ./notemptydirs/pl-more-files-d.txt" ]]
|
||||
[[ ${lines[7]} = " ./notemptydirs/pl-more-files-e.txt" ]]
|
||||
[[ ${lines[8]} = " ./notemptydirs/pl-more-files-f.txt" ]]
|
||||
[[ ${lines[9]} = " ./notemptydirs/pl-more-files-g.txt" ]]
|
||||
[[ ${lines[10]} = "1 entries updated:" ]]
|
||||
[[ ${lines[11]} = " ./notemptydirs/dir2/new-file-b.txt" ]]
|
||||
[[ ${lines[12]} = "1 entries renamed:" ]]
|
||||
[[ ${lines[13]} = " from ./more-files-a.txt to ./more-files-a.txt2" ]]
|
||||
[[ ${lines[14]} = "1 entries missing:" ]]
|
||||
[[ ${lines[15]} = " ./more-files-g.txt" ]]
|
||||
[[ ${lines[16]} = "Updating bitrot.sha512... done." ]]
|
||||
}
|
||||
|
||||
|
||||
@test "bitrot detects new files, modified, deleted and moved in a tree dir 2" {
|
||||
sleep 1
|
||||
for fil in {a,b,c,d,e,f,g}; do
|
||||
echo $RANDOM >> notemptydirs/pl2-more-files-$fil.txt
|
||||
done
|
||||
echo $RANDOM >> notemptydirs/pl-more-files-a.txt
|
||||
|
||||
mv notemptydirs/pl-more-files-b.txt notemptydirs/pl-more-files-b.txt2
|
||||
cp notemptydirs/pl-more-files-g.txt notemptydirs/pl2-more-files-g.txt2
|
||||
cp notemptydirs/pl-more-files-d.txt notemptydirs/pl2-more-files-d.txt2
|
||||
|
||||
rm more-files-f.txt notemptydirs/pl-more-files-c.txt
|
||||
|
||||
run $cmd -v
|
||||
|
||||
# check_fail "${lines[@]}"
|
||||
|
||||
(( $status == 0 ))
|
||||
|
||||
# [[ ${lines[1]} = "Finished. 0.00 MiB of data read. 0 errors found." ]]
|
||||
[[ ${lines[2]} = "21 entries in the database. 9 entries new:" ]]
|
||||
[[ ${lines[3]} = " ./notemptydirs/pl2-more-files-a.txt" ]]
|
||||
[[ ${lines[4]} = " ./notemptydirs/pl2-more-files-b.txt" ]]
|
||||
[[ ${lines[5]} = " ./notemptydirs/pl2-more-files-c.txt" ]]
|
||||
[[ ${lines[6]} = " ./notemptydirs/pl2-more-files-d.txt" ]]
|
||||
[[ ${lines[7]} = " ./notemptydirs/pl2-more-files-d.txt2" ]]
|
||||
[[ ${lines[8]} = " ./notemptydirs/pl2-more-files-e.txt" ]]
|
||||
[[ ${lines[9]} = " ./notemptydirs/pl2-more-files-f.txt" ]]
|
||||
[[ ${lines[10]} = " ./notemptydirs/pl2-more-files-g.txt" ]]
|
||||
[[ ${lines[11]} = " ./notemptydirs/pl2-more-files-g.txt2" ]]
|
||||
[[ ${lines[12]} = "1 entries updated:" ]]
|
||||
[[ ${lines[13]} = " ./notemptydirs/pl-more-files-a.txt" ]]
|
||||
[[ ${lines[14]} = "1 entries renamed:" ]]
|
||||
[[ ${lines[15]} = " from ./notemptydirs/pl-more-files-b.txt to ./notemptydirs/pl-more-files-b.txt2" ]]
|
||||
[[ ${lines[16]} = "2 entries missing:" ]]
|
||||
[[ ${lines[17]} = " ./more-files-f.txt" ]]
|
||||
[[ ${lines[18]} = " ./notemptydirs/pl-more-files-c.txt" ]]
|
||||
[[ ${lines[19]} = "Updating bitrot.sha512... done." ]]
|
||||
}
|
||||
|
||||
|
||||
@test "bitrot can operate with 3278 files easily in a dir" {
|
||||
sleep 1
|
||||
mkdir -p alotfiles/here; cd alotfiles/here
|
||||
#create a 320KB file
|
||||
dd if=/dev/urandom of=masterfile bs=1 count=327680
|
||||
#split it in 3277 files (instantly) + masterfile = 3278
|
||||
split -b 100 -a 10 masterfile
|
||||
cd $test_dir
|
||||
run $cmd
|
||||
|
||||
(( $status == 0 ))
|
||||
[[ ${lines[2]} = "3299 entries in the database, 3278 new, 0 updated, 0 renamed, 0 missing." ]]
|
||||
|
||||
}
|
||||
|
||||
@test "bitrot can operate with 3278 files easily in a dir 2 " {
|
||||
sleep 1
|
||||
mv alotfiles/here alotfiles/here-moved
|
||||
run $cmd
|
||||
#check_fail "${lines[@]}"
|
||||
|
||||
(( $status == 0 ))
|
||||
[[ ${lines[2]} = "3299 entries in the database, 0 new, 0 updated, 3278 renamed, 0 missing." ]]
|
||||
|
||||
}
|
||||
|
||||
@test "bitrot can detetect a bitrot in a dir ! " {
|
||||
sleep 1
|
||||
generate_bitrot ./bitrot-file 10 2 $cmd
|
||||
run $cmd -q
|
||||
|
||||
#check_fail "${lines[@]}"
|
||||
|
||||
(( $status == 1 ))
|
||||
[[ ${lines[0]} = *"error: SHA1 mismatch for ./bitrot-file: expected"* ]]
|
||||
[[ ${lines[1]} = "error: There were 1 errors found." ]]
|
||||
}
|
||||
|
||||
|
||||
@test "Clean everything" {
|
||||
run chmod -f a+w *
|
||||
\rm -rf * $test_dir $BITROT_BACKUPS
|
||||
}
|
||||
|
61
tests/test_helper.bash
Normal file
61
tests/test_helper.bash
Normal file
@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# LC_ALL=en_US.UTF-8
|
||||
# LANGUAGE=en_US.UTF-8
|
||||
LANG=C
|
||||
|
||||
# Dump every argument, one per line, into /tmp/bats.log (overwriting any
# previous content) so the output of a failing test can be inspected later.
check_fail() {
    local log=/tmp/bats.log entry
    # start from an empty log file
    : > $log
    for entry in "$@"; do
        echo "$entry" >> $log
    done
    # cat /tmp/.bitrot.log >> $log
}
|
||||
|
||||
|
||||
# Create a zero-filled file, let bitrot record it, then overwrite part of it
# with random data while restoring its modification time, and run bitrot
# again so the change looks like silent corruption.
# Arguments:
#   $1  file to create and corrupt
#   $2  size factor: the file is written as $2*100 blocks of 1k
#   $3  percentage of the block count to overwrite (default 5)
#   $4  bitrot command to run (executed through bats' `run`)
generate_bitrot() {
    local dest=$1 temp=/tmp/temp-base
    local -i count=$(($2*100)) percent=${3:-5}
    local cmd=$4
    mkdir -p "${dest%/*}"
    touch "$dest" "$temp"

    dd if=/dev/zero of="$dest" bs=1k count=$count &>/dev/null
    # let's make sure they share the same timestamp; this has to happen after
    # dd, which updates the mtime, so the value recorded by the first run is
    # exactly the one restored after the corruption below
    touch -r "$temp" "$dest"
    run $cmd
    # modify it and reset the modification date to the base file's, to
    # simulate real bitrot
    dd seek=1k if=/dev/urandom of="$dest" bs=1k count=$((count*percent/100)) conv=notrunc &>/dev/null
    touch -r "$temp" "$dest"
    # remove the reference file (the original used the unset "$tmp" here)
    \rm -f "$temp"
    run $cmd
}
|
||||
|
||||
# Two-file variant of generate_bitrot: create two zero-filled files, run the
# command under test on their directories, corrupt both files while restoring
# their modification times, and run the command again. The exit status seen
# before and after the second run is written to /tmp/status.
# Arguments:
#   $1  first file to create and corrupt
#   $2  second file to create and corrupt
#   $3  size factor: each file is written as $3*100 blocks of 1k
#   $4  percentage of the block count to overwrite (default 5)
# NOTE(review): "$r" is not assigned anywhere in this helper file; presumably
# the caller sets it to the command under test -- confirm before relying on it.
generate_bitrots() {
    local dest=$1 dest2=$2 temp=/tmp/temp-base
    local -i count=$(($3*100)) percent=${4:-5}
    mkdir -p "${dest%/*}"
    mkdir -p "${dest2%/*}"
    local dir_base=${dest%/*}
    local dir_base2=${dest2%/*}
    touch "$dest2" "$dest" "$temp"

    dd if=/dev/zero of="$dest" bs=1k count=$count &>/dev/null
    dd if=/dev/zero of="$dest2" bs=1k count=$count &>/dev/null
    # let's make sure they share the same timestamp; done after dd (which
    # updates the mtime) so the first run records the value restored below
    touch -r "$temp" "$dest"
    touch -r "$temp" "$dest2"
    run $r "$dir_base" "$dir_base2"
    # modify them and reset the modification dates to the base file's, to
    # simulate bitrot
    dd seek=1k if=/dev/urandom of="$dest" bs=1k count=$((count*percent/100)) conv=notrunc &>/dev/null
    dd seek=1k if=/dev/urandom of="$dest2" bs=1k count=$((count*percent/100)) conv=notrunc &>/dev/null
    touch -r "$temp" "$dest"
    touch -r "$temp" "$dest2"
    # remove the reference file (the original used the unset "$tmp" here)
    \rm -f "$temp"
    echo $status > /tmp/status
    run $r "$dir_base" "$dir_base2"
    echo $status >> /tmp/status
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user