add sha512, see #3
This commit is contained in:
parent
457d38b19b
commit
d0c78ec3ee
29
README.md
29
README.md
@ -39,22 +39,23 @@ Run `chkbit -u PATH` to create/update the chkbit index.
|
||||
chkbit will
|
||||
|
||||
- create a `.chkbit` index in every subdirectory of the path it was given.
|
||||
- update the index with md5 hashes for every file.
|
||||
- update the index with md5/sha512 hashes for every file.
|
||||
- report damage for files that failed the integrity check since the last run (check the exit status).
|
||||
|
||||
Run `chkbit PATH` to verify only.
|
||||
|
||||
```
|
||||
usage: chkbit.py [-h] [-u] [-f] [-i] [-w N] [-q] [-v] [PATH [PATH ...]]
|
||||
usage: chkbit.py [-h] [-u] [--algo ALGO] [-f] [-i] [-w N] [-q] [-v] [PATH ...]
|
||||
|
||||
Checks the data integrity of your files. See https://github.com/laktak/chkbit-py
|
||||
|
||||
positional arguments:
|
||||
PATH directories to check
|
||||
|
||||
optional arguments:
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
-u, --update update indices (without this chkbit will only verify files)
|
||||
--algo ALGO hash algorithm: md5, sha512
|
||||
-f, --force force update of damaged items
|
||||
-i, --verify-index verify files in the index only (will not report new files)
|
||||
-w N, --workers N number of workers to use, default=5
|
||||
@ -112,12 +113,28 @@ The disadvantage is obviously that you get hidden `.chkbit` files in your conten
|
||||
|
||||
chkbit operates on files.
|
||||
|
||||
When run for the first time it records an md5 hash of the file contents as well as the file modification time.
|
||||
When run for the first time it records a hash of the file contents as well as the file modification time.
|
||||
|
||||
When you run it again it first checks the modification time,
|
||||
|
||||
- if the time changed (because you made an edit) it records a new md5 hash.
|
||||
- otherwise it will compare the current md5 to the recorded value and report an error if they do not match.
|
||||
- if the time changed (because you made an edit) it records a new hash.
|
||||
- otherwise it will compare the current hash to the recorded value and report an error if they do not match.
|
||||
|
||||
### I wish to use a stronger hash algorithm
|
||||
|
||||
chkbit now supports sha512. You can specify it with `--algo sha512`.
|
||||
|
||||
Note that existing index files will use the hash that they were created with. If you wish to update all hashes you need to delete your existing indexes first.
|
||||
|
||||
### How can I delete the index files?
|
||||
|
||||
List them with
|
||||
|
||||
```
|
||||
find . -name .chkbit
|
||||
```
|
||||
|
||||
and add `-delete` to delete.
|
||||
|
||||
### Can I test if chkbit is working correctly?
|
||||
|
||||
|
13
chkbit/context.py
Normal file
13
chkbit/context.py
Normal file
@ -0,0 +1,13 @@
|
||||
import hashlib
|
||||
|
||||
|
||||
class Context:
    """Shared run configuration handed to the index/worker machinery.

    Attributes:
        verify_index: verify only files already present in the index.
        update: write updated hashes back to the index.
        force: force an update even for damaged (mismatching) items.
        hash_algo: hash algorithm name, "md5" or "sha512".
    """

    def __init__(self, verify_index, update, force, hash_algo):
        # Fail fast on an unsupported algorithm before any state is set.
        if hash_algo not in ["md5", "sha512"]:
            raise Exception(f"{hash_algo} is unknown.")

        self.verify_index = verify_index
        self.update = update
        self.force = force
        self.hash_algo = hash_algo
|
@ -1,18 +1,25 @@
|
||||
import hashlib


# Read files in 128 KiB chunks so large files are hashed without
# loading them fully into memory.
BLOCKSIZE = 2**10 * 128  # kb


def hashfile(path, hash_algo=None):
    """Return the hex digest of the file at *path*.

    Args:
        path: path of the file to hash.
        hash_algo: algorithm name; None or "md5" selects md5 (the
            legacy default), "sha512" selects sha512.

    Raises:
        Exception: if hash_algo is not a supported algorithm.
    """
    if not hash_algo or hash_algo == "md5":
        h = hashlib.md5()
    elif hash_algo == "sha512":
        h = hashlib.sha512()
    else:
        raise Exception(f"{hash_algo} is unknown.")
    with open(path, "rb") as f:
        while True:
            buf = f.read(BLOCKSIZE)
            if len(buf) <= 0:
                break
            h.update(buf)
    return h.hexdigest()
|
||||
|
||||
|
||||
def hashtext(text):
|
||||
|
@ -58,12 +58,22 @@ class Index:
|
||||
self.log(stat, os.path.join(self.path, name))
|
||||
|
||||
# calc new hashes for this index
|
||||
# calc new hashes for this index
def update(self, context):
    """Compute fresh hashes for every file tracked by this index.

    Files matched by the ignore rules are skipped (logged as SKIP).
    For files already present in the old index the previously used
    algorithm takes precedence over context.hash_algo, so existing
    entries keep verifying against the hash they were created with.
    Legacy entries that stored a bare "md5" key are migrated to the
    {"mod", "a", "h"} structure on the fly.
    """
    for name in self.files:
        if self.should_ignore(name):
            self._log(Stat.SKIP, name)
            continue

        a = context.hash_algo
        # check previously used hash
        if name in self.old:
            old = self.old[name]
            if "md5" in old:
                a = "md5"  # legacy structure
                self.old[name] = {"mod": old["mod"], "a": a, "h": old["md5"]}
            elif "a" in old:
                a = old["a"]
        self.new[name] = self._calc_file(name, a)
|
||||
|
||||
# check/update the index (old vs new)
|
||||
def check_fix(self, force):
|
||||
@ -77,7 +87,7 @@ class Index:
|
||||
b = self.new[name]
|
||||
amod = a["mod"]
|
||||
bmod = b["mod"]
|
||||
if a["md5"] == b["md5"]:
|
||||
if a["h"] == b["h"]:
|
||||
# ok, if the content stays the same the mod time does not matter
|
||||
self._log(Stat.OK, name)
|
||||
if amod != bmod:
|
||||
@ -101,11 +111,11 @@ class Index:
|
||||
self._log(Stat.WARN_OLD, name)
|
||||
self._setmod()
|
||||
|
||||
def _calc_file(self, name, a):
    """Hash the file *name* with algorithm *a* and capture its mtime.

    Returns the index entry {"mod": mtime_ms, "a": algo, "h": digest}.
    """
    path = os.path.join(self.path, name)
    info = os.stat(path)
    # store the modification time in integer milliseconds so the
    # JSON index stays compact and comparable across runs
    mtime = int(info.st_mtime * 1000)
    return {"mod": mtime, "a": a, "h": hashfile(path, a)}
|
||||
|
||||
def save(self):
|
||||
if self.modified:
|
||||
@ -114,7 +124,7 @@ class Index:
|
||||
data["idx_hash"] = hashtext(text)
|
||||
|
||||
with open(self.idx_file, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f)
|
||||
json.dump(data, f, separators=(",", ":"))
|
||||
self.modified = False
|
||||
return True
|
||||
else:
|
||||
@ -129,7 +139,11 @@ class Index:
|
||||
if "data" in data:
|
||||
# extract old format from js version
|
||||
for item in json.loads(data["data"]):
|
||||
self.old[item["name"]] = {"mod": item["mod"], "md5": item["md5"]}
|
||||
self.old[item["name"]] = {
|
||||
"mod": item["mod"],
|
||||
"a": "md5",
|
||||
"h": item["md5"],
|
||||
}
|
||||
elif "idx" in data:
|
||||
self.old = data["idx"]
|
||||
text = json.dumps(self.old, separators=(",", ":"))
|
||||
|
@ -6,11 +6,11 @@ from chkbit import Index, Stat
|
||||
|
||||
|
||||
class IndexThread:
|
||||
def __init__(self, idx, args, res_queue, todo_queue):
|
||||
def __init__(self, idx, context, res_queue, todo_queue):
|
||||
self.idx = idx
|
||||
self.verify_index_only = args.verify_index
|
||||
self.update = args.update and not self.verify_index_only
|
||||
self.force = args.force
|
||||
self.verify_index_only = context.verify_index
|
||||
self.update = context.update and not self.verify_index_only
|
||||
self.context = context
|
||||
self.todo_queue = todo_queue
|
||||
self.res_queue = res_queue
|
||||
self.t = threading.Thread(target=self.run)
|
||||
@ -40,10 +40,10 @@ class IndexThread:
|
||||
if e.load() or not self.verify_index_only:
|
||||
|
||||
# calc the new hashes
|
||||
e.update()
|
||||
e.update(self.context)
|
||||
|
||||
# compare
|
||||
e.check_fix(self.force)
|
||||
e.check_fix(self.context.force)
|
||||
|
||||
# save if update is set
|
||||
if self.update:
|
||||
|
@ -4,7 +4,7 @@ import time
|
||||
import argparse
|
||||
import queue
|
||||
import threading
|
||||
from chkbit import IndexThread, Stat
|
||||
from chkbit import Context, IndexThread, Stat
|
||||
|
||||
STATUS_CODES = """
|
||||
Status codes:
|
||||
@ -63,6 +63,13 @@ class Main:
|
||||
help="update indices (without this chkbit will only verify files)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--algo",
|
||||
type=str,
|
||||
default="md5",
|
||||
help="hash algorithm: md5, sha512",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-f", "--force", action="store_true", help="force update of damaged items"
|
||||
)
|
||||
@ -90,6 +97,7 @@ class Main:
|
||||
action="store_true",
|
||||
help="quiet, don't show progress/information",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-v", "--verbose", action="store_true", help="verbose output"
|
||||
)
|
||||
@ -120,9 +128,16 @@ class Main:
|
||||
for path in self.args.paths:
|
||||
todo_queue.put(path)
|
||||
|
||||
context = Context(
|
||||
self.args.verify_index,
|
||||
self.args.update,
|
||||
self.args.force,
|
||||
self.args.algo,
|
||||
)
|
||||
|
||||
# start indexing
|
||||
workers = [
|
||||
IndexThread(idx, self.args, self.res_queue, todo_queue)
|
||||
IndexThread(idx, context, self.res_queue, todo_queue)
|
||||
for idx in range(self.args.workers)
|
||||
]
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user