.chkbitignore support for subdirectories #8
This commit is contained in:
parent
0f55a94658
commit
df44bc7bf1
19
README.md
19
README.md
@ -74,7 +74,7 @@ chkbit will
|
||||
Run `chkbit PATH` to verify only.
|
||||
|
||||
```
|
||||
usage: chkbit [-h] [-u] [--algo ALGO] [-f] [-s] [--index-name NAME] [--ignore-name NAME] [-w N] [--plain] [-q] [-v] [PATH ...]
|
||||
usage: chkbit [-h] [-u] [--show-ignored-only] [--algo ALGO] [-f] [-s] [--index-name NAME] [--ignore-name NAME] [-w N] [--plain] [-q] [-v] [PATH ...]
|
||||
|
||||
Checks the data integrity of your files. See https://github.com/laktak/chkbit-py
|
||||
|
||||
@ -84,6 +84,7 @@ positional arguments:
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
-u, --update update indices (without this chkbit will verify files in readonly mode)
|
||||
--show-ignored-only only show ignored files
|
||||
--algo ALGO hash algorithm: md5, sha512, blake3 (default: blake3)
|
||||
-f, --force force update of damaged items
|
||||
-s, --skip-symlinks do not follow symlinks
|
||||
@ -94,6 +95,12 @@ options:
|
||||
-q, --quiet quiet, don't show progress/information
|
||||
-v, --verbose verbose output
|
||||
|
||||
.chkbitignore rules:
|
||||
each line should contain exactly one name
|
||||
you may use Unix shell-style wildcards (see README)
|
||||
lines starting with `#` are skipped
|
||||
lines starting with `/` are only applied to the current directory
|
||||
|
||||
Status codes:
|
||||
DMG: error, data damage detected
|
||||
EIX: error, index damaged
|
||||
@ -101,7 +108,7 @@ Status codes:
|
||||
new: new file
|
||||
upd: file updated
|
||||
ok : check ok
|
||||
skp: skipped (see .chkbitignore)
|
||||
ign: ignored (see .chkbitignore)
|
||||
EXC: internal exception
|
||||
```
|
||||
|
||||
@ -123,9 +130,13 @@ You should
|
||||
Add a `.chkbitignore` file containing the names of the files/directories you wish to ignore
|
||||
|
||||
- each line should contain exactly one name
|
||||
- you may use [Unix shell-style wildcards](https://docs.python.org/3/library/fnmatch.html)
|
||||
- `*` matches everything
|
||||
- `?` matches any single character
|
||||
- `[seq]` matches any character in seq
|
||||
- `[!seq]` matches any character not in seq
|
||||
- lines starting with `#` are skipped
|
||||
- you may use [Unix shell-style wildcards](https://docs.python.org/3.8/library/fnmatch.html)
|
||||
- at the moment does not allow to match files in subdirectories (PR welcome)
|
||||
- lines starting with `/` only apply to the directory that contains the `.chkbitignore` file, not to its subdirectories
|
||||
|
||||
## FAQ
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
from chkbit.status import Status
|
||||
from chkbit.ignore import Ignore
|
||||
from chkbit.input_item import InputItem
|
||||
from chkbit.context import Context
|
||||
from chkbit.hashfile import hashfile, hashtext
|
||||
from chkbit.index import Index
|
||||
|
@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
import queue
|
||||
from chkbit import Status
|
||||
import chkbit
|
||||
from typing import Optional
|
||||
from chkbit import InputItem
|
||||
|
||||
|
||||
class Context:
|
||||
@ -9,6 +12,7 @@ class Context:
|
||||
num_workers=5,
|
||||
force=False,
|
||||
update=False,
|
||||
show_ignored_only=False,
|
||||
hash_algo="blake3",
|
||||
skip_symlinks=False,
|
||||
index_filename=".chkbit",
|
||||
@ -17,19 +21,30 @@ class Context:
|
||||
self.num_workers = num_workers
|
||||
self.force = force
|
||||
self.update = update
|
||||
self.show_ignored_only = show_ignored_only
|
||||
self.hash_algo = hash_algo
|
||||
self.skip_symlinks = skip_symlinks
|
||||
self.index_filename = index_filename
|
||||
self.ignore_filename = ignore_filename
|
||||
|
||||
# the input queue is used to distribute the work
|
||||
# to the index threads
|
||||
self.input_queue = queue.Queue()
|
||||
|
||||
self.result_queue = queue.Queue()
|
||||
self.hit_queue = queue.Queue()
|
||||
|
||||
if hash_algo not in ["md5", "sha512", "blake3"]:
|
||||
raise Exception(f"{hash_algo} is unknown.")
|
||||
|
||||
def log(self, stat: Status, path: str):
|
||||
def log(self, stat: chkbit.Status, path: str):
|
||||
self.result_queue.put((0, stat, path))
|
||||
|
||||
def hit(self, *, cfiles: int = 0, cbytes: int = 0):
|
||||
self.result_queue.put((1, cfiles, cbytes))
|
||||
|
||||
def add_input(self, path: str, *, ignore: Optional[chkbit.Ignore] = None):
|
||||
self.input_queue.put(InputItem(path, ignore=ignore))
|
||||
|
||||
def end_input(self):
|
||||
self.input_queue.put(None)
|
||||
|
@ -29,7 +29,7 @@ def hashfile(path: str, hash_algo: str, *, hit: Callable[[str], None]):
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def hashtext(text):
|
||||
def hashtext(text: str):
    """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8."""
    encoded = text.encode("utf-8")
    return hashlib.md5(encoded).hexdigest()
|
||||
|
56
chkbit/ignore.py
Normal file
56
chkbit/ignore.py
Normal file
@ -0,0 +1,56 @@
|
||||
from __future__ import annotations
|
||||
import fnmatch
|
||||
import os
|
||||
import sys
|
||||
import chkbit
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class Ignore:
    """Ignore rules of one directory, chained to the rules of its parent.

    The patterns are read from the file named ``context.ignore_filename``
    inside ``path``. A lookup that does not match locally is forwarded to
    ``parent_ignore`` (if any), so rules of enclosing directories also apply
    to entries further down the tree.
    """

    def __init__(
        self,
        context: "chkbit.Context",
        path: str,
        *,
        parent_ignore: Optional["chkbit.Ignore"],
    ):
        """Store the settings and load the ignore file of ``path``.

        Args:
            context: object providing ``ignore_filename``.
            path: directory whose ignore file is loaded.
            parent_ignore: rules of the enclosing directory, or ``None``
                when there is nothing to forward lookups to.
        """
        self.parent_ignore = parent_ignore
        self.context = context
        self.path = path
        # Directory name plus "/": the prefix added to a name when a lookup
        # is forwarded to the parent.
        self.name = os.path.basename(path) + "/"
        self.ignore = []
        self.load_ignore()

    @property
    def ignore_filepath(self):
        """Path of the ignore file inside this directory."""
        return os.path.join(self.path, self.context.ignore_filename)

    def load_ignore(self):
        """Read the ignore file into ``self.ignore``.

        Empty lines, whitespace-only lines and lines whose first character
        is ``#`` are dropped; all other lines are kept verbatim. A missing
        ignore file leaves ``self.ignore`` unchanged.
        """
        # Open directly instead of testing for existence first, so a file
        # that disappears between the check and the open cannot raise.
        try:
            with open(self.ignore_filepath, "r", encoding="utf-8") as f:
                text = f.read()
        except (FileNotFoundError, NotADirectoryError):
            return

        self.ignore = [
            line
            for line in text.splitlines()
            if line and line[0] != "#" and line.strip()
        ]

    def should_ignore(self, name: str, *, fullname: Optional[str] = None):
        """Return True if ``name`` matches a rule here or in a parent.

        Args:
            name: name to test against the patterns of this directory.
            fullname: set when the lookup was forwarded by a subdirectory;
                it is the forwarded name prefixed with that subdirectory's
                name and is tested in addition to ``name``.

        Returns:
            True when any pattern of this directory or of one of its
            parents matches, False otherwise.
        """
        for pattern in self.ignore:
            if pattern.startswith("/"):
                if fullname:
                    # Patterns starting with "/" are not applied to lookups
                    # forwarded from a subdirectory.
                    continue
                pattern = pattern[1:]
            if fnmatch.fnmatch(name, pattern):
                return True
            if fullname and fnmatch.fnmatch(fullname, pattern):
                return True

        if self.parent_ignore:
            # Forward the lookup: the parent sees the current (full) name as
            # ``name`` and the same name prefixed with this directory's name
            # as ``fullname``.
            forwarded = fullname or name
            return self.parent_ignore.should_ignore(
                forwarded, fullname=self.name + forwarded
            )
        return False
|
@ -1,40 +1,38 @@
|
||||
from __future__ import annotations
|
||||
import fnmatch
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import json
|
||||
import chkbit
|
||||
from chkbit import hashfile, hashtext, Status
|
||||
from typing import Optional
|
||||
|
||||
VERSION = 2 # index version
|
||||
|
||||
|
||||
class Index:
|
||||
def __init__(self, context, path, files, *, readonly=False):
|
||||
def __init__(
|
||||
self,
|
||||
context: chkbit.Context,
|
||||
path: str,
|
||||
files: list[str],
|
||||
*,
|
||||
readonly: bool = False,
|
||||
):
|
||||
self.context = context
|
||||
self.path = path
|
||||
self.files = files
|
||||
self.old = {}
|
||||
self.new = {}
|
||||
self.ignore = []
|
||||
self.load_ignore()
|
||||
self.updates = []
|
||||
self.modified = None
|
||||
self.readonly = readonly
|
||||
|
||||
@property
|
||||
def ignore_filepath(self):
|
||||
return os.path.join(self.path, self.context.ignore_filename)
|
||||
|
||||
@property
|
||||
def index_filepath(self):
|
||||
return os.path.join(self.path, self.context.index_filename)
|
||||
|
||||
def should_ignore(self, name):
|
||||
for ignore in self.ignore:
|
||||
if fnmatch.fnmatch(name, ignore):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _setmod(self, value=True):
|
||||
self.modified = value
|
||||
|
||||
@ -42,10 +40,10 @@ class Index:
|
||||
self.context.log(stat, os.path.join(self.path, name))
|
||||
|
||||
# calc new hashes for this index
|
||||
def update(self):
|
||||
def calc_hashes(self, *, ignore: Optional[chkbit.Ignore] = None):
|
||||
for name in self.files:
|
||||
if self.should_ignore(name):
|
||||
self._log(Status.SKIP, name)
|
||||
if ignore and ignore.should_ignore(name):
|
||||
self._log(Status.IGNORE, name)
|
||||
continue
|
||||
|
||||
a = self.context.hash_algo
|
||||
@ -65,8 +63,13 @@ class Index:
|
||||
else:
|
||||
self.new[name] = self._calc_file(name, a)
|
||||
|
||||
def show_ignored_only(self, ignore: chkbit.Ignore):
|
||||
for name in self.files:
|
||||
if ignore.should_ignore(name):
|
||||
self._log(Status.IGNORE, name)
|
||||
|
||||
# check/update the index (old vs new)
|
||||
def check_fix(self, force):
|
||||
def check_fix(self, force: bool):
|
||||
for name in self.new.keys():
|
||||
if not name in self.old:
|
||||
self._log(Status.NEW, name)
|
||||
@ -101,7 +104,7 @@ class Index:
|
||||
self._log(Status.WARN_OLD, name)
|
||||
self._setmod()
|
||||
|
||||
def _list_file(self, name, a):
|
||||
def _list_file(self, name: str, a: str):
|
||||
# produce a dummy entry for new files when the index is not updated
|
||||
return {
|
||||
"mod": None,
|
||||
@ -109,7 +112,7 @@ class Index:
|
||||
"h": None,
|
||||
}
|
||||
|
||||
def _calc_file(self, name, a):
|
||||
def _calc_file(self, name: str, a: str):
|
||||
path = os.path.join(self.path, name)
|
||||
info = os.stat(path)
|
||||
mtime = int(info.st_mtime * 1000)
|
||||
@ -158,15 +161,3 @@ class Index:
|
||||
self._setmod()
|
||||
self._log(Status.ERR_IDX, self.index_filepath)
|
||||
return True
|
||||
|
||||
def load_ignore(self):
|
||||
if not os.path.exists(self.ignore_filepath):
|
||||
return
|
||||
with open(self.ignore_filepath, "r", encoding="utf-8") as f:
|
||||
text = f.read()
|
||||
|
||||
self.ignore = list(
|
||||
filter(
|
||||
lambda x: x and x[0] != "#" and len(x.strip()) > 0, text.splitlines()
|
||||
)
|
||||
)
|
||||
|
@ -1,27 +1,29 @@
|
||||
from __future__ import annotations
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import threading
|
||||
from chkbit import Index, Status
|
||||
import chkbit
|
||||
from chkbit import Index, Status, Ignore
|
||||
|
||||
|
||||
class IndexThread:
|
||||
def __init__(self, thread_no, context, input_queue):
|
||||
def __init__(self, thread_no: int, context: chkbit.Context):
|
||||
self.thread_no = thread_no
|
||||
self.update = context.update
|
||||
self.context = context
|
||||
self.input_queue = input_queue
|
||||
self.input_queue = context.input_queue
|
||||
self.t = threading.Thread(target=self._run)
|
||||
self.t.daemon = True
|
||||
self.t.start()
|
||||
|
||||
def _process_root(self, parent):
|
||||
def _process_root(self, iitem: chkbit.InputItem):
|
||||
files = []
|
||||
dirs = []
|
||||
|
||||
# load files and subdirs
|
||||
for name in os.listdir(path=parent):
|
||||
path = os.path.join(parent, name)
|
||||
for name in os.listdir(path=iitem.path):
|
||||
path = os.path.join(iitem.path, name)
|
||||
if name[0] == ".":
|
||||
continue
|
||||
if os.path.isdir(path):
|
||||
@ -33,36 +35,42 @@ class IndexThread:
|
||||
files.append(name)
|
||||
|
||||
# load index
|
||||
index = Index(self.context, parent, files, readonly=not self.update)
|
||||
index = Index(self.context, iitem.path, files, readonly=not self.update)
|
||||
index.load()
|
||||
|
||||
# calc the new hashes
|
||||
index.update()
|
||||
# load ignore
|
||||
ignore = Ignore(self.context, iitem.path, parent_ignore=iitem.ignore)
|
||||
|
||||
# compare
|
||||
index.check_fix(self.context.force)
|
||||
if self.context.show_ignored_only:
|
||||
index.show_ignored_only(ignore)
|
||||
else:
|
||||
# calc the new hashes
|
||||
index.calc_hashes(ignore=ignore)
|
||||
|
||||
# save if update is set
|
||||
if self.update:
|
||||
if index.save():
|
||||
self.context.log(Status.UPDATE_INDEX, "")
|
||||
# compare
|
||||
index.check_fix(self.context.force)
|
||||
|
||||
# save if update is set
|
||||
if self.update:
|
||||
if index.save():
|
||||
self.context.log(Status.UPDATE_INDEX, "")
|
||||
|
||||
# process subdirs
|
||||
for name in dirs:
|
||||
if not index.should_ignore(name):
|
||||
self.input_queue.put(os.path.join(parent, name))
|
||||
if not ignore.should_ignore(name):
|
||||
self.context.add_input(os.path.join(iitem.path, name), ignore=ignore)
|
||||
else:
|
||||
self.context.log(Status.SKIP, name + "/")
|
||||
self.context.log(Status.IGNORE, name + "/")
|
||||
|
||||
def _run(self):
|
||||
while True:
|
||||
parent = self.input_queue.get()
|
||||
if parent is None:
|
||||
iitem = self.input_queue.get()
|
||||
if iitem is None:
|
||||
break
|
||||
try:
|
||||
self._process_root(parent)
|
||||
self._process_root(iitem)
|
||||
except Exception as e:
|
||||
self.context.log(Status.INTERNALEXCEPTION, f"{parent}: {e}")
|
||||
self.context.log(Status.INTERNALEXCEPTION, f"{iitem.path}: {e}")
|
||||
self.input_queue.task_done()
|
||||
|
||||
def join(self):
|
||||
|
9
chkbit/input_item.py
Normal file
9
chkbit/input_item.py
Normal file
@ -0,0 +1,9 @@
|
||||
from __future__ import annotations
|
||||
from typing import Optional
|
||||
import chkbit
|
||||
|
||||
|
||||
class InputItem:
    """One unit of work: a path together with the ignore rules passed for it."""

    def __init__(self, path: str, *, ignore: Optional["chkbit.Ignore"] = None):
        # ``ignore`` is stored as given; it stays None when the caller
        # supplies no rules.
        self.ignore = ignore
        self.path = path
|
@ -8,6 +8,6 @@ class Status(Enum):
|
||||
NEW = "new"
|
||||
UPDATE = "upd"
|
||||
OK = "ok "
|
||||
SKIP = "skp"
|
||||
IGNORE = "ign"
|
||||
INTERNALEXCEPTION = "EXC"
|
||||
UPDATE_INDEX = "iup"
|
||||
|
@ -10,7 +10,13 @@ from chkbit import Context, Status, IndexThread
|
||||
from chkbit_cli import CLI, Progress, RateCalc, sparkify
|
||||
|
||||
|
||||
STATUS_CODES = """
|
||||
EPILOG = """
|
||||
.chkbitignore rules:
|
||||
each line should contain exactly one name
|
||||
you may use Unix shell-style wildcards (see README)
|
||||
lines starting with `#` are skipped
|
||||
lines starting with `/` are only applied to the current directory
|
||||
|
||||
Status codes:
|
||||
DMG: error, data damage detected
|
||||
EIX: error, index damaged
|
||||
@ -18,7 +24,7 @@ Status codes:
|
||||
new: new file
|
||||
upd: file updated
|
||||
ok : check ok
|
||||
skp: skipped (see .chkbitignore)
|
||||
ign: ignored (see .chkbitignore)
|
||||
EXC: internal exception
|
||||
"""
|
||||
|
||||
@ -67,7 +73,7 @@ class Main:
|
||||
elif stat == Status.NEW:
|
||||
self.num_new += 1
|
||||
|
||||
if self.verbose or not stat in [Status.OK, Status.SKIP]:
|
||||
if self.verbose or not stat in [Status.OK, Status.IGNORE]:
|
||||
CLI.printline(stat.value, " ", path)
|
||||
|
||||
def _res_worker(self, context: Context):
|
||||
@ -113,18 +119,15 @@ class Main:
|
||||
print(self.total, end="\r")
|
||||
|
||||
def process(self, args):
|
||||
# the input queue is used to distribute the work
|
||||
# to the index threads
|
||||
input_queue = queue.Queue()
|
||||
|
||||
# put the initial paths into the queue
|
||||
for path in args.paths:
|
||||
input_queue.put(path)
|
||||
if args.update and args.show_ignored_only:
|
||||
print("Error: use either --update or --show-ignored-only!", file=sys.stderr)
|
||||
return None
|
||||
|
||||
context = Context(
|
||||
num_workers=args.workers,
|
||||
force=args.force,
|
||||
update=args.update,
|
||||
show_ignored_only=args.show_ignored_only,
|
||||
hash_algo=args.algo,
|
||||
skip_symlinks=args.skip_symlinks,
|
||||
index_filename=args.index_name,
|
||||
@ -132,10 +135,12 @@ class Main:
|
||||
)
|
||||
self.result_queue = context.result_queue
|
||||
|
||||
# put the initial paths into the queue
|
||||
for path in args.paths:
|
||||
context.add_input(path)
|
||||
|
||||
# start indexing
|
||||
workers = [
|
||||
IndexThread(i, context, input_queue) for i in range(context.num_workers)
|
||||
]
|
||||
workers = [IndexThread(i, context) for i in range(context.num_workers)]
|
||||
|
||||
# log the results from the workers
|
||||
res_worker = threading.Thread(target=self._res_worker, args=(context,))
|
||||
@ -143,11 +148,11 @@ class Main:
|
||||
res_worker.start()
|
||||
|
||||
# wait for work to finish
|
||||
input_queue.join()
|
||||
context.input_queue.join()
|
||||
|
||||
# signal workers to exit
|
||||
for worker in workers:
|
||||
input_queue.put(None)
|
||||
context.end_input()
|
||||
|
||||
# signal res_worker to exit
|
||||
self.result_queue.put(None)
|
||||
@ -231,7 +236,7 @@ class Main:
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="chkbit",
|
||||
description="Checks the data integrity of your files. See https://github.com/laktak/chkbit-py",
|
||||
epilog=STATUS_CODES,
|
||||
epilog=EPILOG,
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
)
|
||||
|
||||
@ -246,6 +251,10 @@ class Main:
|
||||
help="update indices (without this chkbit will verify files in readonly mode)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--show-ignored-only", action="store_true", help="only show ignored files"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--algo",
|
||||
type=str,
|
||||
@ -305,7 +314,7 @@ class Main:
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
self.verbose = args.verbose
|
||||
self.verbose = args.verbose or args.show_ignored_only
|
||||
if args.quiet:
|
||||
self.progress = Progress.Quiet
|
||||
elif not sys.stdout.isatty():
|
||||
@ -315,7 +324,8 @@ class Main:
|
||||
|
||||
if args.paths:
|
||||
context = self.process(args)
|
||||
self.print_result(context)
|
||||
if context and not context.show_ignored_only:
|
||||
self.print_result(context)
|
||||
else:
|
||||
parser.print_help()
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "chkbit"
|
||||
version = "3.0.2"
|
||||
version = "4.0.0"
|
||||
description = "chkbit checks the data integrity of your files"
|
||||
authors = [
|
||||
{name = "Christian Zangl", email = "laktak@cdak.net"},
|
||||
@ -8,7 +8,7 @@ authors = [
|
||||
dependencies = [
|
||||
"blake3>=0.3.4",
|
||||
]
|
||||
requires-python = ">=3.6.0"
|
||||
requires-python = ">=3.7.0"
|
||||
readme = "README.md"
|
||||
license = {file = "LICENSE"}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user