Compare commits


No commits in common. "master" and "v4.1.0" have entirely different histories.

49 changed files with 1290 additions and 1897 deletions

1
.github/FUNDING.yml vendored

@ -1 +0,0 @@
github: laktak

92
.github/workflows/build.yml vendored Normal file

@ -0,0 +1,92 @@
name: build
on: [push]
jobs:
build:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: prep
shell: bash
run: |
python -m pip install --upgrade pip
python -m pip install pipenv pyinstaller==6.3.0
pipenv install
rm -rf build dist
- name: build
shell: bash
run: |
echo "RUNNER_OS: $RUNNER_OS"
# get path to venv site-packages (for blake3)
pipenv run python -c "import site; print(site.getsitepackages())"
SITEPKG=$(pipenv run python -c "import site; print(site.getsitepackages()[-1])")
pipenv run pyinstaller run.py --hidden-import chkbit --hidden-import chkbit_cli --onefile --name chkbit --console --paths $SITEPKG
cat build/chkbit/warn-chkbit.txt
cd dist; ls -l
if [ "$RUNNER_OS" == "Linux" ]; then
tar -czf chkbit-linux_amd64.tar.gz chkbit
elif [ "$RUNNER_OS" == "macOS" ]; then
tar -czf chkbit-macos_amd64.tar.gz chkbit
elif [ "$RUNNER_OS" == "Windows" ]; then
7z a -tzip chkbit-windows_amd64.zip chkbit.exe
else
echo 'unknown runner'
exit 1
fi
- name: artifact
uses: actions/upload-artifact@v4
if: runner.os == 'Linux'
with:
name: binary-${{ matrix.os }}
path: dist/chkbit*.tar.gz
- name: artifact
uses: actions/upload-artifact@v4
if: runner.os == 'macOS'
with:
name: binary-${{ matrix.os }}
path: dist/chkbit*.tar.gz
- name: artifact
uses: actions/upload-artifact@v4
if: runner.os == 'Windows'
with:
name: binary-${{ matrix.os }}
path: dist/chkbit*.zip
publish:
runs-on: ubuntu-latest
needs: build
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
steps:
- name: get-artifacts
uses: actions/download-artifact@v4
with:
path: dist
merge-multiple: true
- name: list
shell: bash
run: |
find
ls -l dist
- name: publish-release
uses: softprops/action-gh-release@v1
with:
draft: true
files: dist/*


@ -1,27 +0,0 @@
name: ci
on:
push:
branches: []
pull_request:
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: "1.22"
- name: chkfmt
run: scripts/chkfmt
- name: tests
run: |
scripts/tests
- name: xbuild
run: scripts/xbuild

14
.github/workflows/lint.yml vendored Normal file

@ -0,0 +1,14 @@
name: lint
on: [push, pull_request]
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- uses: psf/black@stable


@ -1,32 +0,0 @@
name: release
on:
push:
tags: ["v*"]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: "1.22"
- name: chkfmt
run: scripts/chkfmt
- name: tests
run: |
scripts/tests
- name: xbuild
run: version=${GITHUB_REF#$"refs/tags/v"} scripts/xbuild
- name: release
uses: softprops/action-gh-release@v2
with:
draft: true
files: dist/*

13
.gitignore vendored

@ -1,3 +1,10 @@
# bin
/chkbit
dist
README.rst
build/
dist/
chkbit.spec
.cache/
.chkbit
.pytest_cache/
*.egg-info/
*.pyc
_*

10
Pipfile Normal file

@ -0,0 +1,10 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
# keep in sync with pyproject.toml
[packages]
blake3 = ">=0.3.4"
[dev-packages]

55
Pipfile.lock generated Normal file

@ -0,0 +1,55 @@
{
"_meta": {
"hash": {
"sha256": "e8cd87a62cdc293b2cab0606525f4eb3bdfeb4f0373a64a5be10685b235d1078"
},
"pipfile-spec": 6,
"requires": {},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"blake3": {
"hashes": [
"sha256:01787135e4003c41e9a07f6d83396a54bb1ace07758f0a4a8d446699ab18c489",
"sha256:0d53c8f129e4f76dba7c255633403c3fa8d390f61fa09ea7a530c987e2c62de6",
"sha256:13d4830e3c0d178784588594cb6f15b1c905efbb848db0f6be2519f87f2407ac",
"sha256:1a2671602aad7d2078ccb1c2d9b670dd7b4733a452898d77dc63472dea7b6933",
"sha256:22ae74485e0148be2a751e0689e74c345d209a12a8bc6332067f887cc46148c8",
"sha256:24868e2cb41feeb37286981afcc214242adfeba6a40ba773daf45168e80f76e0",
"sha256:25fce3f5f8b69c8655864cbc2a210c4df4779c8bedcc71ef0e45823c510b26ba",
"sha256:269e1f20c412c5cc28db3461f24dcc6f5915cdf1335538a7146d92af8f001bb3",
"sha256:29ae9df9b7f2a08935cf24a9b6637327ac988f1f26e54e6b1b137a00ec57a35e",
"sha256:42136484a1df1a8ba7efc901b44b8ff78b7d3c99f59fe109dad1c23d15c7e9a5",
"sha256:448bc6b96139c9061c6882c66d0dabf1bba354e01ac865f38bff1e5a9ad11748",
"sha256:4b7ef354144a2a19d7dbbfebce11735f68154e5190f9cc53825237bdb1bb78af",
"sha256:4e0c86416cb05bfbb90c6dcbe3d670bc3280791746374456b342114adb43253b",
"sha256:4ee1b49badfcddabe9f0c557105c0efa003043efea5573873f764d9726526c26",
"sha256:4fee299071879a2983bd7e5c560e303ef063238c557d6b11c5d59b03cad847ad",
"sha256:56f2bd6893139c468cf6f700ef34b16f33ed58b036d0f3d5aeb35c4a9a00fb98",
"sha256:5cedb4b5c69e5c35d96b6f567152358977f906b822b097c2113f8c355ce7885a",
"sha256:6628f15a8d6fe39c729f4924c44248f9caf3aecdaa110b69b1c09db5d42be5b1",
"sha256:772899b8cc1af8703956d9c4c175318fca64edede7f0a7379db3b515925e0f34",
"sha256:a1affb1fad469bc453e9e73f7335ece80c90bd4ef533f07ea643a91a89f71d0c",
"sha256:a3b3c3d596bc35bd6a56ea8554d3bc9ba3bdbc1edfa0a889a7cffd3925eaf18a",
"sha256:a871b60ffbc61b9b487ff7e8f9f918cc1da24cb5b87a58c983b3b242e665dedc",
"sha256:ae1b8e6d584231ad32fb39920e4044f38f6f2d85ce64c433fadd8baf6981b772",
"sha256:b59d62e3cb2d68b2318b53b5d08443e6693f428ddc6a1d7b423a266f9774a4f0",
"sha256:b9072cfa473ff3b659179bd6a600b6d07259221029d2d8d0595a576958e8bf16",
"sha256:bf2fa57a752364586739c2dcff4c604e745cee603ee43b24faa0d1369f8e7a81",
"sha256:c29a31f0e8eb5e34503296be966a54c0fe5ab34d57f9594bc761ffc549fc4d39",
"sha256:c8ea8fd94e0ee879ca623258b751f9427b3f20da228e55f1b491fedbdeb57ab8",
"sha256:d4626e6f0af151d157c1c9a03bb0bd65b5661c745c6cccef212f28c7ce7fc07b",
"sha256:e140c339873479bbc114456760ed1a7a28062c3ca7c54575a2a3ecc661efdb0e"
],
"index": "pypi",
"version": "==0.3.4"
}
},
"develop": {}
}

179
README.md

@ -1,67 +1,60 @@
# chkbit
chkbit is a tool that ensures the safety of your files by checking if their *data integrity remains intact over time*, especially during transfers and backups. It helps detect issues like disk damage, filesystem errors, and malware interference.
chkbit is a lightweight tool to check the data integrity of your files. It allows you to verify *that the data has not changed* since you put it there and that it is still the same when you move it somewhere else.
![gif of chkbit](https://raw.githubusercontent.com/wiki/laktak/chkbit/readme/chkbit.gif "chkbit")
cross-platform support for [Linux, macOS and Windows](https://github.com/laktak/chkbit-py/releases)!
- [How it works](#how-it-works)
- [Use it](#use-it)
- [On your Disk](#on-your-disk)
- [On your Backup](#on-your-backup)
- [For Data in the Cloud](#for-data-in-the-cloud)
- [Installation](#installation)
- [Usage](#usage)
- [Repair](#repair)
- [Ignore files](#ignore-files)
- [chkbit as a Go module](#chkbit-as-a-go-module)
- [FAQ](#faq)
- [Should I run `chkbit` on my whole drive?](#should-i-run-chkbit-on-my-whole-drive)
- [Why is chkbit placing the index in `.chkbit` files (vs a database)?](#why-is-chkbit-placing-the-index-in-chkbit-files-vs-a-database)
- [How does chkbit work?](#how-does-chkbit-work)
- [I wish to use a stronger hash algorithm](#i-wish-to-use-a-stronger-hash-algorithm)
- [How can I delete the index files?](#how-can-i-delete-the-index-files)
- [Can I test if chkbit is working correctly?](#can-i-test-if-chkbit-is-working-correctly)
- [Development](#development)
## Use it
## How it works
### On your Disk
- **On your Disk**: chkbit starts by creating checksums for each folder on your main disk. It alerts you to potential problems such as damage on the disk, filesystem errors, and malware attacks that could alter your files.
chkbit starts with your primary disk. It creates checksums for each folder; these will follow your data onto your backups.
- **On your Backup**: Regardless of your storage media, chkbit stores indexes in hidden files alongside your data during backups. When you run chkbit on your backup, it verifies that every byte was accurately transferred. If issues like [bitrot/data degradation](https://en.wikipedia.org/wiki/Data_degradation) occur, chkbit helps identify damaged files, alerting you to replace them with other backups.
Here it alerts you to
- damage on the disk
- damage caused by filesystem errors
- damage caused by malware (when it encrypts your files)
- **For Data in the Cloud**: chkbit is useful for cloud-stored data, alerting you to any changes introduced by cloud providers like video re-encoding or image compression. It ensures your files remain unchanged in the cloud.
The built-in checksums from your filesystem only cover some of these cases.
Remember to always maintain multiple backups for comprehensive data protection.
### On your Backup
No matter what storage media or filesystem you use, chkbit stores its indexes in hidden files that are backed up together with your data.
When you run chkbit on your backup media you can verify that every byte was correctly transferred.
If your backup media fails or experiences [bitrot/data degradation](https://en.wikipedia.org/wiki/Data_degradation), chkbit allows you to discover what files were damaged and need to be replaced by other backups. You should always keep multiple backups :)
### For Data in the Cloud
Some cloud providers re-encode your videos or compress your images to save space. chkbit will alert you of any changes.
## Installation
Download: You can download a release directly from [github releases](https://github.com/laktak/chkbit-py/releases).
### Binary releases
You can download the official chkbit binaries from the releases page and place them in your `PATH`.
- https://github.com/laktak/chkbit/releases
### Homebrew (macOS and Linux)
For macOS and Linux it can also be installed via [Homebrew](https://formulae.brew.sh/formula/chkbit):
```shell
brew install chkbit
```
### Build from Source
Building from source requires Go.
- Either install it directly
```shell
go install github.com/laktak/chkbit/v5/cmd/chkbit@latest
```
- or clone and build
```shell
git clone https://github.com/laktak/chkbit
chkbit/scripts/build
# binary:
ls -l chkbit/chkbit
```
If your OS/platform is not yet supported, you can also use either [pipx](https://pipx.pypa.io/latest/installation/) or pip:
- `pipx install chkbit`
- `pip install --user chkbit`
## Usage
@ -76,43 +69,35 @@ chkbit will
Run `chkbit PATH` to verify only.
```
Usage: chkbit [<paths> ...] [flags]
usage: chkbit [-h] [-u] [--show-ignored-only] [--algo ALGO] [-f] [-s] [-l FILE] [--log-verbose] [--index-name NAME] [--ignore-name NAME] [-w N] [--plain] [-q] [-v] [PATH ...]
Arguments:
[<paths> ...] directories to check
Checks the data integrity of your files. See https://github.com/laktak/chkbit-py
Flags:
-h, --help Show context-sensitive help.
-H, --tips Show tips.
-c, --check check mode: chkbit will verify files in readonly mode (default mode)
-u, --update update mode: add and update indices
-a, --add-only add mode: only add new files, do not check existing (quicker)
-i, --show-ignored-only show-ignored mode: only show ignored files
-m, --show-missing show missing files/directories
--force force update of damaged items (advanced usage only)
-S, --skip-symlinks do not follow symlinks
-R, --no-recurse do not recurse into subdirectories
-D, --no-dir-in-index do not track directories in the index
-l, --log-file=STRING write to a logfile if specified
--log-verbose verbose logging
--algo="blake3" hash algorithm: md5, sha512, blake3 (default: blake3)
--index-name=".chkbit" filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)
--ignore-name=".chkbitignore" filename that chkbit reads its ignore list from, needs to start with '.' (default: .chkbitignore)
-w, --workers=5 number of workers to use (default: 5)
--plain show plain status instead of being fancy
-q, --quiet quiet, don't show progress/information
-v, --verbose verbose output
-V, --version show version information
```
positional arguments:
PATH directories to check
```
$ chkbit -H
options:
-h, --help show this help message and exit
-u, --update update indices (without this chkbit will verify files in readonly mode)
--show-ignored-only only show ignored files
--algo ALGO hash algorithm: md5, sha512, blake3 (default: blake3)
-f, --force force update of damaged items
-s, --skip-symlinks do not follow symlinks
-l FILE, --log-file FILE
write to a logfile if specified
--log-verbose verbose logging
--index-name NAME filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)
--ignore-name NAME filename that chkbit reads its ignore list from, needs to start with '.' (default: .chkbitignore)
-w N, --workers N number of workers to use (default: 5)
--plain show plain status instead of being fancy
-q, --quiet quiet, don't show progress/information
-v, --verbose verbose output
.chkbitignore rules:
each line should contain exactly one name
you may use Unix shell-style wildcards (see README)
lines starting with '#' are skipped
lines starting with '/' are only applied to the current directory
lines starting with `#` are skipped
lines starting with `/` are only applied to the current directory
Status codes:
DMG: error, data damage detected
@ -121,30 +106,29 @@ Status codes:
new: new file
upd: file updated
ok : check ok
del: file/directory removed
ign: ignored (see .chkbitignore)
EXC: exception/panic
EXC: internal exception
```
chkbit is set to use only 5 workers by default so it will not slow your system to a crawl. You can specify a higher number to make it a lot faster if your IO throughput can keep up.
## Repair
chkbit is designed to detect "damage". To repair your files you need to think ahead:
chkbit cannot repair damage; its job is simply to detect it.
- backup regularly
- run chkbit *before* each backup
- run chkbit *after* a backup on the backup media (readonly)
- in case of any issues, *restore* from a checked backup medium.
You should
- backup regularly.
- run chkbit *before* each backup.
- check for damage on the backup media.
- in case of damage *restore* from a checked backup.
## Ignore files
Add a `.chkbitignore` file containing the names of the files/directories you wish to ignore:
- each line should contain exactly one name
- you may use Unix shell-style wildcards
- you may use [Unix shell-style wildcards](https://docs.python.org/3/library/fnmatch.html)
- `*` matches everything
- `?` matches any single character
- `[seq]` matches any character in seq
@ -154,18 +138,6 @@ Add a `.chkbitignore` file containing the names of the files/directories you wis
- you can use `path/sub/name` to ignore a file/directory in a sub path
- hidden files (starting with a `.`) are ignored by default
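The ignore rules use Python's `fnmatch` semantics (linked above); a quick sketch, with made-up patterns, of how the wildcards behave:
```python
import fnmatch

# hypothetical .chkbitignore patterns, for illustration only
patterns = ["*.tmp", "cache?", "report[0-9].pdf"]

for name in ["notes.tmp", "cache1", "report7.pdf", "keep.txt"]:
    ignored = any(fnmatch.fnmatch(name, p) for p in patterns)
    print(f"{name}: {'ignored' if ignored else 'kept'}")
```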
## chkbit as a Go module
chkbit can also be used in other Go programs.
```
go get github.com/laktak/chkbit/v5
```
For more information see the documentation on [pkg.go.dev](https://pkg.go.dev/github.com/laktak/chkbit/v5).
## FAQ
### Should I run `chkbit` on my whole drive?
@ -263,5 +235,26 @@ error: detected 1 file with damage!
`DMG` indicates damage.
## Development
With pipenv (install with `pipx install pipenv`):
```
# setup
pipenv install
# run chkbit
pipenv run python3 -m chkbit_cli.main
```
To build a source distribution package from pyproject.toml
```
pipx run build
```
You can then install your own package with
```
pipx install dist/chkbit-*.tar.gz
```
The binaries are created using PyInstaller via GitHub Actions.

7
chkbit/__init__.py Normal file

@ -0,0 +1,7 @@
from chkbit.status import Status
from chkbit.ignore import Ignore
from chkbit.input_item import InputItem
from chkbit.context import Context
from chkbit.hashfile import hashfile, hashtext
from chkbit.index import Index
from chkbit.index_thread import IndexThread

58
chkbit/context.py Normal file

@ -0,0 +1,58 @@
from __future__ import annotations
import queue
import chkbit
from typing import Optional
from chkbit import InputItem
class Context:
def __init__(
self,
*,
num_workers=5,
force=False,
update=False,
show_ignored_only=False,
hash_algo="blake3",
skip_symlinks=False,
index_filename=".chkbit",
ignore_filename=".chkbitignore",
):
self.num_workers = num_workers
self.force = force
self.update = update
self.show_ignored_only = show_ignored_only
self.hash_algo = hash_algo
self.skip_symlinks = skip_symlinks
self.index_filename = index_filename
self.ignore_filename = ignore_filename
if not index_filename.startswith("."):
raise Exception("The index filename must start with a dot!")
if not ignore_filename.startswith("."):
raise Exception("The ignore filename must start with a dot!")
# the input queue is used to distribute the work
# to the index threads
self.input_queue = queue.Queue()
self.result_queue = queue.Queue()
self.hit_queue = queue.Queue()
if hash_algo not in ["md5", "sha512", "blake3"]:
raise Exception(f"{hash_algo} is unknown.")
def log(self, stat: chkbit.Status, path: str):
self.result_queue.put((0, stat, path))
def hit(self, *, cfiles: int = 0, cbytes: int = 0):
self.result_queue.put((1, cfiles, cbytes))
def add_input(self, path: str, *, ignore: Optional[chkbit.Ignore] = None):
self.input_queue.put(InputItem(path, ignore=ignore))
def end_input(self):
self.input_queue.put(None)
def is_chkbit_file(self, name):
return name in [self.index_filename, self.ignore_filename]
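
A minimal sketch of constructing this class, assuming the package and its blake3 dependency are installed:
```python
from chkbit import Context

ctx = Context(num_workers=2, hash_algo="blake3")
print(ctx.is_chkbit_file(".chkbit"))      # True for the index/ignore filenames

# both filenames must start with a dot, otherwise Context raises
try:
    Context(index_filename="chkbit.idx")  # hypothetical invalid name
except Exception as e:
    print(e)
```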

34
chkbit/hashfile.py Normal file

@ -0,0 +1,34 @@
import hashlib
from blake3 import blake3
from typing import Callable
BLOCKSIZE = 2**10 * 128  # 128 KiB
def hashfile(path: str, hash_algo: str, *, hit: Callable[[str], None]):
if hash_algo == "md5":
h = hashlib.md5()
elif hash_algo == "sha512":
h = hashlib.sha512()
elif hash_algo == "blake3":
h = blake3()
else:
raise Exception(f"algo '{hash_algo}' is unknown.")
with open(path, "rb") as f:
while True:
buf = f.read(BLOCKSIZE)
l = len(buf)
if l <= 0:
break
h.update(buf)
if hit:
hit(l)
return h.hexdigest()
def hashtext(text: str):
md5 = hashlib.md5()
md5.update(text.encode("utf-8"))
return md5.hexdigest()
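
A minimal usage sketch of the functions above (the path is only an example; blake3 must be installed):
```python
from chkbit import hashfile, hashtext

# hash a file, reporting progress per block via the keyword-only hit callback
digest = hashfile("/etc/hostname", "blake3", hit=lambda n: print(f"read {n} bytes"))
print(digest)

# hashtext (md5) is what Index uses to checksum its serialized contents
print(hashtext("hello"))
```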

56
chkbit/ignore.py Normal file

@ -0,0 +1,56 @@
from __future__ import annotations
import fnmatch
import os
import sys
import chkbit
from enum import Enum
from typing import Optional
class Ignore:
def __init__(
self,
context: chkbit.Context,
path: str,
*,
parent_ignore: Optional[chkbit.Ignore],
):
self.parent_ignore = parent_ignore
self.context = context
self.path = path
self.name = os.path.basename(path) + "/"
self.ignore = []
self.load_ignore()
@property
def ignore_filepath(self):
return os.path.join(self.path, self.context.ignore_filename)
def load_ignore(self):
if not os.path.exists(self.ignore_filepath):
return
with open(self.ignore_filepath, "r", encoding="utf-8") as f:
text = f.read()
self.ignore = list(
filter(
lambda x: x and x[0] != "#" and len(x.strip()) > 0, text.splitlines()
)
)
def should_ignore(self, name: str, *, fullname: str = None):
for ignore in self.ignore:
if ignore.startswith("/"):
if fullname:
continue
else:
ignore = ignore[1:]
if fnmatch.fnmatch(name, ignore):
return True
if fullname and fnmatch.fnmatch(fullname, ignore):
return True
if self.parent_ignore:
return self.parent_ignore.should_ignore(
fullname or name, fullname=self.name + (fullname or name)
)
return False
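
A small self-contained sketch of the ignore semantics above, using a temporary directory and made-up patterns (requires the package and its blake3 dependency):
```python
import os
import tempfile
from chkbit import Context, Ignore

with tempfile.TemporaryDirectory() as root:
    # write an example ignore file (patterns are made up)
    with open(os.path.join(root, ".chkbitignore"), "w", encoding="utf-8") as f:
        f.write("*.tmp\n/local-only\n# a comment\n")

    ign = Ignore(Context(), root, parent_ignore=None)
    print(ign.should_ignore("notes.tmp"))   # True, matches *.tmp
    print(ign.should_ignore("local-only"))  # True, '/' rules apply to this directory
    print(ign.should_ignore("keep.txt"))    # False
```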

163
chkbit/index.py Normal file

@ -0,0 +1,163 @@
from __future__ import annotations
import fnmatch
import os
import subprocess
import sys
import json
import chkbit
from chkbit import hashfile, hashtext, Status
from typing import Optional
VERSION = 2 # index version
class Index:
def __init__(
self,
context: chkbit.Context,
path: str,
files: list[str],
*,
readonly: bool = False,
):
self.context = context
self.path = path
self.files = files
self.old = {}
self.new = {}
self.updates = []
self.modified = None
self.readonly = readonly
@property
def index_filepath(self):
return os.path.join(self.path, self.context.index_filename)
def _setmod(self, value=True):
self.modified = value
def _log(self, stat: Status, name: str):
self.context.log(stat, os.path.join(self.path, name))
# calc new hashes for this index
def calc_hashes(self, *, ignore: Optional[chkbit.Ignore] = None):
for name in self.files:
if ignore and ignore.should_ignore(name):
self._log(Status.IGNORE, name)
continue
a = self.context.hash_algo
# check previously used hash
if name in self.old:
old = self.old[name]
if "md5" in old:
# legacy structure
a = "md5"
self.old[name] = {"mod": old["mod"], "a": a, "h": old["md5"]}
elif "a" in old:
a = old["a"]
self.new[name] = self._calc_file(name, a)
else:
if self.readonly:
self.new[name] = self._list_file(name, a)
else:
self.new[name] = self._calc_file(name, a)
def show_ignored_only(self, ignore: chkbit.Ignore):
for name in self.files:
if ignore.should_ignore(name):
self._log(Status.IGNORE, name)
# check/update the index (old vs new)
def check_fix(self, force: bool):
for name in self.new.keys():
if not name in self.old:
self._log(Status.NEW, name)
self._setmod()
continue
a = self.old[name]
b = self.new[name]
amod = a["mod"]
bmod = b["mod"]
if a["h"] == b["h"]:
# ok, if the content stays the same the mod time does not matter
self._log(Status.OK, name)
if amod != bmod:
self._setmod()
continue
if amod == bmod:
# damage detected
self._log(Status.ERR_DMG, name)
# replace with old so we don't lose the information on the next run
# unless force is set
if not force:
self.new[name] = a
else:
self._setmod()
elif amod < bmod:
# ok, the file was updated
self._log(Status.UPDATE, name)
self._setmod()
elif amod > bmod:
self._log(Status.WARN_OLD, name)
self._setmod()
def _list_file(self, name: str, a: str):
# produce a dummy entry for new files when the index is not updated
return {
"mod": None,
"a": a,
"h": None,
}
def _calc_file(self, name: str, a: str):
path = os.path.join(self.path, name)
info = os.stat(path)
mtime = int(info.st_mtime * 1000)
res = {
"mod": mtime,
"a": a,
"h": hashfile(path, a, hit=lambda l: self.context.hit(cbytes=l)),
}
self.context.hit(cfiles=1)
return res
def save(self):
if self.modified:
if self.readonly:
raise Exception("Error trying to save a readonly index.")
data = {"v": VERSION, "idx": self.new}
text = json.dumps(self.new, separators=(",", ":"))
data["idx_hash"] = hashtext(text)
with open(self.index_filepath, "w", encoding="utf-8") as f:
json.dump(data, f, separators=(",", ":"))
self._setmod(False)
return True
else:
return False
def load(self):
if not os.path.exists(self.index_filepath):
return False
self._setmod(False)
with open(self.index_filepath, "r", encoding="utf-8") as f:
data = json.load(f)
if "data" in data:
# extract old format from js version
for item in json.loads(data["data"]):
self.old[item["name"]] = {
"mod": item["mod"],
"a": "md5",
"h": item["md5"],
}
elif "idx" in data:
self.old = data["idx"]
text = json.dumps(self.old, separators=(",", ":"))
if data.get("idx_hash") != hashtext(text):
self._setmod()
self._log(Status.ERR_IDX, self.index_filepath)
return True
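
For orientation, `save()` above writes the `.chkbit` index as a single compact JSON object; an illustrative sketch of its shape (hashes and timestamps are made up):
```python
# illustrative only; values below are invented
example_index = {
    "v": 2,                        # index VERSION
    "idx": {
        "photo.jpg": {
            "mod": 1700000000000,  # mtime in milliseconds
            "a": "blake3",         # hash algorithm used for this file
            "h": "9f2c...",        # file hash (truncated here)
        },
    },
    # md5 of the compact JSON serialization of "idx"; load() compares it
    # to detect a damaged index
    "idx_hash": "...",
}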

81
chkbit/index_thread.py Normal file

@ -0,0 +1,81 @@
from __future__ import annotations
import os
import sys
import time
import threading
import chkbit
from chkbit import Index, Status, Ignore
class IndexThread:
def __init__(self, thread_no: int, context: chkbit.Context):
self.thread_no = thread_no
self.update = context.update
self.context = context
self.input_queue = context.input_queue
self.t = threading.Thread(target=self._run)
self.t.daemon = True
self.t.start()
def _process_root(self, iitem: chkbit.InputItem):
files = []
dirs = []
# load files and subdirs
for name in os.listdir(path=iitem.path):
path = os.path.join(iitem.path, name)
if name[0] == ".":
if self.context.show_ignored_only and not self.context.is_chkbit_file(
name
):
self.context.log(Status.IGNORE, path)
continue
if os.path.isdir(path):
if self.context.skip_symlinks and os.path.islink(path):
pass
else:
dirs.append(name)
elif os.path.isfile(path):
files.append(name)
# load index
index = Index(self.context, iitem.path, files, readonly=not self.update)
index.load()
# load ignore
ignore = Ignore(self.context, iitem.path, parent_ignore=iitem.ignore)
if self.context.show_ignored_only:
index.show_ignored_only(ignore)
else:
# calc the new hashes
index.calc_hashes(ignore=ignore)
# compare
index.check_fix(self.context.force)
# save if update is set
if self.update:
if index.save():
self.context.log(Status.UPDATE_INDEX, "")
# process subdirs
for name in dirs:
if not ignore.should_ignore(name):
self.context.add_input(os.path.join(iitem.path, name), ignore=ignore)
else:
self.context.log(Status.IGNORE, name + "/")
def _run(self):
while True:
iitem = self.input_queue.get()
if iitem is None:
break
try:
self._process_root(iitem)
except Exception as e:
self.context.log(Status.INTERNALEXCEPTION, f"{iitem.path}: {e}")
self.input_queue.task_done()
def join(self):
self.t.join()
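
A condensed sketch of how these threads are driven (mirroring the wiring in `chkbit_cli/main.py` further below; the path is hypothetical and results are left on the queue):
```python
from chkbit import Context, IndexThread

ctx = Context(num_workers=2, update=False)
ctx.add_input("/tmp/example")          # hypothetical starting path

workers = [IndexThread(i, ctx) for i in range(ctx.num_workers)]

ctx.input_queue.join()                 # wait until all directories are processed
for _ in workers:
    ctx.end_input()                    # one sentinel per worker
for w in workers:
    w.join()

# (status, path) results and perf counters are now on ctx.result_queue
```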

9
chkbit/input_item.py Normal file

@ -0,0 +1,9 @@
from __future__ import annotations
from typing import Optional
import chkbit
class InputItem:
def __init__(self, path: str, *, ignore: Optional[chkbit.Ignore] = None):
self.path = path
self.ignore = ignore

28
chkbit/status.py Normal file

@ -0,0 +1,28 @@
from __future__ import annotations
from enum import Enum
import logging
class Status(Enum):
ERR_DMG = "DMG"
ERR_IDX = "EIX"
WARN_OLD = "old"
NEW = "new"
UPDATE = "upd"
OK = "ok "
IGNORE = "ign"
INTERNALEXCEPTION = "EXC"
UPDATE_INDEX = "iup"
@staticmethod
def get_level(status: Status):
if status == Status.INTERNALEXCEPTION:
return logging.CRITICAL
elif status in [Status.ERR_DMG, Status.ERR_IDX]:
return logging.ERROR
if status == Status.WARN_OLD:
return logging.WARNING
elif status in [Status.NEW, Status.UPDATE, Status.OK, Status.IGNORE]:
return logging.INFO
else:
return logging.DEBUG
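
A small sketch of the level mapping above:
```python
import logging
from chkbit import Status

assert Status.get_level(Status.ERR_DMG) == logging.ERROR
assert Status.get_level(Status.WARN_OLD) == logging.WARNING
assert Status.get_level(Status.OK) == logging.INFO
print(Status.ERR_DMG.value)  # "DMG", the code shown in the CLI output
```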

5
chkbit_cli/__init__.py Normal file

@ -0,0 +1,5 @@
from chkbit_cli.cli import CLI
from chkbit_cli.progress import Progress
from chkbit_cli.sparklines import sparkify
from chkbit_cli.rate_calc import RateCalc
from chkbit_cli.main import Main, main

61
chkbit_cli/cli.py Normal file

@ -0,0 +1,61 @@
import os
import sys
class CLI:
NO_COLOR = os.environ.get("NO_COLOR", "")
class style:
reset = "\033[0m"
bold = "\033[01m"
disable = "\033[02m"
underline = "\033[04m"
reverse = "\033[07m"
strikethrough = "\033[09m"
invisible = "\033[08m"
class esc:
up = "\033[A"
down = "\033[B"
right = "\033[C"
left = "\033[D"
@staticmethod
def clear_line(opt=0):
# 0=to end, 1=from start, 2=all
return "\033[" + str(opt) + "K"
@staticmethod
def write(*text):
for t in text:
sys.stdout.write(str(t))
sys.stdout.flush()
@staticmethod
def printline(*text):
CLI.write(*text, CLI.esc.clear_line(), "\n")
# 4bit system colors
@staticmethod
def fg4(col):
# black=0,red=1,green=2,orange=3,blue=4,purple=5,cyan=6,lightgrey=7
# darkgrey=8,lightred=9,lightgreen=10,yellow=11,lightblue=12,pink=13,lightcyan=14
if CLI.NO_COLOR:
return ""
else:
return f"\033[{(30+col) if col<8 else (90-8+col)}m"
# 8bit xterm colors
@staticmethod
def fg8(col):
if CLI.NO_COLOR:
return ""
else:
return f"\033[38;5;{col}m"
@staticmethod
def bg8(col):
if CLI.NO_COLOR:
return ""
else:
return f"\033[48;5;{col}m"

385
chkbit_cli/main.py Normal file

@ -0,0 +1,385 @@
import argparse
import logging
import os
import queue
import shutil
import sys
import threading
import time
from datetime import datetime, timedelta
from chkbit import Context, Status, IndexThread
from chkbit_cli import CLI, Progress, RateCalc, sparkify
EPILOG = """
.chkbitignore rules:
each line should contain exactly one name
you may use Unix shell-style wildcards (see README)
lines starting with `#` are skipped
lines starting with `/` are only applied to the current directory
Status codes:
DMG: error, data damage detected
EIX: error, index damaged
old: warning, file replaced by an older version
new: new file
upd: file updated
ok : check ok
ign: ignored (see .chkbitignore)
EXC: internal exception
"""
UPDATE_INTERVAL = timedelta(milliseconds=700)
MB = 1024 * 1024
CLI_BG = CLI.bg8(240)
CLI_SEP = "|"
CLI_SEP_FG = CLI.fg8(235)
CLI_FG1 = CLI.fg8(255)
CLI_FG2 = CLI.fg8(228)
CLI_FG3 = CLI.fg8(202)
CLI_OK_FG = CLI.fg4(2)
CLI_ALERT_FG = CLI.fg4(1)
class Main:
def __init__(self):
self.stdscr = None
self.dmg_list = []
self.err_list = []
self.num_idx_upd = 0
self.num_new = 0
self.num_upd = 0
self.verbose = False
self.log = logging.getLogger("")
self.log_verbose = False
self.progress = Progress.Fancy
self.total = 0
self.term_width = shutil.get_terminal_size()[0]
max_stat = int((self.term_width - 70) / 2)
self.fps = RateCalc(timedelta(seconds=1), max_stat=max_stat)
self.bps = RateCalc(timedelta(seconds=1), max_stat=max_stat)
# disable
self.log.setLevel(logging.CRITICAL + 1)
def _log(self, stat: Status, path: str):
if stat == Status.UPDATE_INDEX:
self.num_idx_upd += 1
else:
if stat == Status.ERR_DMG:
self.total += 1
self.dmg_list.append(path)
elif stat == Status.INTERNALEXCEPTION:
self.err_list.append(path)
elif stat in [Status.OK, Status.UPDATE, Status.NEW]:
self.total += 1
if stat == Status.UPDATE:
self.num_upd += 1
elif stat == Status.NEW:
self.num_new += 1
lvl = Status.get_level(stat)
if self.log_verbose or not stat in [Status.OK, Status.IGNORE]:
self.log.log(lvl, f"{stat.value} {path}")
if self.verbose or not stat in [Status.OK, Status.IGNORE]:
CLI.printline(
CLI_ALERT_FG if lvl >= logging.WARNING else "",
stat.value,
" ",
path,
CLI.style.reset,
)
def _res_worker(self, context: Context):
last = datetime.now()
while True:
try:
item = self.result_queue.get(timeout=0.2)
now = datetime.now()
if not item:
if self.progress == Progress.Fancy:
CLI.printline("")
break
t, *p = item
if t == 0:
self._log(*p)
last = datetime.min
else:
self.fps.push(now, p[0])
self.bps.push(now, p[1])
self.result_queue.task_done()
except queue.Empty:
now = datetime.now()
pass
if last + UPDATE_INTERVAL < now:
last = now
if self.progress == Progress.Fancy:
stat_f = f"{self.fps.last} files/s"
stat_b = f"{int(self.bps.last/MB)} MB/s"
stat = f"[{'RW' if context.update else 'RO'}:{context.num_workers}] {self.total:>5} files $ {sparkify(self.fps.stats)} {stat_f:13} $ {sparkify(self.bps.stats)} {stat_b}"
stat = stat[: self.term_width - 1]
stat = stat.replace("$", CLI_SEP_FG + CLI_SEP + CLI_FG2, 1)
stat = stat.replace("$", CLI_SEP_FG + CLI_SEP + CLI_FG3, 1)
CLI.write(
CLI_BG,
CLI_FG1,
stat,
CLI.esc.clear_line(),
CLI.style.reset,
"\r",
)
elif self.progress == Progress.Plain:
print(self.total, end="\r")
def process(self, args):
if args.update and args.show_ignored_only:
print("Error: use either --update or --show-ignored-only!", file=sys.stderr)
return None
context = Context(
num_workers=args.workers,
force=args.force,
update=args.update,
show_ignored_only=args.show_ignored_only,
hash_algo=args.algo,
skip_symlinks=args.skip_symlinks,
index_filename=args.index_name,
ignore_filename=args.ignore_name,
)
self.result_queue = context.result_queue
# put the initial paths into the queue
for path in args.paths:
context.add_input(path)
# start indexing
workers = [IndexThread(i, context) for i in range(context.num_workers)]
# log the results from the workers
res_worker = threading.Thread(target=self._res_worker, args=(context,))
res_worker.daemon = True
res_worker.start()
# wait for work to finish
context.input_queue.join()
# signal workers to exit
for worker in workers:
context.end_input()
# signal res_worker to exit
self.result_queue.put(None)
for worker in workers:
worker.join()
res_worker.join()
return context
def print_result(self, context):
def cprint(col, text):
if self.progress == Progress.Fancy:
CLI.printline(col, text, CLI.style.reset)
else:
print(text)
def eprint(col, text):
if self.progress == Progress.Fancy:
CLI.write(col)
print(text, file=sys.stderr)
CLI.write(CLI.style.reset)
else:
print(text, file=sys.stderr)
iunit = lambda x, u: f"{x} {u}{'s' if x!=1 else ''}"
iunit2 = lambda x, u1, u2: f"{x} {u2 if x!=1 else u1}"
if self.progress != Progress.Quiet:
status = f"Processed {iunit(self.total, 'file')}{' in readonly mode' if not context.update else ''}."
cprint(CLI_OK_FG, status)
self.log.info(status)
if self.progress == Progress.Fancy and self.total > 0:
elapsed = datetime.now() - self.fps.start
elapsed_s = elapsed.total_seconds()
print(f"- {str(elapsed).split('.')[0]} elapsed")
print(
f"- {(self.fps.total+self.fps.current)/elapsed_s:.2f} files/second"
)
print(
f"- {(self.bps.total+self.bps.current)/MB/elapsed_s:.2f} MB/second"
)
if context.update:
if self.num_idx_upd:
cprint(
CLI_OK_FG,
f"- {iunit2(self.num_idx_upd, 'directory was', 'directories were')} updated\n"
+ f"- {iunit2(self.num_new, 'file hash was', 'file hashes were')} added\n"
+ f"- {iunit2(self.num_upd, 'file hash was', 'file hashes were')} updated",
)
elif self.num_new + self.num_upd > 0:
cprint(
CLI_ALERT_FG,
f"No changes were made (specify -u to update):\n"
+ f"- {iunit(self.num_new, 'file')} would have been added and\n"
+ f"- {iunit(self.num_upd, 'file')} would have been updated.",
)
if self.dmg_list:
eprint(CLI_ALERT_FG, "chkbit detected damage in these files:")
for err in self.dmg_list:
print(err, file=sys.stderr)
n = len(self.dmg_list)
status = f"error: detected {iunit(n, 'file')} with damage!"
self.log.error(status)
eprint(CLI_ALERT_FG, status)
if self.err_list:
status = "chkbit ran into errors"
self.log.error(status + "!")
eprint(CLI_ALERT_FG, status + ":")
for err in self.err_list:
print(err, file=sys.stderr)
if self.dmg_list or self.err_list:
sys.exit(1)
def run(self):
parser = argparse.ArgumentParser(
prog="chkbit",
description="Checks the data integrity of your files. See https://github.com/laktak/chkbit-py",
epilog=EPILOG,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
"paths", metavar="PATH", type=str, nargs="*", help="directories to check"
)
parser.add_argument(
"-u",
"--update",
action="store_true",
help="update indices (without this chkbit will verify files in readonly mode)",
)
parser.add_argument(
"--show-ignored-only", action="store_true", help="only show ignored files"
)
parser.add_argument(
"--algo",
type=str,
default="blake3",
help="hash algorithm: md5, sha512, blake3 (default: blake3)",
)
parser.add_argument(
"-f", "--force", action="store_true", help="force update of damaged items"
)
parser.add_argument(
"-s", "--skip-symlinks", action="store_true", help="do not follow symlinks"
)
parser.add_argument(
"-l",
"--log-file",
metavar="FILE",
type=str,
help="write to a logfile if specified",
)
parser.add_argument(
"--log-verbose", action="store_true", help="verbose logging"
)
parser.add_argument(
"--index-name",
metavar="NAME",
type=str,
default=".chkbit",
help="filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)",
)
parser.add_argument(
"--ignore-name",
metavar="NAME",
type=str,
default=".chkbitignore",
help="filename that chkbit reads its ignore list from, needs to start with '.' (default: .chkbitignore)",
)
parser.add_argument(
"-w",
"--workers",
metavar="N",
action="store",
type=int,
default=5,
help="number of workers to use (default: 5)",
)
parser.add_argument(
"--plain",
action="store_true",
help="show plain status instead of being fancy",
)
parser.add_argument(
"-q",
"--quiet",
action="store_true",
help="quiet, don't show progress/information",
)
parser.add_argument(
"-v", "--verbose", action="store_true", help="verbose output"
)
args = parser.parse_args()
self.verbose = args.verbose or args.show_ignored_only
if args.log_file:
self.log_verbose = args.log_verbose
self.log.setLevel(logging.INFO)
fh = logging.FileHandler(args.log_file)
fh.setFormatter(
logging.Formatter(
"%(asctime)s %(levelname).4s %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
)
self.log.addHandler(fh)
if args.quiet:
self.progress = Progress.Quiet
elif not sys.stdout.isatty():
self.progress = Progress.Summary
elif args.plain:
self.progress = Progress.Plain
if args.paths:
self.log.info(f"chkbit {', '.join(args.paths)}")
context = self.process(args)
if context and not context.show_ignored_only:
self.print_result(context)
else:
parser.print_help()
def main():
try:
Main().run()
except KeyboardInterrupt:
print("abort")
sys.exit(1)
except Exception as e:
print(e, file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()

8
chkbit_cli/progress.py Normal file

@ -0,0 +1,8 @@
from enum import Enum
class Progress(Enum):
Quiet = (0,)
Summary = (1,)
Plain = (2,)
Fancy = (3,)

28
chkbit_cli/rate_calc.py Normal file

@ -0,0 +1,28 @@
from datetime import datetime, timedelta
class RateCalc:
def __init__(self, interval: timedelta, max_stat: int):
self.interval = interval
self.max_stat = max(max_stat, 10)
self.reset()
def reset(self):
self.start = datetime.now()
self.updated = self.start
self.total = 0
self.current = 0
self.stats = [0] * self.max_stat
@property
def last(self):
return self.stats[-1]
def push(self, ts: datetime, value: int):
while self.updated + self.interval < ts:
self.stats.append(self.current)
self.stats = self.stats[-self.max_stat :]
self.total += self.current
self.current = 0
self.updated += self.interval
self.current += value
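
A minimal sketch of how the rate calculator is fed and read (the CLI pushes file and byte counts and renders `stats` as a sparkline):
```python
from datetime import timedelta
from chkbit_cli import RateCalc

rc = RateCalc(timedelta(seconds=1), max_stat=10)
t0 = rc.start
for i in range(50):                                    # simulated events, 10 per second
    rc.push(t0 + timedelta(milliseconds=100 * i), 4096)

print(rc.last)        # value accumulated in the most recent completed interval
print(rc.stats[-5:])  # per-interval history, rendered as a sparkline by the CLI
```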

71
chkbit_cli/sparklines.py Normal file

@ -0,0 +1,71 @@
import math, os, re, string, sys
"""
Copyright (c) 2021, Brandon Whaley <redkrieg@gmail.com>, et al.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
spark_chars = "▁▂▃▄▅▆▇█"
"""Eight unicode characters of (nearly) steadily increasing height."""
def sparkify(series, minimum=None, maximum=None, rows=1):
"""Converts <series> to a sparkline string.
Example:
>>> sparkify([ 0.5, 1.2, 3.5, 7.3, 8.0, 12.5, float("nan"), 15.0, 14.2, 11.8, 6.1,
... 1.9 ])
u'▁▁▂▄▅▇ ██▆▄▂'
>>> sparkify([1, 1, -2, 3, -5, 8, -13])
u'▆▆▅▆▄█▁'
Raises ValueError if input data cannot be converted to float.
Raises TypeError if series is not an iterable.
"""
series = [float(n) for n in series]
if all(not math.isfinite(n) for n in series):
return " " * len(series)
minimum = min(filter(math.isfinite, series)) if minimum is None else minimum
maximum = max(filter(math.isfinite, series)) if maximum is None else maximum
data_range = maximum - minimum
if data_range == 0.0:
# Graph a baseline if every input value is equal.
return "".join([spark_chars[0] if math.isfinite(i) else " " for i in series])
row_res = len(spark_chars)
resolution = row_res * rows
coefficient = (resolution - 1.0) / data_range
def clamp(n):
return min(max(n, minimum), maximum)
def spark_index(n):
"""An integer from 0 to (resolution-1) proportional to the data range"""
return int(round((clamp(n) - minimum) * coefficient))
output = []
for r in range(rows - 1, -1, -1):
row_out = []
row_min = row_res * r
row_max = row_min + row_res - 1
for n in series:
if not math.isfinite(n):
row_out.append(" ")
continue
i = spark_index(n)
if i < row_min:
row_out.append(" ")
elif i > row_max:
row_out.append(spark_chars[-1])
else:
row_out.append(spark_chars[i % row_res])
output.append("".join(row_out))
return os.linesep.join(output)
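
Beyond the doctest above, `sparkify` also supports multiple rows and pinned bounds; a short sketch with made-up values:
```python
from chkbit_cli import sparkify

print(sparkify([0, 2, 4, 8, 16, 32, 64, 128]))          # single row
print(sparkify([0, 2, 4, 8, 16, 32, 64, 128], rows=2))  # two stacked rows
print(sparkify([5, 10, 15], minimum=0, maximum=100))    # fixed scale
```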


@ -1,25 +0,0 @@
package main
var headerHelp = `Checks the data integrity of your files.
For help tips run "chkbit -H" or go to
https://github.com/laktak/chkbit
`
var helpTips = `
.chkbitignore rules:
each line should contain exactly one name
you may use Unix shell-style wildcards (see README)
lines starting with '#' are skipped
lines starting with '/' are only applied to the current directory
Status codes:
DMG: error, data damage detected
EIX: error, index damaged
old: warning, file replaced by an older version
new: new file
upd: file updated
ok : check ok
del: file/directory removed
ign: ignored (see .chkbitignore)
EXC: exception/panic
`


@ -1,351 +0,0 @@
package main
import (
"fmt"
"io"
"log"
"os"
"strings"
"sync"
"time"
"github.com/alecthomas/kong"
"github.com/laktak/chkbit/v5"
"github.com/laktak/chkbit/v5/cmd/chkbit/util"
"github.com/laktak/lterm"
)
type Progress int
const (
Quiet Progress = iota
Summary
Plain
Fancy
)
const (
updateInterval = time.Millisecond * 700
sizeMB int64 = 1024 * 1024
)
var appVersion = "vdev"
var (
termBG = lterm.Bg8(240)
termSep = "|"
termSepFG = lterm.Fg8(235)
termFG1 = lterm.Fg8(255)
termFG2 = lterm.Fg8(228)
termFG3 = lterm.Fg8(202)
termOKFG = lterm.Fg4(2)
termAlertFG = lterm.Fg4(1)
)
var cli struct {
Paths []string `arg:"" optional:"" name:"paths" help:"directories to check"`
Tips bool `short:"H" help:"Show tips."`
Check bool `short:"c" help:"check mode: chkbit will verify files in readonly mode (default mode)"`
Update bool `short:"u" help:"update mode: add and update indices"`
AddOnly bool `short:"a" help:"add mode: only add new files, do not check existing (quicker)"`
ShowIgnoredOnly bool `short:"i" help:"show-ignored mode: only show ignored files"`
ShowMissing bool `short:"m" help:"show missing files/directories"`
Force bool `help:"force update of damaged items (advanced usage only)"`
SkipSymlinks bool `short:"S" help:"do not follow symlinks"`
NoRecurse bool `short:"R" help:"do not recurse into subdirectories"`
NoDirInIndex bool `short:"D" help:"do not track directories in the index"`
LogFile string `short:"l" help:"write to a logfile if specified"`
LogVerbose bool `help:"verbose logging"`
Algo string `default:"blake3" help:"hash algorithm: md5, sha512, blake3 (default: blake3)"`
IndexName string `default:".chkbit" help:"filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)"`
IgnoreName string `default:".chkbitignore" help:"filename that chkbit reads its ignore list from, needs to start with '.' (default: .chkbitignore)"`
Workers int `short:"w" default:"5" help:"number of workers to use (default: 5)"`
Plain bool `help:"show plain status instead of being fancy"`
Quiet bool `short:"q" help:"quiet, don't show progress/information"`
Verbose bool `short:"v" help:"verbose output"`
Version bool `short:"V" help:"show version information"`
}
type Main struct {
context *chkbit.Context
dmgList []string
errList []string
verbose bool
logger *log.Logger
logVerbose bool
progress Progress
termWidth int
fps *util.RateCalc
bps *util.RateCalc
}
func (m *Main) log(text string) {
m.logger.Println(time.Now().UTC().Format("2006-01-02 15:04:05"), text)
}
func (m *Main) logStatus(stat chkbit.Status, message string) bool {
if stat == chkbit.STATUS_UPDATE_INDEX {
return false
}
if stat == chkbit.STATUS_ERR_DMG {
m.dmgList = append(m.dmgList, message)
} else if stat == chkbit.STATUS_PANIC {
m.errList = append(m.errList, message)
}
if m.logVerbose || !stat.IsVerbose() {
m.log(stat.String() + " " + message)
}
if m.verbose || !stat.IsVerbose() {
col := ""
if stat.IsErrorOrWarning() {
col = termAlertFG
}
lterm.Printline(col, stat.String(), " ", message, lterm.Reset)
return true
}
return false
}
func (m *Main) showStatus() {
last := time.Now().Add(-updateInterval)
stat := ""
for {
select {
case item := <-m.context.LogQueue:
if item == nil {
if m.progress == Fancy {
lterm.Printline("")
}
return
}
if m.logStatus(item.Stat, item.Message) {
if m.progress == Fancy {
lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r")
} else {
fmt.Print(m.context.NumTotal, "\r")
}
}
case perf := <-m.context.PerfQueue:
now := time.Now()
m.fps.Push(now, perf.NumFiles)
m.bps.Push(now, perf.NumBytes)
if last.Add(updateInterval).Before(now) {
last = now
if m.progress == Fancy {
statF := fmt.Sprintf("%d files/s", m.fps.Last())
statB := fmt.Sprintf("%d MB/s", m.bps.Last()/sizeMB)
stat = "RW"
if !m.context.UpdateIndex {
stat = "RO"
}
stat = fmt.Sprintf("[%s:%d] %5d files $ %s %-13s $ %s %-13s",
stat, m.context.NumWorkers, m.context.NumTotal,
util.Sparkline(m.fps.Stats), statF,
util.Sparkline(m.bps.Stats), statB)
stat = util.LeftTruncate(stat, m.termWidth-1)
stat = strings.Replace(stat, "$", termSepFG+termSep+termFG2, 1)
stat = strings.Replace(stat, "$", termSepFG+termSep+termFG3, 1)
lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r")
} else if m.progress == Plain {
fmt.Print(m.context.NumTotal, "\r")
}
}
}
}
}
func (m *Main) process() bool {
// verify mode
var b01 = map[bool]int8{false: 0, true: 1}
if b01[cli.Check]+b01[cli.Update]+b01[cli.AddOnly]+b01[cli.ShowIgnoredOnly] > 1 {
fmt.Println("Error: can only run one mode at a time!")
os.Exit(1)
}
var err error
m.context, err = chkbit.NewContext(cli.Workers, cli.Algo, cli.IndexName, cli.IgnoreName)
if err != nil {
fmt.Println(err)
return false
}
m.context.ForceUpdateDmg = cli.Force
m.context.UpdateIndex = cli.Update || cli.AddOnly
m.context.AddOnly = cli.AddOnly
m.context.ShowIgnoredOnly = cli.ShowIgnoredOnly
m.context.ShowMissing = cli.ShowMissing
m.context.SkipSymlinks = cli.SkipSymlinks
m.context.SkipSubdirectories = cli.NoRecurse
m.context.TrackDirectories = !cli.NoDirInIndex
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
m.showStatus()
}()
m.context.Start(cli.Paths)
wg.Wait()
return true
}
func (m *Main) printResult() {
cprint := func(col, text string) {
if m.progress != Quiet {
if m.progress == Fancy {
lterm.Printline(col, text, lterm.Reset)
} else {
fmt.Println(text)
}
}
}
eprint := func(col, text string) {
if m.progress == Fancy {
lterm.Write(col)
fmt.Fprintln(os.Stderr, text)
lterm.Write(lterm.Reset)
} else {
fmt.Fprintln(os.Stderr, text)
}
}
if m.progress != Quiet {
mode := ""
if !m.context.UpdateIndex {
mode = " in readonly mode"
}
status := fmt.Sprintf("Processed %s%s.", util.LangNum1MutateSuffix(m.context.NumTotal, "file"), mode)
cprint(termOKFG, status)
m.log(status)
if m.progress == Fancy && m.context.NumTotal > 0 {
elapsed := time.Since(m.fps.Start)
elapsedS := elapsed.Seconds()
fmt.Println("-", elapsed.Truncate(time.Second), "elapsed")
fmt.Printf("- %.2f files/second\n", (float64(m.fps.Total)+float64(m.fps.Current))/elapsedS)
fmt.Printf("- %.2f MB/second\n", (float64(m.bps.Total)+float64(m.bps.Current))/float64(sizeMB)/elapsedS)
}
del := ""
if m.context.UpdateIndex {
if m.context.NumIdxUpd > 0 {
if m.context.NumDel > 0 {
del = fmt.Sprintf("\n- %s been removed", util.LangNum1Choice(m.context.NumDel, "file/directory has", "files/directories have"))
}
cprint(termOKFG, fmt.Sprintf("- %s updated\n- %s added\n- %s updated%s",
util.LangNum1Choice(m.context.NumIdxUpd, "directory was", "directories were"),
util.LangNum1Choice(m.context.NumNew, "file hash was", "file hashes were"),
util.LangNum1Choice(m.context.NumUpd, "file hash was", "file hashes were"),
del))
}
} else if m.context.NumNew+m.context.NumUpd+m.context.NumDel > 0 {
if m.context.NumDel > 0 {
del = fmt.Sprintf("\n- %s would have been removed", util.LangNum1Choice(m.context.NumDel, "file/directory", "files/directories"))
}
cprint(termAlertFG, fmt.Sprintf("No changes were made (specify -u to update):\n- %s would have been added\n- %s would have been updated%s",
util.LangNum1MutateSuffix(m.context.NumNew, "file"),
util.LangNum1MutateSuffix(m.context.NumUpd, "file"),
del))
}
}
if len(m.dmgList) > 0 {
eprint(termAlertFG, "chkbit detected damage in these files:")
for _, err := range m.dmgList {
fmt.Fprintln(os.Stderr, err)
}
n := len(m.dmgList)
status := fmt.Sprintf("error: detected %s with damage!", util.LangNum1MutateSuffix(n, "file"))
m.log(status)
eprint(termAlertFG, status)
}
if len(m.errList) > 0 {
status := "chkbit ran into errors"
m.log(status + "!")
eprint(termAlertFG, status+":")
for _, err := range m.errList {
fmt.Fprintln(os.Stderr, err)
}
}
if len(m.dmgList) > 0 || len(m.errList) > 0 {
os.Exit(1)
}
}
func (m *Main) run() {
if len(os.Args) < 2 {
os.Args = append(os.Args, "--help")
}
kong.Parse(&cli,
kong.Name("chkbit"),
kong.Description(""),
kong.UsageOnError(),
)
if cli.Tips {
fmt.Println(helpTips)
os.Exit(0)
}
if cli.Version {
fmt.Println("github.com/laktak/chkbit")
fmt.Println(appVersion)
return
}
m.verbose = cli.Verbose || cli.ShowIgnoredOnly
if cli.LogFile != "" {
m.logVerbose = cli.LogVerbose
f, err := os.OpenFile(cli.LogFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
if err != nil {
fmt.Println(err)
return
}
defer f.Close()
m.logger = log.New(f, "", 0)
}
if cli.Quiet {
m.progress = Quiet
} else if fileInfo, _ := os.Stdout.Stat(); (fileInfo.Mode() & os.ModeCharDevice) == 0 {
m.progress = Summary
} else if cli.Plain {
m.progress = Plain
} else {
m.progress = Fancy
}
if len(cli.Paths) > 0 {
m.log("chkbit " + strings.Join(cli.Paths, ", "))
if m.process() && !m.context.ShowIgnoredOnly {
m.printResult()
}
} else {
fmt.Println("specify a path to check, see -h")
}
}
func main() {
defer func() {
if r := recover(); r != nil {
fmt.Println(r)
os.Exit(1)
}
}()
termWidth := lterm.GetWidth()
m := &Main{
logger: log.New(io.Discard, "", 0),
termWidth: termWidth,
fps: util.NewRateCalc(time.Second, (termWidth-70)/2),
bps: util.NewRateCalc(time.Second, (termWidth-70)/2),
}
m.run()
}


@ -1,35 +0,0 @@
package util
import (
"math"
)
func Minimum(series []int64) int64 {
var min int64 = math.MaxInt64
for _, value := range series {
if value < min {
min = value
}
}
return min
}
func Maximum(series []int64) int64 {
var max int64 = math.MinInt64
for _, value := range series {
if value > max {
max = value
}
}
return max
}
func Clamp(min int64, max int64, n int64) int64 {
if n < min {
return min
}
if n > max {
return max
}
return n
}


@ -1,19 +0,0 @@
package util
import "fmt"
func LangNum1MutateSuffix(num int, u string) string {
s := ""
if num != 1 {
s = "s"
}
return fmt.Sprintf("%d %s%s", num, u, s)
}
func LangNum1Choice(num int, u1, u2 string) string {
u := u1
if num != 1 {
u = u2
}
return fmt.Sprintf("%d %s", num, u)
}


@ -1,52 +0,0 @@
package util
import (
"time"
)
type RateCalc struct {
Interval time.Duration
MaxStat int
Start time.Time
Updated time.Time
Total int64
Current int64
Stats []int64
}
func NewRateCalc(interval time.Duration, maxStat int) *RateCalc {
if maxStat < 10 {
maxStat = 10
}
rc := &RateCalc{
Interval: interval,
MaxStat: maxStat,
}
rc.Reset()
return rc
}
func (rc *RateCalc) Reset() {
rc.Start = time.Now()
rc.Updated = rc.Start
rc.Total = 0
rc.Current = 0
rc.Stats = make([]int64, rc.MaxStat)
}
func (rc *RateCalc) Last() int64 {
return rc.Stats[len(rc.Stats)-1]
}
func (rc *RateCalc) Push(ts time.Time, value int64) {
for rc.Updated.Add(rc.Interval).Before(ts) {
rc.Stats = append(rc.Stats, rc.Current)
if len(rc.Stats) > rc.MaxStat {
rc.Stats = rc.Stats[len(rc.Stats)-rc.MaxStat:]
}
rc.Total += rc.Current
rc.Current = 0
rc.Updated = rc.Updated.Add(rc.Interval)
}
rc.Current += value
}


@ -1,32 +0,0 @@
package util
import (
"math"
)
var sparkChars = []rune{'▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'}
func Sparkline(series []int64) string {
out := make([]rune, len(series))
min := Minimum(series)
max := Maximum(series)
dataRange := max - min
if dataRange == 0 {
for i := range series {
out[i] = sparkChars[0]
}
} else {
step := float64(len(sparkChars)-1) / float64(dataRange)
for i, n := range series {
idx := int(math.Round(float64(Clamp(min, max, n)-min) * step))
if idx < 0 {
out[i] = ' '
} else if idx > len(sparkChars) {
out[i] = sparkChars[len(sparkChars)-1]
} else {
out[i] = sparkChars[idx]
}
}
}
return string(out)
}


@ -1,13 +0,0 @@
package util
import (
"testing"
)
func TestSpark(t *testing.T) {
expected := "▁▁▂▄▅▇██▆▄▂"
actual := Sparkline([]int64{5, 12, 35, 73, 80, 125, 150, 142, 118, 61, 19})
if expected != actual {
t.Error("expected:", expected, "actual:", actual)
}
}


@ -1,11 +0,0 @@
package util
func LeftTruncate(s string, nMax int) string {
for i := range s {
nMax--
if nMax < 0 {
return s[:i]
}
}
return s
}


@ -1,13 +0,0 @@
package util
import (
"testing"
)
func TestTrunc(t *testing.T) {
expected := "ab©def"
actual := LeftTruncate(expected+"ghijk", 6)
if expected != actual {
t.Error("expected:", expected, "actual:", actual)
}
}


@ -1,195 +0,0 @@
package chkbit
import (
"errors"
"os"
"path/filepath"
"sync"
)
type Context struct {
NumWorkers int
UpdateIndex bool
AddOnly bool
ShowIgnoredOnly bool
ShowMissing bool
ForceUpdateDmg bool
HashAlgo string
TrackDirectories bool
SkipSymlinks bool
SkipSubdirectories bool
IndexFilename string
IgnoreFilename string
WorkQueue chan *WorkItem
LogQueue chan *LogEvent
PerfQueue chan *PerfEvent
wg sync.WaitGroup
mutex sync.Mutex
NumTotal int
NumIdxUpd int
NumNew int
NumUpd int
NumDel int
}
func NewContext(numWorkers int, hashAlgo string, indexFilename string, ignoreFilename string) (*Context, error) {
if indexFilename[0] != '.' {
return nil, errors.New("The index filename must start with a dot!")
}
if ignoreFilename[0] != '.' {
return nil, errors.New("The ignore filename must start with a dot!")
}
if hashAlgo != "md5" && hashAlgo != "sha512" && hashAlgo != "blake3" {
return nil, errors.New(hashAlgo + " is unknown.")
}
return &Context{
NumWorkers: numWorkers,
HashAlgo: hashAlgo,
IndexFilename: indexFilename,
IgnoreFilename: ignoreFilename,
WorkQueue: make(chan *WorkItem, numWorkers*10),
LogQueue: make(chan *LogEvent, numWorkers*100),
PerfQueue: make(chan *PerfEvent, numWorkers*10),
}, nil
}
func (context *Context) log(stat Status, message string) {
context.mutex.Lock()
defer context.mutex.Unlock()
switch stat {
case STATUS_ERR_DMG:
context.NumTotal++
case STATUS_UPDATE_INDEX:
context.NumIdxUpd++
case STATUS_UP_WARN_OLD:
context.NumTotal++
context.NumUpd++
case STATUS_UPDATE:
context.NumTotal++
context.NumUpd++
case STATUS_NEW:
context.NumTotal++
context.NumNew++
case STATUS_OK:
if !context.AddOnly {
context.NumTotal++
}
case STATUS_MISSING:
context.NumDel++
//case STATUS_PANIC:
//case STATUS_ERR_IDX:
//case STATUS_IGNORE:
}
context.LogQueue <- &LogEvent{stat, message}
}
func (context *Context) logErr(path string, err error) {
context.LogQueue <- &LogEvent{STATUS_PANIC, path + ": " + err.Error()}
}
func (context *Context) perfMonFiles(numFiles int64) {
context.PerfQueue <- &PerfEvent{numFiles, 0}
}
func (context *Context) perfMonBytes(numBytes int64) {
context.PerfQueue <- &PerfEvent{0, numBytes}
}
func (context *Context) addWork(path string, filesToIndex []string, dirList []string, ignore *Ignore) {
context.WorkQueue <- &WorkItem{path, filesToIndex, dirList, ignore}
}
func (context *Context) endWork() {
context.WorkQueue <- nil
}
func (context *Context) isChkbitFile(name string) bool {
return name == context.IndexFilename || name == context.IgnoreFilename
}
func (context *Context) Start(pathList []string) {
context.NumTotal = 0
context.NumIdxUpd = 0
context.NumNew = 0
context.NumUpd = 0
context.NumDel = 0
var wg sync.WaitGroup
wg.Add(context.NumWorkers)
for i := 0; i < context.NumWorkers; i++ {
go func(id int) {
defer wg.Done()
context.runWorker(id)
}(i)
}
go func() {
for _, path := range pathList {
context.scanDir(path, nil)
}
for i := 0; i < context.NumWorkers; i++ {
context.endWork()
}
}()
wg.Wait()
context.LogQueue <- nil
}
func (context *Context) scanDir(root string, parentIgnore *Ignore) {
files, err := os.ReadDir(root)
if err != nil {
context.logErr(root+"/", err)
return
}
isDir := func(file os.DirEntry, path string) bool {
if file.IsDir() {
return true
}
ft := file.Type()
if !context.SkipSymlinks && ft&os.ModeSymlink != 0 {
rpath, err := filepath.EvalSymlinks(path)
if err == nil {
fi, err := os.Lstat(rpath)
return err == nil && fi.IsDir()
}
}
return false
}
var dirList []string
var filesToIndex []string
ignore, err := GetIgnore(context, root, parentIgnore)
if err != nil {
context.logErr(root+"/", err)
}
for _, file := range files {
path := filepath.Join(root, file.Name())
if file.Name()[0] == '.' {
if context.ShowIgnoredOnly && !context.isChkbitFile(file.Name()) {
context.log(STATUS_IGNORE, path)
}
continue
}
if isDir(file, path) {
if !ignore.shouldIgnore(file.Name()) {
dirList = append(dirList, file.Name())
} else {
context.log(STATUS_IGNORE, file.Name()+"/")
}
} else if file.Type().IsRegular() {
filesToIndex = append(filesToIndex, file.Name())
}
}
context.addWork(root, filesToIndex, dirList, ignore)
if !context.SkipSubdirectories {
for _, name := range dirList {
context.scanDir(filepath.Join(root, name), ignore)
}
}
}
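
For orientation, the context above is driven by constructing it with NewContext, draining LogQueue (and PerfQueue) on separate goroutines, and then calling Start with the root paths; Start signals completion by sending a nil LogEvent. Below is a minimal caller sketch, not the project's actual CLI: the worker count, index/ignore filenames, and scan path are illustrative values only.

package main

import (
    "fmt"

    "github.com/laktak/chkbit/v5"
)

func main() {
    // four workers, blake3 hashes; both filenames must start with a dot
    ctx, err := chkbit.NewContext(4, "blake3", ".chkbit", ".chkbitignore")
    if err != nil {
        panic(err)
    }
    ctx.UpdateIndex = true

    done := make(chan struct{})
    go func() {
        // Start terminates the log stream with a nil event
        for ev := range ctx.LogQueue {
            if ev == nil {
                break
            }
            fmt.Println(ev.Stat, ev.Message)
        }
        close(done)
    }()
    go func() {
        // PerfQueue must also be drained so the workers never block on it
        for range ctx.PerfQueue {
        }
    }()

    ctx.Start([]string{"/data/photos"})
    <-done
    fmt.Println("total files processed:", ctx.NumTotal)
}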

15
go.mod
View File

@ -1,15 +0,0 @@
module github.com/laktak/chkbit/v5
go 1.22.3
require (
github.com/alecthomas/kong v0.9.0
github.com/laktak/lterm v1.0.0
lukechampine.com/blake3 v1.3.0
)
require (
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
golang.org/x/sys v0.24.0 // indirect
golang.org/x/term v0.23.0 // indirect
)

18
go.sum
View File

@ -1,18 +0,0 @@
github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU=
github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA=
github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os=
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/laktak/lterm v1.0.0 h1:hxRsBDHIWlMn+IV7C+6/tk4y81WqcO8F6G+52wSZUf4=
github.com/laktak/lterm v1.0.0/go.mod h1:zwGyZi5PNuySqsDsRVNvBBYANy9k61oYgW6Flsm2AZg=
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU=
golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk=
lukechampine.com/blake3 v1.3.0 h1:sJ3XhFINmHSrYCgl958hscfIa3bw8x4DqMP3u1YvoYE=
lukechampine.com/blake3 v1.3.0/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k=

View File

@ -1,57 +0,0 @@
package chkbit
import (
"crypto/md5"
"crypto/sha512"
"encoding/hex"
"errors"
"hash"
"io"
"os"
"lukechampine.com/blake3"
)
const BLOCKSIZE = 2 << 10 << 7 // 256 KiB read buffer
func Hashfile(path string, hashAlgo string, perfMonBytes func(int64)) (string, error) {
var h hash.Hash
switch hashAlgo {
case "md5":
h = md5.New()
case "sha512":
h = sha512.New()
case "blake3":
h = blake3.New(32, nil)
default:
return "", errors.New("algo '" + hashAlgo + "' is unknown.")
}
file, err := os.Open(path)
if err != nil {
return "", err
}
defer file.Close()
buf := make([]byte, BLOCKSIZE)
for {
bytesRead, err := file.Read(buf)
if err != nil && err != io.EOF {
return "", err
}
if bytesRead == 0 {
break
}
h.Write(buf[:bytesRead])
if perfMonBytes != nil {
perfMonBytes(int64(bytesRead))
}
}
return hex.EncodeToString(h.Sum(nil)), nil
}
func hashMd5(data []byte) string {
h := md5.New()
h.Write(data)
return hex.EncodeToString(h.Sum(nil))
}
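
As a quick illustration of the helper above, Hashfile can be called directly; the path here is a placeholder and the performance callback may be nil, as the hashing loop only invokes it when present.

package main

import (
    "fmt"

    "github.com/laktak/chkbit/v5"
)

func main() {
    // returns the hex-encoded digest of the file contents
    digest, err := chkbit.Hashfile("/tmp/example.dat", "blake3", nil)
    if err != nil {
        panic(err)
    }
    fmt.Println("blake3:", digest)
}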

View File

@ -1,90 +0,0 @@
package chkbit
import (
"bufio"
"os"
"path/filepath"
"strings"
)
type Ignore struct {
parentIgnore *Ignore
context *Context
path string
name string
itemList []string
}
func GetIgnore(context *Context, path string, parentIgnore *Ignore) (*Ignore, error) {
ignore := &Ignore{
parentIgnore: parentIgnore,
context: context,
path: path,
name: filepath.Base(path) + "/",
}
err := ignore.loadIgnore()
if err != nil {
return nil, err
}
return ignore, nil
}
func (ignore *Ignore) getIgnoreFilepath() string {
return filepath.Join(ignore.path, ignore.context.IgnoreFilename)
}
func (ignore *Ignore) loadIgnore() error {
if _, err := os.Stat(ignore.getIgnoreFilepath()); err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
file, err := os.Open(ignore.getIgnoreFilepath())
if err != nil {
return err
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line != "" && line[0] != '#' {
ignore.itemList = append(ignore.itemList, line)
}
}
return scanner.Err()
}
func (ignore *Ignore) shouldIgnore(name string) bool {
return ignore.shouldIgnore2(name, "")
}
func (ignore *Ignore) shouldIgnore2(name string, fullname string) bool {
for _, item := range ignore.itemList {
if item[0] == '/' {
if len(fullname) > 0 {
continue
} else {
item = item[1:]
}
}
if match, _ := filepath.Match(item, name); match {
return true
}
if fullname != "" {
if match, _ := filepath.Match(item, fullname); match {
return true
}
}
}
if ignore.parentIgnore != nil {
if fullname != "" {
return ignore.parentIgnore.shouldIgnore2(fullname, ignore.name+fullname)
} else {
return ignore.parentIgnore.shouldIgnore2(name, ignore.name+name)
}
}
return false
}
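
To summarize the matching rules implemented above: blank lines and lines starting with '#' are skipped, each remaining line is a filepath.Match pattern, a leading '/' anchors the pattern to the directory that holds the ignore file, and parent ignore files are consulted with the name made relative to them. An illustrative ignore file (file name per the configured IgnoreFilename, e.g. .chkbitignore):

# comments and blank lines are ignored
*.tmp
node_modules
# anchored: only matches "build" directly in this directory
/build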

303
index.go
View File

@ -1,303 +0,0 @@
package chkbit
import (
"encoding/json"
"errors"
"os"
"path/filepath"
"slices"
)
const VERSION = 2 // index version
var (
algoMd5 = "md5"
)
type idxInfo struct {
ModTime int64 `json:"mod"`
Algo *string `json:"a,omitempty"`
Hash *string `json:"h,omitempty"`
LegacyHash *string `json:"md5,omitempty"`
}
type indexFile struct {
V int `json:"v"`
// IdxRaw -> map[string]idxInfo
IdxRaw json.RawMessage `json:"idx"`
IdxHash string `json:"idx_hash"`
// 2024-08 optional, list of subdirectories
Dir []string `json:"dirlist,omitempty"`
}
type idxInfo1 struct {
ModTime int64 `json:"mod"`
Hash string `json:"md5"`
}
type indexFile1 struct {
Data map[string]idxInfo1 `json:"data"`
}
type Index struct {
context *Context
path string
files []string
cur map[string]idxInfo
new map[string]idxInfo
curDirList []string
newDirList []string
modified bool
readonly bool
}
func newIndex(context *Context, path string, files []string, dirList []string, readonly bool) *Index {
slices.Sort(dirList)
return &Index{
context: context,
path: path,
files: files,
cur: make(map[string]idxInfo),
new: make(map[string]idxInfo),
curDirList: make([]string, 0),
newDirList: dirList,
readonly: readonly,
}
}
func (i *Index) getIndexFilepath() string {
return filepath.Join(i.path, i.context.IndexFilename)
}
func (i *Index) logFilePanic(name string, message string) {
i.context.log(STATUS_PANIC, filepath.Join(i.path, name)+": "+message)
}
func (i *Index) logFile(stat Status, name string) {
i.context.log(stat, filepath.Join(i.path, name))
}
func (i *Index) logDir(stat Status, name string) {
i.context.log(stat, filepath.Join(i.path, name)+"/")
}
func (i *Index) calcHashes(ignore *Ignore) {
for _, name := range i.files {
if ignore != nil && ignore.shouldIgnore(name) {
i.logFile(STATUS_IGNORE, name)
continue
}
var err error
var info *idxInfo
algo := i.context.HashAlgo
if val, ok := i.cur[name]; ok {
// existing file
if val.LegacyHash != nil {
// convert from py1 to new format
val = idxInfo{
ModTime: val.ModTime,
Algo: &algoMd5,
Hash: val.LegacyHash,
}
i.cur[name] = val
}
if val.Algo != nil {
algo = *val.Algo
}
if i.context.AddOnly {
info = &val
} else {
info, err = i.calcFile(name, algo)
}
} else {
// new file
if i.readonly {
info = &idxInfo{Algo: &algo}
} else {
info, err = i.calcFile(name, algo)
}
}
if err != nil {
i.logFilePanic(name, err.Error())
} else {
i.new[name] = *info
}
}
}
func (i *Index) showIgnoredOnly(ignore *Ignore) {
for _, name := range i.files {
if ignore.shouldIgnore(name) {
i.logFile(STATUS_IGNORE, name)
}
}
}
func (i *Index) checkFix(forceUpdateDmg bool) {
for name, b := range i.new {
if a, ok := i.cur[name]; !ok {
i.logFile(STATUS_NEW, name)
i.modified = true
} else {
amod := int64(a.ModTime)
bmod := int64(b.ModTime)
if a.Hash != nil && b.Hash != nil && *a.Hash == *b.Hash {
i.logFile(STATUS_OK, name)
if amod != bmod {
i.modified = true
}
continue
}
if amod == bmod {
i.logFile(STATUS_ERR_DMG, name)
if !forceUpdateDmg {
// keep DMG entry
i.new[name] = a
} else {
i.modified = true
}
} else if amod < bmod {
i.logFile(STATUS_UPDATE, name)
i.modified = true
} else if amod > bmod {
i.logFile(STATUS_UP_WARN_OLD, name)
i.modified = true
}
}
}
// track missing
for name := range i.cur {
if _, ok := i.new[name]; !ok {
i.modified = true
if i.context.ShowMissing {
i.logFile(STATUS_MISSING, name)
}
}
}
// dirs
m := make(map[string]bool)
for _, n := range i.newDirList {
m[n] = true
}
for _, name := range i.curDirList {
if !m[name] {
i.modified = true
if i.context.ShowMissing {
i.logDir(STATUS_MISSING, name+"/")
}
}
}
if len(i.newDirList) != len(i.curDirList) {
// added
i.modified = true
}
}
func (i *Index) calcFile(name string, a string) (*idxInfo, error) {
path := filepath.Join(i.path, name)
info, err := os.Stat(path)
if err != nil {
return nil, err
}
mtime := int64(info.ModTime().UnixNano() / 1e6)
h, err := Hashfile(path, a, i.context.perfMonBytes)
if err != nil {
return nil, err
}
i.context.perfMonFiles(1)
return &idxInfo{
ModTime: mtime,
Algo: &a,
Hash: &h,
}, nil
}
func (i *Index) save() (bool, error) {
if i.modified {
if i.readonly {
return false, errors.New("Error trying to save a readonly index.")
}
text, err := json.Marshal(i.new)
if err != nil {
return false, err
}
data := indexFile{
V: VERSION,
IdxRaw: text,
IdxHash: hashMd5(text),
}
if i.context.TrackDirectories {
data.Dir = i.newDirList
}
file, err := json.Marshal(data)
if err != nil {
return false, err
}
err = os.WriteFile(i.getIndexFilepath(), file, 0644)
if err != nil {
return false, err
}
i.modified = false
return true, nil
} else {
return false, nil
}
}
func (i *Index) load() error {
if _, err := os.Stat(i.getIndexFilepath()); err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
i.modified = false
file, err := os.ReadFile(i.getIndexFilepath())
if err != nil {
return err
}
var data indexFile
err = json.Unmarshal(file, &data)
if err != nil {
return err
}
if data.IdxRaw != nil {
err = json.Unmarshal(data.IdxRaw, &i.cur)
if err != nil {
return err
}
text := data.IdxRaw
if data.IdxHash != hashMd5(text) {
// old versions may have saved the JSON encoded with extra spaces
text, _ = json.Marshal(data.IdxRaw)
}
if data.IdxHash != hashMd5(text) {
i.modified = true
i.logFile(STATUS_ERR_IDX, i.getIndexFilepath())
}
} else {
var data1 indexFile1
json.Unmarshal(file, &data1)
if data1.Data != nil {
// convert from js to new format
for name, item := range data1.Data {
i.cur[name] = idxInfo{
ModTime: item.ModTime,
Algo: &algoMd5,
Hash: &item.Hash,
}
}
}
}
// dirs
if data.Dir != nil {
slices.Sort(data.Dir)
i.curDirList = data.Dir
}
return nil
}
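
Given the struct tags above, the saved index is a single JSON document per directory: "v" is the format version, "idx" maps file names to their modification time in Unix milliseconds plus algorithm and hash, "idx_hash" is the md5 of the serialized "idx" value, and "dirlist" is only written when directory tracking is enabled. A hand-written illustration (names and hashes are made up and shortened):

{
  "v": 2,
  "idx": {
    "report.pdf": { "mod": 1717171717000, "a": "blake3", "h": "9f2c..." },
    "notes.txt": { "mod": 1717171718000, "a": "md5", "h": "d41d..." }
  },
  "idx_hash": "5a1b...",
  "dirlist": ["archive", "photos"]
}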

24
pyproject.toml Normal file
View File

@ -0,0 +1,24 @@
[project]
name = "chkbit"
version = "4.1.0"
description = "chkbit checks the data integrity of your files"
authors = [
{name = "Christian Zangl", email = "laktak@cdak.net"},
]
# keep in sync with Pipfile
dependencies = [
"blake3>=0.3.4",
]
requires-python = ">=3.7.0"
readme = "README.md"
license = {file = "LICENSE"}
[project.urls]
Homepage = "https://github.com/laktak/chkbit-py"
[project.scripts]
chkbit = "chkbit_cli.main:main"
[tool.setuptools.packages.find]
include = ["chkbit","chkbit_cli"]
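
With this metadata the package exposes a chkbit console script; a typical local install from the repository root followed by a first run might look like the following (the target path and the -u update flag usage are illustrative):

pip install .
chkbit -u ~/Documents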

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
blake3>=0.3.4

4
run.py Normal file
View File

@ -0,0 +1,4 @@
from chkbit_cli import main
if __name__ == "__main__":
main()

View File

@ -1,8 +0,0 @@
#!/bin/bash
set -eE -o pipefail
script_dir=$(dirname "$(realpath "$0")")
cd $script_dir/..
version=$(git describe --tags --always)
go build -ldflags="-X main.appVersion=$version" ./cmd/chkbit

View File

@ -1,13 +0,0 @@
#!/bin/bash
set -eE -o pipefail
script_dir=$(dirname "$(realpath "$0")")
cd $script_dir/..
res="$(gofmt -l . 2>&1)"
if [ -n "$res" ]; then
echo "gofmt check failed:"
echo "${res}"
exit 1
fi

View File

@ -1,7 +0,0 @@
#!/bin/bash
set -eE -o pipefail
script_dir=$(dirname "$(realpath "$0")")
cd $script_dir/..
go vet -structtag=false -composites=false ./...

View File

@ -1,342 +0,0 @@
package main
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
"time"
)
// performs integration tests using the compiled chkbit binary
var testDir = "/tmp/chkbit"
func getCmd() string {
_, filename, _, _ := runtime.Caller(0)
prjRoot := filepath.Dir(filepath.Dir(filename))
return filepath.Join(prjRoot, "chkbit")
}
func checkOut(t *testing.T, sout string, expected string) {
if !strings.Contains(sout, expected) {
t.Errorf("Expected '%s' in output, got '%s'\n", expected, sout)
}
}
func checkNotOut(t *testing.T, sout string, notExpected string) {
if strings.Contains(sout, notExpected) {
t.Errorf("Did not expect '%s' in output, got '%s'\n", notExpected, sout)
}
}
// misc files
var (
startList = []string{"time", "year", "people", "way", "day", "thing"}
wordList = []string{"life", "world", "school", "state", "family", "student", "group", "country", "problem", "hand", "part", "place", "case", "week", "company", "system", "program", "work", "government", "number", "night", "point", "home", "water", "room", "mother", "area", "money", "story", "fact", "month", "lot", "right", "study", "book", "eye", "job", "word", "business", "issue", "side", "kind", "head", "house", "service", "friend", "father", "power", "hour", "game", "line", "end", "member", "law", "car", "city", "community", "name", "president", "team", "minute", "idea", "kid", "body", "information", "back", "face", "others", "level", "office", "door", "health", "person", "art", "war", "history", "party", "result", "change", "morning", "reason", "research", "moment", "air", "teacher", "force", "education"}
extList = []string{"txt", "md", "pdf", "jpg", "jpeg", "png", "mp4", "mp3", "csv"}
startDate = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)
endDate = time.Date(2024, 12, 1, 0, 0, 0, 0, time.UTC)
dateList = []time.Time{}
wordIdx = 0
extIdx = 0
dateIdx = 0
)
func nextWord() string {
word := wordList[wordIdx%len(wordList)]
wordIdx++
return word
}
func nextExt() string {
ext := extList[extIdx%len(extList)]
extIdx++
return ext
}
func setDate(filename string, r int) {
date := dateList[dateIdx%len(dateList)]
m := 17 * dateIdx / len(dateList)
date = date.Add(time.Duration(m) * time.Hour)
dateIdx++
os.Chtimes(filename, date, date)
}
func genFile(path string, size int) {
os.WriteFile(path, make([]byte, size), 0644)
setDate(path, size*size)
}
func genFiles(dir string, a int) {
os.MkdirAll(dir, 0755)
for i := 1; i <= 5; i++ {
size := a*i*wordIdx*100 + extIdx
file := nextWord() + "-" + nextWord()
if i%3 == 0 {
file += "-" + nextWord()
}
file += "." + nextExt()
genFile(filepath.Join(dir, file), size)
}
}
func genDir(root string) {
for _, start := range startList {
for i := 1; i <= 5; i++ {
dir := filepath.Join(root, start, nextWord())
genFiles(dir, 1)
if wordIdx%3 == 0 {
dir = filepath.Join(dir, nextWord())
genFiles(dir, 1)
}
}
}
}
func setupMiscFiles() {
var c int64 = 50
interval := (int64)(endDate.Sub(startDate).Seconds()) / c
for i := range make([]int64, c) {
dateList = append(dateList, startDate.Add(time.Duration(interval*(int64)(i))*time.Second))
}
root := filepath.Join(testDir, "root")
if err := os.RemoveAll(testDir); err != nil {
fmt.Println("Failed to clean", err)
panic(err)
}
genDir(root)
os.MkdirAll(filepath.Join(root, "day/car/empty"), 0755)
rootPeople := filepath.Join(root, "people")
testPeople := filepath.Join(testDir, "people")
err := os.Rename(rootPeople, testPeople)
if err != nil {
fmt.Println("Rename failed", err)
panic(err)
}
err = os.Symlink(testPeople, rootPeople)
if err != nil {
fmt.Println("Symlink failed", err)
panic(err)
}
}
func TestRoot(t *testing.T) {
setupMiscFiles()
tool := getCmd()
root := filepath.Join(testDir, "root")
// update index, no recurse
t.Run("no-recourse", func(t *testing.T) {
cmd := exec.Command(tool, "-umR", filepath.Join(root, "day/office"))
out, err := cmd.Output()
if err != nil {
t.Fatalf("failed with '%s'\n", err)
}
sout := string(out)
checkOut(t, sout, "Processed 5 files")
checkOut(t, sout, "- 1 directory was updated")
checkOut(t, sout, "- 5 file hashes were added")
checkOut(t, sout, "- 0 file hashes were updated")
checkNotOut(t, sout, "removed")
})
// update remaining index from root
t.Run("update-remaining", func(t *testing.T) {
cmd := exec.Command(tool, "-um", root)
out, err := cmd.Output()
if err != nil {
t.Fatalf("failed with '%s'\n", err)
}
sout := string(out)
checkOut(t, sout, "Processed 300 files")
checkOut(t, sout, "- 66 directories were updated")
checkOut(t, sout, "- 295 file hashes were added")
checkOut(t, sout, "- 0 file hashes were updated")
checkNotOut(t, sout, "removed")
})
// delete files, check for missing
t.Run("delete", func(t *testing.T) {
os.RemoveAll(filepath.Join(root, "thing/change"))
os.Remove(filepath.Join(root, "time/hour/minute/body-information.csv"))
cmd := exec.Command(tool, "-m", root)
out, err := cmd.Output()
if err != nil {
t.Fatalf("failed with '%s'\n", err)
}
sout := string(out)
checkOut(t, sout, "del /tmp/chkbit/root/thing/change/")
checkOut(t, sout, "2 files/directories would have been removed")
})
// do not report missing without -m
t.Run("no-missing", func(t *testing.T) {
cmd := exec.Command(tool, root)
out, err := cmd.Output()
if err != nil {
t.Fatalf("failed with '%s'\n", err)
}
sout := string(out)
checkNotOut(t, sout, "del ")
checkNotOut(t, sout, "removed")
})
// check for missing and update
t.Run("missing", func(t *testing.T) {
cmd := exec.Command(tool, "-um", root)
out, err := cmd.Output()
if err != nil {
t.Fatalf("failed with '%s'\n", err)
}
sout := string(out)
checkOut(t, sout, "del /tmp/chkbit/root/thing/change/")
checkOut(t, sout, "2 files/directories have been removed")
})
// check again
t.Run("repeat", func(t *testing.T) {
for i := 0; i < 10; i++ {
cmd := exec.Command(tool, "-uv", root)
out, err := cmd.Output()
if err != nil {
t.Fatalf("failed with '%s'\n", err)
}
sout := string(out)
checkOut(t, sout, "Processed 289 files")
checkNotOut(t, sout, "removed")
checkNotOut(t, sout, "updated")
checkNotOut(t, sout, "added")
}
})
// add files only
t.Run("add-only", func(t *testing.T) {
genFiles(filepath.Join(root, "way/add"), 99)
genFile(filepath.Join(root, "time/add-file.txt"), 500)
// modify existing, will not be reported:
genFile(filepath.Join(root, "way/job/word-business.mp3"), 500)
cmd := exec.Command(tool, "-a", root)
out, err := cmd.Output()
if err != nil {
t.Fatalf("failed with '%s'\n", err)
}
sout := string(out)
checkOut(t, sout, "Processed 6 files")
checkOut(t, sout, "- 3 directories were updated")
checkOut(t, sout, "- 6 file hashes were added")
checkOut(t, sout, "- 0 file hashes were updated")
})
// update remaining
t.Run("update-remaining-add", func(t *testing.T) {
cmd := exec.Command(tool, "-u", root)
out, err := cmd.Output()
if err != nil {
t.Fatalf("failed with '%s'\n", err)
}
sout := string(out)
checkOut(t, sout, "Processed 295 files")
checkOut(t, sout, "- 1 directory was updated")
checkOut(t, sout, "- 0 file hashes were added")
checkOut(t, sout, "- 1 file hash was updated")
})
}
func TestDMG(t *testing.T) {
testDmg := filepath.Join(testDir, "test_dmg")
if err := os.RemoveAll(testDmg); err != nil {
fmt.Println("Failed to clean", err)
panic(err)
}
if err := os.MkdirAll(testDmg, 0755); err != nil {
fmt.Println("Failed to create test directory", err)
panic(err)
}
if err := os.Chdir(testDmg); err != nil {
fmt.Println("Failed to cd test directory", err)
panic(err)
}
tool := getCmd()
testFile := filepath.Join(testDmg, "test.txt")
t1, _ := time.Parse(time.RFC3339, "2022-02-01T11:00:00Z")
t2, _ := time.Parse(time.RFC3339, "2022-02-01T12:00:00Z")
t3, _ := time.Parse(time.RFC3339, "2022-02-01T13:00:00Z")
// create test file and set the modified time
t.Run("create", func(t *testing.T) {
os.WriteFile(testFile, []byte("foo1"), 0644)
os.Chtimes(testFile, t2, t2)
cmd := exec.Command(tool, "-u", ".")
if out, err := cmd.Output(); err != nil {
t.Fatalf("failed with '%s'\n", err)
} else {
checkOut(t, string(out), "new test.txt")
}
})
// update test with different content & an older modified time (expect 'old')
t.Run("expect-old", func(t *testing.T) {
os.WriteFile(testFile, []byte("foo2"), 0644)
os.Chtimes(testFile, t1, t1)
cmd := exec.Command(tool, "-u", ".")
if out, err := cmd.Output(); err != nil {
t.Fatalf("failed with '%s'\n", err)
} else {
checkOut(t, string(out), "old test.txt")
}
})
// update test with new content & a newer modified time (expect 'upd')
t.Run("expect-upd", func(t *testing.T) {
os.WriteFile(testFile, []byte("foo3"), 0644)
os.Chtimes(testFile, t3, t3)
cmd := exec.Command(tool, "-u", ".")
if out, err := cmd.Output(); err != nil {
t.Fatalf("failed with '%s'\n", err)
} else {
checkOut(t, string(out), "upd test.txt")
}
})
// now update test with the same modified time to simulate damage (expect DMG)
t.Run("expect-DMG", func(t *testing.T) {
os.WriteFile(testFile, []byte("foo4"), 0644)
os.Chtimes(testFile, t3, t3)
cmd := exec.Command(tool, "-u", ".")
if out, err := cmd.Output(); err != nil {
if cmd.ProcessState.ExitCode() != 1 {
t.Fatalf("expected to fail with exit code 1 vs %d!", cmd.ProcessState.ExitCode())
}
checkOut(t, string(out), "DMG test.txt")
} else {
t.Fatal("expected to fail!")
}
})
}

View File

@ -1,11 +0,0 @@
#!/bin/bash
set -e
script_dir=$(dirname "$(realpath "$0")")
cd $script_dir/..
# prep
$script_dir/build
go test -v ./cmd/chkbit/util -count=1
go test -v ./scripts -count=1

View File

@ -1,55 +0,0 @@
#!/bin/bash
set -eE -o pipefail
script_dir=$(dirname "$(realpath "$0")")
cd $script_dir/..
if [ -z "$version" ]; then
version=$(git rev-parse HEAD)
fi
echo "building version $version"
mkdir -p dist
rm -f dist/*
build() {
echo "- $1-$2"
rm -f dist/chkbit
CGO_ENABLED=0 GOOS="$1" GOARCH="$2" go build -o dist -ldflags="-X main.appVersion=$version" ./cmd/chkbit
pushd dist
case "$1" in
windows)
outfile="chkbit-$1-$2.zip"
zip "$outfile" chkbit.exe --move
;;
*)
outfile="chkbit-$1-$2.tar.gz"
tar -czf "$outfile" chkbit --remove-files
;;
esac
popd
}
if [[ -z $2 ]]; then
build android arm64
build darwin amd64
build darwin arm64
build freebsd amd64
build freebsd arm64
build freebsd riscv64
build linux amd64
build linux arm64
build linux riscv64
build netbsd amd64
build netbsd arm64
build openbsd amd64
build openbsd arm64
build windows amd64
build windows arm64
else
build $1 $2
fi
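
The script cross-compiles every listed platform when invoked without arguments, or a single GOOS/GOARCH pair when two arguments are given; the version defaults to the current commit hash unless the version environment variable is set. For example (the explicit version value is illustrative):

# build all targets, versioned from the current commit
scripts/xbuild

# build one target with an explicit version
version=5.0.0 scripts/xbuild linux arm64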

View File

@ -1,38 +0,0 @@
package chkbit
type Status string
const (
STATUS_PANIC Status = "EXC"
STATUS_ERR_IDX Status = "EIX"
STATUS_ERR_DMG Status = "DMG"
STATUS_UPDATE_INDEX Status = "iup"
STATUS_UP_WARN_OLD Status = "old"
STATUS_UPDATE Status = "upd"
STATUS_NEW Status = "new"
STATUS_OK Status = "ok "
STATUS_IGNORE Status = "ign"
STATUS_MISSING Status = "del"
)
func (s Status) String() string {
return (string)(s)
}
func (s Status) IsErrorOrWarning() bool {
return s == STATUS_PANIC || s == STATUS_ERR_DMG || s == STATUS_ERR_IDX || s == STATUS_UP_WARN_OLD
}
func (s Status) IsVerbose() bool {
return s == STATUS_OK || s == STATUS_IGNORE
}
type LogEvent struct {
Stat Status
Message string
}
type PerfEvent struct {
NumFiles int64
NumBytes int64
}
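
A small, self-contained sketch of how a log consumer might use these helpers to filter and prefix events; the sample events below are made up for illustration.

package main

import (
    "fmt"

    "github.com/laktak/chkbit/v5"
)

func main() {
    verbose := false
    events := []chkbit.LogEvent{
        {Stat: chkbit.STATUS_OK, Message: "docs/readme.md"},
        {Stat: chkbit.STATUS_ERR_DMG, Message: "photos/img001.jpg"},
    }
    for _, ev := range events {
        if ev.Stat.IsVerbose() && !verbose {
            continue // ok/ign lines only appear in verbose mode
        }
        marker := "  "
        if ev.Stat.IsErrorOrWarning() {
            marker = "! "
        }
        fmt.Println(marker + ev.Stat.String() + " " + ev.Message)
    }
}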

View File

@ -1,38 +0,0 @@
package chkbit
type WorkItem struct {
path string
filesToIndex []string
dirList []string
ignore *Ignore
}
func (context *Context) runWorker(id int) {
for {
item := <-context.WorkQueue
if item == nil {
break
}
index := newIndex(context, item.path, item.filesToIndex, item.dirList, !context.UpdateIndex)
err := index.load()
if err != nil {
context.log(STATUS_PANIC, index.getIndexFilepath()+": "+err.Error())
}
if context.ShowIgnoredOnly {
index.showIgnoredOnly(item.ignore)
} else {
index.calcHashes(item.ignore)
index.checkFix(context.ForceUpdateDmg)
if context.UpdateIndex {
if changed, err := index.save(); err != nil {
context.logErr(item.path, err)
} else if changed {
context.log(STATUS_UPDATE_INDEX, "")
}
}
}
}
}