track directories and report missing files/dirs (-m) #16

This commit is contained in:
Christian Zangl 2024-08-20 16:39:11 +02:00
parent 181b3d8c9a
commit a1327a4d0c
No known key found for this signature in database
GPG Key ID: 6D468AC36E2A4B3D
9 changed files with 240 additions and 78 deletions

View File

@ -97,9 +97,11 @@ Flags:
-H, --tips Show tips.
-u, --update update indices (without this chkbit will verify files in readonly mode)
--show-ignored-only only show ignored files
-m, --show-missing show missing files/directories
--algo="blake3" hash algorithm: md5, sha512, blake3 (default: blake3)
-f, --force force update of damaged items
-s, --skip-symlinks do not follow symlinks
-D, --no-dir-in-index do not track directories in the index
-l, --log-file=STRING write to a logfile if specified
--log-verbose verbose logging
--index-name=".chkbit" filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)
@ -111,6 +113,27 @@ Flags:
-V, --version show version information
```
```
$ chkbit -H
.chkbitignore rules:
each line should contain exactly one name
you may use Unix shell-style wildcards (see README)
lines starting with '#' are skipped
lines starting with '/' are only applied to the current directory
Status codes:
DMG: error, data damage detected
EIX: error, index damaged
old: warning, file replaced by an older version
new: new file
upd: file updated
ok : check ok
del: file/directory removed
ign: ignored (see .chkbitignore)
EXC: exception/panic
```
chkbit uses only 5 workers by default so that it does not slow your system to a crawl. You can specify a higher number to make it much faster, provided your I/O throughput can keep up.

View File

@ -19,6 +19,7 @@ Status codes:
new: new file
upd: file updated
ok : check ok
del: file/directory removed
ign: ignored (see .chkbitignore)
EXC: exception/panic
`

View File

@ -46,9 +46,11 @@ var cli struct {
Tips bool `short:"H" help:"Show tips."`
Update bool `short:"u" help:"update indices (without this chkbit will verify files in readonly mode)"`
ShowIgnoredOnly bool `help:"only show ignored files"`
ShowMissing bool `short:"m" help:"show missing files/directories"`
Algo string `default:"blake3" help:"hash algorithm: md5, sha512, blake3 (default: blake3)"`
Force bool `short:"f" help:"force update of damaged items"`
SkipSymlinks bool `short:"s" help:"do not follow symlinks"`
NoDirInIndex bool `short:"D" help:"do not track directories in the index"`
LogFile string `short:"l" help:"write to a logfile if specified"`
LogVerbose bool `help:"verbose logging"`
IndexName string `default:".chkbit" help:"filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)"`
@ -63,14 +65,10 @@ var cli struct {
type Main struct {
dmgList []string
errList []string
numIdxUpd int
numNew int
numUpd int
verbose bool
logger *log.Logger
logVerbose bool
progress Progress
total int
termWidth int
fps *util.RateCalc
bps *util.RateCalc
@ -82,23 +80,16 @@ func (m *Main) log(text string) {
func (m *Main) logStatus(stat chkbit.Status, message string) bool {
if stat == chkbit.STATUS_UPDATE_INDEX {
m.numIdxUpd++
} else {
return false
}
if stat == chkbit.STATUS_ERR_DMG {
m.total++
m.dmgList = append(m.dmgList, message)
} else if stat == chkbit.STATUS_PANIC {
m.errList = append(m.errList, message)
} else if stat == chkbit.STATUS_OK || stat == chkbit.STATUS_UPDATE || stat == chkbit.STATUS_NEW || stat == chkbit.STATUS_UP_WARN_OLD {
m.total++
if stat == chkbit.STATUS_UPDATE || stat == chkbit.STATUS_UP_WARN_OLD {
m.numUpd++
} else if stat == chkbit.STATUS_NEW {
m.numNew++
}
}
if m.logVerbose || stat != chkbit.STATUS_OK && stat != chkbit.STATUS_IGNORE {
if m.logVerbose || !stat.IsVerbose() {
m.log(stat.String() + " " + message)
}
@ -110,7 +101,6 @@ func (m *Main) logStatus(stat chkbit.Status, message string) bool {
lterm.Printline(col, stat.String(), " ", message, lterm.Reset)
return true
}
}
return false
}
@ -130,7 +120,7 @@ func (m *Main) showStatus(context *chkbit.Context) {
if m.progress == Fancy {
lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r")
} else {
fmt.Print(m.total, "\r")
fmt.Print(context.NumTotal, "\r")
}
}
case perf := <-context.PerfQueue:
@ -147,7 +137,7 @@ func (m *Main) showStatus(context *chkbit.Context) {
stat = "RO"
}
stat = fmt.Sprintf("[%s:%d] %5d files $ %s %-13s $ %s %-13s",
stat, context.NumWorkers, m.total,
stat, context.NumWorkers, context.NumTotal,
util.Sparkline(m.fps.Stats), statF,
util.Sparkline(m.bps.Stats), statB)
stat = util.LeftTruncate(stat, m.termWidth-1)
@ -155,7 +145,7 @@ func (m *Main) showStatus(context *chkbit.Context) {
stat = strings.Replace(stat, "$", termSepFG+termSep+termFG3, 1)
lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r")
} else if m.progress == Plain {
fmt.Print(m.total, "\r")
fmt.Print(context.NumTotal, "\r")
}
}
}
@ -176,7 +166,9 @@ func (m *Main) process() *chkbit.Context {
context.ForceUpdateDmg = cli.Force
context.UpdateIndex = cli.Update
context.ShowIgnoredOnly = cli.ShowIgnoredOnly
context.ShowMissing = cli.ShowMissing
context.SkipSymlinks = cli.SkipSymlinks
context.TrackDirectories = !cli.NoDirInIndex
var wg sync.WaitGroup
wg.Add(1)
@ -216,11 +208,11 @@ func (m *Main) printResult(context *chkbit.Context) {
if !context.UpdateIndex {
mode = " in readonly mode"
}
status := fmt.Sprintf("Processed %s%s.", util.LangNum1MutateSuffix(m.total, "file"), mode)
status := fmt.Sprintf("Processed %s%s.", util.LangNum1MutateSuffix(context.NumTotal, "file"), mode)
cprint(termOKFG, status)
m.log(status)
if m.progress == Fancy && m.total > 0 {
if m.progress == Fancy && context.NumTotal > 0 {
elapsed := time.Since(m.fps.Start)
elapsedS := elapsed.Seconds()
fmt.Println("-", elapsed.Truncate(time.Second), "elapsed")
@ -228,17 +220,26 @@ func (m *Main) printResult(context *chkbit.Context) {
fmt.Printf("- %.2f MB/second\n", (float64(m.bps.Total)+float64(m.bps.Current))/float64(sizeMB)/elapsedS)
}
del := ""
if context.UpdateIndex {
if m.numIdxUpd > 0 {
cprint(termOKFG, fmt.Sprintf("- %s updated\n- %s added\n- %s updated",
util.LangNum1Choice(m.numIdxUpd, "directory was", "directories were"),
util.LangNum1Choice(m.numNew, "file hash was", "file hashes were"),
util.LangNum1Choice(m.numUpd, "file hash was", "file hashes were")))
if context.NumIdxUpd > 0 {
if context.NumDel > 0 {
del = fmt.Sprintf("\n- %s been removed", util.LangNum1Choice(context.NumDel, "file/directory has", "files/directories have"))
}
} else if m.numNew+m.numUpd > 0 {
cprint(termAlertFG, fmt.Sprintf("No changes were made (specify -u to update):\n- %s would have been added and\n- %s would have been updated.",
util.LangNum1MutateSuffix(m.numNew, "file"),
util.LangNum1MutateSuffix(m.numUpd, "file")))
cprint(termOKFG, fmt.Sprintf("- %s updated\n- %s added\n- %s updated%s",
util.LangNum1Choice(context.NumIdxUpd, "directory was", "directories were"),
util.LangNum1Choice(context.NumNew, "file hash was", "file hashes were"),
util.LangNum1Choice(context.NumUpd, "file hash was", "file hashes were"),
del))
}
} else if context.NumNew+context.NumUpd+context.NumDel > 0 {
if context.NumDel > 0 {
del = fmt.Sprintf("\n- %s would have been removed", util.LangNum1Choice(context.NumDel, "file/directory", "files/directories"))
}
cprint(termAlertFG, fmt.Sprintf("No changes were made (specify -u to update):\n- %s would have been added\n- %s would have been updated%s",
util.LangNum1MutateSuffix(context.NumNew, "file"),
util.LangNum1MutateSuffix(context.NumUpd, "file"),
del))
}
}

View File

@ -9,10 +9,12 @@ import (
type Context struct {
NumWorkers int
ForceUpdateDmg bool
UpdateIndex bool
ShowIgnoredOnly bool
ShowMissing bool
ForceUpdateDmg bool
HashAlgo string
TrackDirectories bool
SkipSymlinks bool
IndexFilename string
IgnoreFilename string
@ -20,6 +22,12 @@ type Context struct {
LogQueue chan *LogEvent
PerfQueue chan *PerfEvent
wg sync.WaitGroup
NumTotal int
NumIdxUpd int
NumNew int
NumUpd int
NumDel int
}
func NewContext(numWorkers int, hashAlgo string, indexFilename string, ignoreFilename string) (*Context, error) {
@ -44,6 +52,29 @@ func NewContext(numWorkers int, hashAlgo string, indexFilename string, ignoreFil
}
// contextStatsMutex serializes updates to the Context counters. log is called
// from the worker routines (see runWorker); with more than one worker the
// plain increments below would otherwise be a data race.
var contextStatsMutex sync.Mutex

// log updates the run counters according to stat and then forwards the event
// to LogQueue.
//
// Counter rules:
//   - STATUS_ERR_DMG, STATUS_OK:          NumTotal
//   - STATUS_UPDATE, STATUS_UP_WARN_OLD:  NumTotal and NumUpd
//   - STATUS_NEW:                         NumTotal and NumNew
//   - STATUS_UPDATE_INDEX:                NumIdxUpd
//   - STATUS_MISSING:                     NumDel
//
// All other statuses (e.g. STATUS_PANIC, STATUS_ERR_IDX, STATUS_IGNORE) leave
// the counters untouched.
//
// NOTE(review): code that reads the counters while workers are still running
// is not covered by this lock — confirm whether those readers need it too.
func (context *Context) log(stat Status, message string) {
	contextStatsMutex.Lock()
	switch stat {
	case STATUS_ERR_DMG, STATUS_OK:
		context.NumTotal++
	case STATUS_UPDATE_INDEX:
		context.NumIdxUpd++
	case STATUS_UPDATE, STATUS_UP_WARN_OLD:
		context.NumTotal++
		context.NumUpd++
	case STATUS_NEW:
		context.NumTotal++
		context.NumNew++
	case STATUS_MISSING:
		context.NumDel++
	}
	// Release the lock before the channel send so a full LogQueue cannot
	// stall other workers that only need to bump a counter.
	contextStatsMutex.Unlock()

	context.LogQueue <- &LogEvent{stat, message}
}
@ -59,8 +90,8 @@ func (context *Context) perfMonBytes(numBytes int64) {
context.PerfQueue <- &PerfEvent{0, numBytes}
}
func (context *Context) addWork(path string, filesToIndex []string, ignore *Ignore) {
context.WorkQueue <- &WorkItem{path, filesToIndex, ignore}
func (context *Context) addWork(path string, filesToIndex []string, dirList []string, ignore *Ignore) {
context.WorkQueue <- &WorkItem{path, filesToIndex, dirList, ignore}
}
func (context *Context) endWork() {
@ -72,6 +103,12 @@ func (context *Context) isChkbitFile(name string) bool {
}
func (context *Context) Start(pathList []string) {
context.NumTotal = 0
context.NumIdxUpd = 0
context.NumNew = 0
context.NumUpd = 0
context.NumDel = 0
var wg sync.WaitGroup
wg.Add(context.NumWorkers)
for i := 0; i < context.NumWorkers; i++ {
@ -141,7 +178,7 @@ func (context *Context) scanDir(root string, parentIgnore *Ignore) {
}
}
context.addWork(root, filesToIndex, ignore)
context.addWork(root, filesToIndex, dirList, ignore)
for _, name := range dirList {
context.scanDir(filepath.Join(root, name), ignore)

View File

@ -5,6 +5,8 @@ import (
"errors"
"os"
"path/filepath"
"reflect"
"slices"
)
const VERSION = 2 // index version
@ -24,6 +26,8 @@ type indexFile struct {
// IdxRaw -> map[string]idxInfo
IdxRaw json.RawMessage `json:"idx"`
IdxHash string `json:"idx_hash"`
// 2024-08 optional, list of subdirectories
Dir []string `json:"dirlist,omitempty"`
}
type idxInfo1 struct {
@ -41,17 +45,22 @@ type Index struct {
files []string
cur map[string]idxInfo
new map[string]idxInfo
curDirList []string
newDirList []string
modified bool
readonly bool
}
func NewIndex(context *Context, path string, files []string, readonly bool) *Index {
func newIndex(context *Context, path string, files []string, dirList []string, readonly bool) *Index {
slices.Sort(dirList)
return &Index{
context: context,
path: path,
files: files,
cur: make(map[string]idxInfo),
new: make(map[string]idxInfo),
curDirList: make([]string, 0),
newDirList: dirList,
readonly: readonly,
}
}
@ -72,6 +81,10 @@ func (i *Index) logFile(stat Status, name string) {
i.context.log(stat, filepath.Join(i.path, name))
}
// logDir reports stat for the directory name located under this index's
// path. The logged path gets a trailing "/" appended so that directory
// entries can be told apart from file entries.
func (i *Index) logDir(stat Status, name string) {
	dirPath := filepath.Join(i.path, name) + "/"
	i.context.log(stat, dirPath)
}
func (i *Index) calcHashes(ignore *Ignore) {
for _, name := range i.files {
if ignore != nil && ignore.shouldIgnore(name) {
@ -153,6 +166,26 @@ func (i *Index) checkFix(forceUpdateDmg bool) {
}
}
}
if i.context.ShowMissing {
for name := range i.cur {
if _, ok := i.new[name]; !ok {
i.logFile(STATUS_MISSING, name)
i.setMod(true)
}
}
// dirs
m := make(map[string]bool)
for _, n := range i.newDirList {
m[n] = true
}
for _, name := range i.curDirList {
if !m[name] {
i.logDir(STATUS_MISSING, name+"/")
i.setMod(true)
}
}
}
}
func (i *Index) calcFile(name string, a string) (*idxInfo, error) {
@ -186,6 +219,9 @@ func (i *Index) save() (bool, error) {
IdxRaw: text,
IdxHash: hashMd5(text),
}
if i.context.TrackDirectories {
data.Dir = i.newDirList
}
file, err := json.Marshal(data)
if err != nil {
@ -205,6 +241,8 @@ func (i *Index) save() (bool, error) {
func (i *Index) load() error {
if _, err := os.Stat(i.getIndexFilepath()); err != nil {
if os.IsNotExist(err) {
// todo
i.setMod(true)
return nil
}
return err
@ -248,5 +286,12 @@ func (i *Index) load() error {
}
}
}
if data.Dir != nil {
slices.Sort(data.Dir)
i.curDirList = data.Dir
if i.context.TrackDirectories && !reflect.DeepEqual(i.curDirList, i.newDirList) {
i.setMod(true)
}
}
return nil
}

View File

@ -11,6 +11,8 @@ import (
"time"
)
// perform integration test using the compiled binary
var testDir = "/tmp/chkbit"
func getCmd() string {
@ -25,6 +27,12 @@ func checkOut(t *testing.T, sout string, expected string) {
}
}
// checkNotOut marks the test as failed (without stopping it) when sout
// contains the substring notExpected.
func checkNotOut(t *testing.T, sout string, notExpected string) {
	// Mark this function as a helper so a failure is reported at the calling
	// step's line instead of inside this function.
	t.Helper()
	if strings.Contains(sout, notExpected) {
		t.Errorf("Did not expect '%s' in output, got '%s'\n", notExpected, sout)
	}
}
// misc files
var (
@ -128,14 +136,59 @@ func TestRoot(t *testing.T) {
tool := getCmd()
root := filepath.Join(testDir, "root")
cmd := exec.Command(tool, "-u", root)
// step1: update index
cmd := exec.Command(tool, "-um", root)
out, err := cmd.Output()
if err != nil {
t.Fatalf("cmd.Output() failed with '%s'\n", err)
t.Fatalf("step1 failed with '%s'\n", err)
}
sout := string(out)
checkOut(t, sout, "60 directories were updated")
checkOut(t, sout, "67 directories were updated")
checkOut(t, sout, "300 file hashes were added")
checkNotOut(t, sout, "removed")
// step2: delete files, check for missing
os.RemoveAll(filepath.Join(root, "thing/change"))
os.Remove(filepath.Join(root, "time/hour/minute/body-information.csv"))
cmd = exec.Command(tool, "-m", root)
out, err = cmd.Output()
if err != nil {
t.Fatalf("step2 failed with '%s'\n", err)
}
sout = string(out)
checkOut(t, sout, "del /tmp/chkbit/root/thing/change/")
checkOut(t, sout, "2 files/directories would have been removed")
// step2a: do not report missing without -m
cmd = exec.Command(tool, root)
out, err = cmd.Output()
if err != nil {
t.Fatalf("step2a failed with '%s'\n", err)
}
sout = string(out)
checkNotOut(t, sout, "del ")
checkNotOut(t, sout, "removed")
// step3: check for missing and update
cmd = exec.Command(tool, "-um", root)
out, err = cmd.Output()
if err != nil {
t.Fatalf("step3 failed with '%s'\n", err)
}
sout = string(out)
checkOut(t, sout, "del /tmp/chkbit/root/thing/change/")
checkOut(t, sout, "2 files/directories have been removed")
// step4: check again
cmd = exec.Command(tool, "-u", root)
out, err = cmd.Output()
if err != nil {
t.Fatalf("step4 failed with '%s'\n", err)
}
sout = string(out)
checkOut(t, sout, "Processed 289 files")
}
func TestDMG(t *testing.T) {

View File

@ -7,5 +7,5 @@ cd $script_dir/..
# prep
$script_dir/build
go test -v ./cmd/chkbit/util
go test -v ./scripts
go test -v ./cmd/chkbit/util -count=1
go test -v ./scripts -count=1

View File

@ -12,6 +12,7 @@ const (
STATUS_NEW Status = "new"
STATUS_OK Status = "ok "
STATUS_IGNORE Status = "ign"
STATUS_MISSING Status = "del"
)
func (s Status) String() string {

View File

@ -3,6 +3,7 @@ package chkbit
// WorkItem describes one directory that a worker should process; runWorker
// turns each item into an index via newIndex.
type WorkItem struct {
// directory the item refers to (used as the index path)
path string
// names of the files in path that should be indexed
filesToIndex []string
// names of the subdirectories found in path, handed to the index as its
// new directory list
dirList []string
// ignore rules for this directory
// NOTE(review): presumably forwarded to the hashing step and may be nil — confirm
ignore *Ignore
}
@ -13,7 +14,7 @@ func (context *Context) runWorker(id int) {
break
}
index := NewIndex(context, item.path, item.filesToIndex, !context.UpdateIndex)
index := newIndex(context, item.path, item.filesToIndex, item.dirList, !context.UpdateIndex)
err := index.load()
if err != nil {
context.log(STATUS_PANIC, index.getIndexFilepath()+": "+err.Error())