track directories and report missing files/dirs (-m) #16

This commit is contained in:
Christian Zangl 2024-08-20 16:39:11 +02:00
parent 181b3d8c9a
commit a1327a4d0c
No known key found for this signature in database
GPG Key ID: 6D468AC36E2A4B3D
9 changed files with 240 additions and 78 deletions

View File

@ -97,9 +97,11 @@ Flags:
-H, --tips Show tips. -H, --tips Show tips.
-u, --update update indices (without this chkbit will verify files in readonly mode) -u, --update update indices (without this chkbit will verify files in readonly mode)
--show-ignored-only only show ignored files --show-ignored-only only show ignored files
-m, --show-missing show missing files/directories
--algo="blake3" hash algorithm: md5, sha512, blake3 (default: blake3) --algo="blake3" hash algorithm: md5, sha512, blake3 (default: blake3)
-f, --force force update of damaged items -f, --force force update of damaged items
-s, --skip-symlinks do not follow symlinks -s, --skip-symlinks do not follow symlinks
-D, --no-dir-in-index do not track directories in the index
-l, --log-file=STRING write to a logfile if specified -l, --log-file=STRING write to a logfile if specified
--log-verbose verbose logging --log-verbose verbose logging
--index-name=".chkbit" filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit) --index-name=".chkbit" filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)
@ -111,6 +113,27 @@ Flags:
-V, --version show version information -V, --version show version information
``` ```
```
$ chkbit -H
.chkbitignore rules:
each line should contain exactly one name
you may use Unix shell-style wildcards (see README)
lines starting with '#' are skipped
lines starting with '/' are only applied to the current directory
Status codes:
DMG: error, data damage detected
EIX: error, index damaged
old: warning, file replaced by an older version
new: new file
upd: file updated
ok : check ok
del: file/directory removed
ign: ignored (see .chkbitignore)
EXC: exception/panic
```
chkbit is set to use only 5 workers by default so it will not slow your system to a crawl. You can specify a higher number to make it a lot faster if the IO throughput can also keep up. chkbit is set to use only 5 workers by default so it will not slow your system to a crawl. You can specify a higher number to make it a lot faster if the IO throughput can also keep up.

View File

@ -19,6 +19,7 @@ Status codes:
new: new file new: new file
upd: file updated upd: file updated
ok : check ok ok : check ok
del: file/directory removed
ign: ignored (see .chkbitignore) ign: ignored (see .chkbitignore)
EXC: exception/panic EXC: exception/panic
` `

View File

@ -46,9 +46,11 @@ var cli struct {
Tips bool `short:"H" help:"Show tips."` Tips bool `short:"H" help:"Show tips."`
Update bool `short:"u" help:"update indices (without this chkbit will verify files in readonly mode)"` Update bool `short:"u" help:"update indices (without this chkbit will verify files in readonly mode)"`
ShowIgnoredOnly bool `help:"only show ignored files"` ShowIgnoredOnly bool `help:"only show ignored files"`
ShowMissing bool `short:"m" help:"show missing files/directories"`
Algo string `default:"blake3" help:"hash algorithm: md5, sha512, blake3 (default: blake3)"` Algo string `default:"blake3" help:"hash algorithm: md5, sha512, blake3 (default: blake3)"`
Force bool `short:"f" help:"force update of damaged items"` Force bool `short:"f" help:"force update of damaged items"`
SkipSymlinks bool `short:"s" help:"do not follow symlinks"` SkipSymlinks bool `short:"s" help:"do not follow symlinks"`
NoDirInIndex bool `short:"D" help:"do not track directories in the index"`
LogFile string `short:"l" help:"write to a logfile if specified"` LogFile string `short:"l" help:"write to a logfile if specified"`
LogVerbose bool `help:"verbose logging"` LogVerbose bool `help:"verbose logging"`
IndexName string `default:".chkbit" help:"filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)"` IndexName string `default:".chkbit" help:"filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)"`
@ -63,14 +65,10 @@ var cli struct {
type Main struct { type Main struct {
dmgList []string dmgList []string
errList []string errList []string
numIdxUpd int
numNew int
numUpd int
verbose bool verbose bool
logger *log.Logger logger *log.Logger
logVerbose bool logVerbose bool
progress Progress progress Progress
total int
termWidth int termWidth int
fps *util.RateCalc fps *util.RateCalc
bps *util.RateCalc bps *util.RateCalc
@ -82,34 +80,26 @@ func (m *Main) log(text string) {
func (m *Main) logStatus(stat chkbit.Status, message string) bool { func (m *Main) logStatus(stat chkbit.Status, message string) bool {
if stat == chkbit.STATUS_UPDATE_INDEX { if stat == chkbit.STATUS_UPDATE_INDEX {
m.numIdxUpd++ return false
} else { }
if stat == chkbit.STATUS_ERR_DMG {
m.total++
m.dmgList = append(m.dmgList, message)
} else if stat == chkbit.STATUS_PANIC {
m.errList = append(m.errList, message)
} else if stat == chkbit.STATUS_OK || stat == chkbit.STATUS_UPDATE || stat == chkbit.STATUS_NEW || stat == chkbit.STATUS_UP_WARN_OLD {
m.total++
if stat == chkbit.STATUS_UPDATE || stat == chkbit.STATUS_UP_WARN_OLD {
m.numUpd++
} else if stat == chkbit.STATUS_NEW {
m.numNew++
}
}
if m.logVerbose || stat != chkbit.STATUS_OK && stat != chkbit.STATUS_IGNORE { if stat == chkbit.STATUS_ERR_DMG {
m.log(stat.String() + " " + message) m.dmgList = append(m.dmgList, message)
} } else if stat == chkbit.STATUS_PANIC {
m.errList = append(m.errList, message)
}
if m.verbose || !stat.IsVerbose() { if m.logVerbose || !stat.IsVerbose() {
col := "" m.log(stat.String() + " " + message)
if stat.IsErrorOrWarning() { }
col = termAlertFG
} if m.verbose || !stat.IsVerbose() {
lterm.Printline(col, stat.String(), " ", message, lterm.Reset) col := ""
return true if stat.IsErrorOrWarning() {
col = termAlertFG
} }
lterm.Printline(col, stat.String(), " ", message, lterm.Reset)
return true
} }
return false return false
} }
@ -130,7 +120,7 @@ func (m *Main) showStatus(context *chkbit.Context) {
if m.progress == Fancy { if m.progress == Fancy {
lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r") lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r")
} else { } else {
fmt.Print(m.total, "\r") fmt.Print(context.NumTotal, "\r")
} }
} }
case perf := <-context.PerfQueue: case perf := <-context.PerfQueue:
@ -147,7 +137,7 @@ func (m *Main) showStatus(context *chkbit.Context) {
stat = "RO" stat = "RO"
} }
stat = fmt.Sprintf("[%s:%d] %5d files $ %s %-13s $ %s %-13s", stat = fmt.Sprintf("[%s:%d] %5d files $ %s %-13s $ %s %-13s",
stat, context.NumWorkers, m.total, stat, context.NumWorkers, context.NumTotal,
util.Sparkline(m.fps.Stats), statF, util.Sparkline(m.fps.Stats), statF,
util.Sparkline(m.bps.Stats), statB) util.Sparkline(m.bps.Stats), statB)
stat = util.LeftTruncate(stat, m.termWidth-1) stat = util.LeftTruncate(stat, m.termWidth-1)
@ -155,7 +145,7 @@ func (m *Main) showStatus(context *chkbit.Context) {
stat = strings.Replace(stat, "$", termSepFG+termSep+termFG3, 1) stat = strings.Replace(stat, "$", termSepFG+termSep+termFG3, 1)
lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r") lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r")
} else if m.progress == Plain { } else if m.progress == Plain {
fmt.Print(m.total, "\r") fmt.Print(context.NumTotal, "\r")
} }
} }
} }
@ -176,7 +166,9 @@ func (m *Main) process() *chkbit.Context {
context.ForceUpdateDmg = cli.Force context.ForceUpdateDmg = cli.Force
context.UpdateIndex = cli.Update context.UpdateIndex = cli.Update
context.ShowIgnoredOnly = cli.ShowIgnoredOnly context.ShowIgnoredOnly = cli.ShowIgnoredOnly
context.ShowMissing = cli.ShowMissing
context.SkipSymlinks = cli.SkipSymlinks context.SkipSymlinks = cli.SkipSymlinks
context.TrackDirectories = !cli.NoDirInIndex
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(1) wg.Add(1)
@ -216,11 +208,11 @@ func (m *Main) printResult(context *chkbit.Context) {
if !context.UpdateIndex { if !context.UpdateIndex {
mode = " in readonly mode" mode = " in readonly mode"
} }
status := fmt.Sprintf("Processed %s%s.", util.LangNum1MutateSuffix(m.total, "file"), mode) status := fmt.Sprintf("Processed %s%s.", util.LangNum1MutateSuffix(context.NumTotal, "file"), mode)
cprint(termOKFG, status) cprint(termOKFG, status)
m.log(status) m.log(status)
if m.progress == Fancy && m.total > 0 { if m.progress == Fancy && context.NumTotal > 0 {
elapsed := time.Since(m.fps.Start) elapsed := time.Since(m.fps.Start)
elapsedS := elapsed.Seconds() elapsedS := elapsed.Seconds()
fmt.Println("-", elapsed.Truncate(time.Second), "elapsed") fmt.Println("-", elapsed.Truncate(time.Second), "elapsed")
@ -228,17 +220,26 @@ func (m *Main) printResult(context *chkbit.Context) {
fmt.Printf("- %.2f MB/second\n", (float64(m.bps.Total)+float64(m.bps.Current))/float64(sizeMB)/elapsedS) fmt.Printf("- %.2f MB/second\n", (float64(m.bps.Total)+float64(m.bps.Current))/float64(sizeMB)/elapsedS)
} }
del := ""
if context.UpdateIndex { if context.UpdateIndex {
if m.numIdxUpd > 0 { if context.NumIdxUpd > 0 {
cprint(termOKFG, fmt.Sprintf("- %s updated\n- %s added\n- %s updated", if context.NumDel > 0 {
util.LangNum1Choice(m.numIdxUpd, "directory was", "directories were"), del = fmt.Sprintf("\n- %s been removed", util.LangNum1Choice(context.NumDel, "file/directory has", "files/directories have"))
util.LangNum1Choice(m.numNew, "file hash was", "file hashes were"), }
util.LangNum1Choice(m.numUpd, "file hash was", "file hashes were"))) cprint(termOKFG, fmt.Sprintf("- %s updated\n- %s added\n- %s updated%s",
util.LangNum1Choice(context.NumIdxUpd, "directory was", "directories were"),
util.LangNum1Choice(context.NumNew, "file hash was", "file hashes were"),
util.LangNum1Choice(context.NumUpd, "file hash was", "file hashes were"),
del))
} }
} else if m.numNew+m.numUpd > 0 { } else if context.NumNew+context.NumUpd+context.NumDel > 0 {
cprint(termAlertFG, fmt.Sprintf("No changes were made (specify -u to update):\n- %s would have been added and\n- %s would have been updated.", if context.NumDel > 0 {
util.LangNum1MutateSuffix(m.numNew, "file"), del = fmt.Sprintf("\n- %s would have been removed", util.LangNum1Choice(context.NumDel, "file/directory", "files/directories"))
util.LangNum1MutateSuffix(m.numUpd, "file"))) }
cprint(termAlertFG, fmt.Sprintf("No changes were made (specify -u to update):\n- %s would have been added\n- %s would have been updated%s",
util.LangNum1MutateSuffix(context.NumNew, "file"),
util.LangNum1MutateSuffix(context.NumUpd, "file"),
del))
} }
} }

View File

@ -8,18 +8,26 @@ import (
) )
type Context struct { type Context struct {
NumWorkers int NumWorkers int
ForceUpdateDmg bool UpdateIndex bool
UpdateIndex bool ShowIgnoredOnly bool
ShowIgnoredOnly bool ShowMissing bool
HashAlgo string ForceUpdateDmg bool
SkipSymlinks bool HashAlgo string
IndexFilename string TrackDirectories bool
IgnoreFilename string SkipSymlinks bool
WorkQueue chan *WorkItem IndexFilename string
LogQueue chan *LogEvent IgnoreFilename string
PerfQueue chan *PerfEvent WorkQueue chan *WorkItem
wg sync.WaitGroup LogQueue chan *LogEvent
PerfQueue chan *PerfEvent
wg sync.WaitGroup
NumTotal int
NumIdxUpd int
NumNew int
NumUpd int
NumDel int
} }
func NewContext(numWorkers int, hashAlgo string, indexFilename string, ignoreFilename string) (*Context, error) { func NewContext(numWorkers int, hashAlgo string, indexFilename string, ignoreFilename string) (*Context, error) {
@ -44,6 +52,29 @@ func NewContext(numWorkers int, hashAlgo string, indexFilename string, ignoreFil
} }
// log tallies stat into the context's run counters and then forwards the
// event (status plus message) to LogQueue.
//
// Counter rules, as implemented by the switch below:
//   - NumTotal is incremented for STATUS_ERR_DMG, STATUS_UP_WARN_OLD,
//     STATUS_UPDATE, STATUS_NEW and STATUS_OK
//   - NumUpd is incremented for STATUS_UPDATE and STATUS_UP_WARN_OLD
//   - NumNew is incremented for STATUS_NEW
//   - NumIdxUpd is incremented for STATUS_UPDATE_INDEX
//   - NumDel is incremented for STATUS_MISSING
//   - any other status (e.g. STATUS_PANIC, STATUS_ERR_IDX, STATUS_IGNORE)
//     changes no counter and is only forwarded
//
// NOTE(review): the counters are plain ints modified here without a lock, and
// log appears to be reached from the worker goroutines — confirm whether this
// needs synchronization (mutex or atomics).
func (context *Context) log(stat Status, message string) { func (context *Context) log(stat Status, message string) {
switch stat {
case STATUS_ERR_DMG:
context.NumTotal++
case STATUS_UPDATE_INDEX:
// index writes are counted separately and do not add to the file total
context.NumIdxUpd++
case STATUS_UP_WARN_OLD:
// a file replaced by an older version still counts as an update
context.NumTotal++
context.NumUpd++
case STATUS_UPDATE:
context.NumTotal++
context.NumUpd++
case STATUS_NEW:
context.NumTotal++
context.NumNew++
case STATUS_OK:
context.NumTotal++
case STATUS_MISSING:
// missing entries are counted as deletions only, not as processed files
context.NumDel++
//case STATUS_PANIC:
//case STATUS_ERR_IDX:
//case STATUS_IGNORE:
}
context.LogQueue <- &LogEvent{stat, message} context.LogQueue <- &LogEvent{stat, message}
} }
@ -59,8 +90,8 @@ func (context *Context) perfMonBytes(numBytes int64) {
context.PerfQueue <- &PerfEvent{0, numBytes} context.PerfQueue <- &PerfEvent{0, numBytes}
} }
func (context *Context) addWork(path string, filesToIndex []string, ignore *Ignore) { func (context *Context) addWork(path string, filesToIndex []string, dirList []string, ignore *Ignore) {
context.WorkQueue <- &WorkItem{path, filesToIndex, ignore} context.WorkQueue <- &WorkItem{path, filesToIndex, dirList, ignore}
} }
func (context *Context) endWork() { func (context *Context) endWork() {
@ -72,6 +103,12 @@ func (context *Context) isChkbitFile(name string) bool {
} }
func (context *Context) Start(pathList []string) { func (context *Context) Start(pathList []string) {
context.NumTotal = 0
context.NumIdxUpd = 0
context.NumNew = 0
context.NumUpd = 0
context.NumDel = 0
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(context.NumWorkers) wg.Add(context.NumWorkers)
for i := 0; i < context.NumWorkers; i++ { for i := 0; i < context.NumWorkers; i++ {
@ -141,7 +178,7 @@ func (context *Context) scanDir(root string, parentIgnore *Ignore) {
} }
} }
context.addWork(root, filesToIndex, ignore) context.addWork(root, filesToIndex, dirList, ignore)
for _, name := range dirList { for _, name := range dirList {
context.scanDir(filepath.Join(root, name), ignore) context.scanDir(filepath.Join(root, name), ignore)

View File

@ -5,6 +5,8 @@ import (
"errors" "errors"
"os" "os"
"path/filepath" "path/filepath"
"reflect"
"slices"
) )
const VERSION = 2 // index version const VERSION = 2 // index version
@ -24,6 +26,8 @@ type indexFile struct {
// IdxRaw -> map[string]idxInfo // IdxRaw -> map[string]idxInfo
IdxRaw json.RawMessage `json:"idx"` IdxRaw json.RawMessage `json:"idx"`
IdxHash string `json:"idx_hash"` IdxHash string `json:"idx_hash"`
// 2024-08 optional, list of subdirectories
Dir []string `json:"dirlist,omitempty"`
} }
type idxInfo1 struct { type idxInfo1 struct {
@ -36,23 +40,28 @@ type indexFile1 struct {
} }
type Index struct { type Index struct {
context *Context context *Context
path string path string
files []string files []string
cur map[string]idxInfo cur map[string]idxInfo
new map[string]idxInfo new map[string]idxInfo
modified bool curDirList []string
readonly bool newDirList []string
modified bool
readonly bool
} }
func NewIndex(context *Context, path string, files []string, readonly bool) *Index { func newIndex(context *Context, path string, files []string, dirList []string, readonly bool) *Index {
slices.Sort(dirList)
return &Index{ return &Index{
context: context, context: context,
path: path, path: path,
files: files, files: files,
cur: make(map[string]idxInfo), cur: make(map[string]idxInfo),
new: make(map[string]idxInfo), new: make(map[string]idxInfo),
readonly: readonly, curDirList: make([]string, 0),
newDirList: dirList,
readonly: readonly,
} }
} }
@ -72,6 +81,10 @@ func (i *Index) logFile(stat Status, name string) {
i.context.log(stat, filepath.Join(i.path, name)) i.context.log(stat, filepath.Join(i.path, name))
} }
// logDir reports a status event for the directory name located inside this
// index's directory. The logged path is i.path joined with name, with a
// trailing "/" appended (logFile logs the joined path without it).
func (i *Index) logDir(stat Status, name string) {
	dirPath := filepath.Join(i.path, name)
	i.context.log(stat, dirPath+"/")
}
func (i *Index) calcHashes(ignore *Ignore) { func (i *Index) calcHashes(ignore *Ignore) {
for _, name := range i.files { for _, name := range i.files {
if ignore != nil && ignore.shouldIgnore(name) { if ignore != nil && ignore.shouldIgnore(name) {
@ -153,6 +166,26 @@ func (i *Index) checkFix(forceUpdateDmg bool) {
} }
} }
} }
if i.context.ShowMissing {
for name := range i.cur {
if _, ok := i.new[name]; !ok {
i.logFile(STATUS_MISSING, name)
i.setMod(true)
}
}
// dirs
m := make(map[string]bool)
for _, n := range i.newDirList {
m[n] = true
}
for _, name := range i.curDirList {
if !m[name] {
i.logDir(STATUS_MISSING, name+"/")
i.setMod(true)
}
}
}
} }
func (i *Index) calcFile(name string, a string) (*idxInfo, error) { func (i *Index) calcFile(name string, a string) (*idxInfo, error) {
@ -186,6 +219,9 @@ func (i *Index) save() (bool, error) {
IdxRaw: text, IdxRaw: text,
IdxHash: hashMd5(text), IdxHash: hashMd5(text),
} }
if i.context.TrackDirectories {
data.Dir = i.newDirList
}
file, err := json.Marshal(data) file, err := json.Marshal(data)
if err != nil { if err != nil {
@ -205,6 +241,8 @@ func (i *Index) save() (bool, error) {
func (i *Index) load() error { func (i *Index) load() error {
if _, err := os.Stat(i.getIndexFilepath()); err != nil { if _, err := os.Stat(i.getIndexFilepath()); err != nil {
if os.IsNotExist(err) { if os.IsNotExist(err) {
// todo
i.setMod(true)
return nil return nil
} }
return err return err
@ -248,5 +286,12 @@ func (i *Index) load() error {
} }
} }
} }
if data.Dir != nil {
slices.Sort(data.Dir)
i.curDirList = data.Dir
if i.context.TrackDirectories && !reflect.DeepEqual(i.curDirList, i.newDirList) {
i.setMod(true)
}
}
return nil return nil
} }

View File

@ -11,6 +11,8 @@ import (
"time" "time"
) )
// perform integration test using the compiled binary
var testDir = "/tmp/chkbit" var testDir = "/tmp/chkbit"
func getCmd() string { func getCmd() string {
@ -25,6 +27,12 @@ func checkOut(t *testing.T, sout string, expected string) {
} }
} }
// checkNotOut records a test error (without aborting the test) when the
// captured output sout contains the substring notExpected.
func checkNotOut(t *testing.T, sout string, notExpected string) {
	if !strings.Contains(sout, notExpected) {
		return
	}
	t.Errorf("Did not expect '%s' in output, got '%s'\n", notExpected, sout)
}
// misc files // misc files
var ( var (
@ -128,14 +136,59 @@ func TestRoot(t *testing.T) {
tool := getCmd() tool := getCmd()
root := filepath.Join(testDir, "root") root := filepath.Join(testDir, "root")
cmd := exec.Command(tool, "-u", root)
// step1: update index
cmd := exec.Command(tool, "-um", root)
out, err := cmd.Output() out, err := cmd.Output()
if err != nil { if err != nil {
t.Fatalf("cmd.Output() failed with '%s'\n", err) t.Fatalf("step1 failed with '%s'\n", err)
} }
sout := string(out) sout := string(out)
checkOut(t, sout, "60 directories were updated") checkOut(t, sout, "67 directories were updated")
checkOut(t, sout, "300 file hashes were added") checkOut(t, sout, "300 file hashes were added")
checkNotOut(t, sout, "removed")
// step2: delete files, check for missing
os.RemoveAll(filepath.Join(root, "thing/change"))
os.Remove(filepath.Join(root, "time/hour/minute/body-information.csv"))
cmd = exec.Command(tool, "-m", root)
out, err = cmd.Output()
if err != nil {
t.Fatalf("step2 failed with '%s'\n", err)
}
sout = string(out)
checkOut(t, sout, "del /tmp/chkbit/root/thing/change/")
checkOut(t, sout, "2 files/directories would have been removed")
// step2a: do not report missing without -m
cmd = exec.Command(tool, root)
out, err = cmd.Output()
if err != nil {
t.Fatalf("step2a failed with '%s'\n", err)
}
sout = string(out)
checkNotOut(t, sout, "del ")
checkNotOut(t, sout, "removed")
// step3: check for missing and update
cmd = exec.Command(tool, "-um", root)
out, err = cmd.Output()
if err != nil {
t.Fatalf("step3 failed with '%s'\n", err)
}
sout = string(out)
checkOut(t, sout, "del /tmp/chkbit/root/thing/change/")
checkOut(t, sout, "2 files/directories have been removed")
// step4: check again
cmd = exec.Command(tool, "-u", root)
out, err = cmd.Output()
if err != nil {
t.Fatalf("step4 failed with '%s'\n", err)
}
sout = string(out)
checkOut(t, sout, "Processed 289 files")
} }
func TestDMG(t *testing.T) { func TestDMG(t *testing.T) {

View File

@ -7,5 +7,5 @@ cd $script_dir/..
# prep # prep
$script_dir/build $script_dir/build
go test -v ./cmd/chkbit/util go test -v ./cmd/chkbit/util -count=1
go test -v ./scripts go test -v ./scripts -count=1

View File

@ -12,6 +12,7 @@ const (
STATUS_NEW Status = "new" STATUS_NEW Status = "new"
STATUS_OK Status = "ok " STATUS_OK Status = "ok "
STATUS_IGNORE Status = "ign" STATUS_IGNORE Status = "ign"
STATUS_MISSING Status = "del"
) )
func (s Status) String() string { func (s Status) String() string {

View File

@ -3,6 +3,7 @@ package chkbit
type WorkItem struct { type WorkItem struct {
path string path string
filesToIndex []string filesToIndex []string
dirList []string
ignore *Ignore ignore *Ignore
} }
@ -13,7 +14,7 @@ func (context *Context) runWorker(id int) {
break break
} }
index := NewIndex(context, item.path, item.filesToIndex, !context.UpdateIndex) index := newIndex(context, item.path, item.filesToIndex, item.dirList, !context.UpdateIndex)
err := index.load() err := index.load()
if err != nil { if err != nil {
context.log(STATUS_PANIC, index.getIndexFilepath()+": "+err.Error()) context.log(STATUS_PANIC, index.getIndexFilepath()+": "+err.Error())