Compare commits
71 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
3f5eb8a0ab | ||
|
955b370815 | ||
|
ad04f72da6 | ||
|
0e9391d087 | ||
|
87e15913a5 | ||
|
929fb39782 | ||
|
7f9a2e2efc | ||
|
6168723f5b | ||
|
67e7b8c904 | ||
|
0dc3390b7f | ||
|
104e07b66b | ||
|
52677d2b5d | ||
|
45ab4501ee | ||
|
8ee84344e8 | ||
|
7608b56ea6 | ||
|
8e9e37094d | ||
|
9af31192c2 | ||
|
c73646d2e1 | ||
|
8ec9ea9629 | ||
|
c27c259282 | ||
|
74f043b3ca | ||
|
4ea0a57e0a | ||
|
6d82ff93b1 | ||
|
a043402114 | ||
|
a8e52626ef | ||
|
6b4a1fd46a | ||
|
58aa762e5c | ||
|
7cf1577be7 | ||
|
53b1a12301 | ||
|
d192fa0175 | ||
|
5e66b772d2 | ||
|
8c871b1319 | ||
|
8cfdf0cfc1 | ||
|
313347dd61 | ||
|
4e3c840eb0 | ||
|
4bd293f024 | ||
|
5ed89d8b1a | ||
|
bfb73acc70 | ||
|
49912e9c9c | ||
|
6405beaeba | ||
|
18bf67317e | ||
|
e4efbc290c | ||
|
a09f0b0ad6 | ||
|
2fc483e6b4 | ||
|
539c277bd8 | ||
|
0f4ebfa4fa | ||
|
13b0067ac8 | ||
|
08c6d436bf | ||
|
a319333fdb | ||
|
4d1ca47777 | ||
|
a6e1bb9b4c | ||
|
e5f737b09d | ||
|
1b8a582e34 | ||
|
1f94944f87 | ||
|
9521bdea00 | ||
|
a8faff93e1 | ||
|
dbdf7cf99b | ||
|
f0e2d61fc3 | ||
|
2cf550d6a3 | ||
|
af81b67d58 | ||
|
11e94f663c | ||
|
24f76c0678 | ||
|
b6faaf94fa | ||
|
fc46cb7c53 | ||
|
0afdaddd0a | ||
|
a9b57b5814 | ||
|
37104d7b78 | ||
|
3b3770d46a | ||
|
f2c37cae26 | ||
|
1db45dbd7a | ||
|
970aad33df |
13
.editorconfig
Normal file
13
.editorconfig
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
root = true
|
||||||
|
|
||||||
|
[*]
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
insert_final_newline = true
|
||||||
|
|
||||||
|
[*.{py,pyx,pxd,pxi,yml,h}]
|
||||||
|
indent_size = 4
|
||||||
|
indent_style = space
|
||||||
|
|
||||||
|
[ext/*.{c,cpp,h}]
|
||||||
|
indent_size = 4
|
||||||
|
indent_style = tab
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
.bitrot.db
|
||||||
|
.bitrot.sha512
|
1
Ånnóying 𝚏Ⅰlęnąme by Łukasz
Normal file
1
Ånnóying 𝚏Ⅰlęnąme by Łukasz
Normal file
@ -0,0 +1 @@
|
|||||||
|
This is a form of testing strange encodings.
|
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2013 Łukasz Langa
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
197
README.rst
197
README.rst
@ -2,8 +2,8 @@
|
|||||||
bitrot
|
bitrot
|
||||||
======
|
======
|
||||||
|
|
||||||
Detects bit rotten files on the hard drive to save your precious photo and
|
Detects bit rotten files on the hard drive to save your precious photo
|
||||||
music collection from slow decay.
|
and music collection from slow decay.
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
-----
|
-----
|
||||||
@ -12,35 +12,174 @@ Go to the desired directory and simply invoke::
|
|||||||
|
|
||||||
$ bitrot
|
$ bitrot
|
||||||
|
|
||||||
This will start digging through your directory structure recursively indexing
|
This will start digging through your directory structure recursively
|
||||||
all files found. The index is stored in a ``.bitrot.db`` file which is a SQLite
|
indexing all files found. The index is stored in a ``.bitrot.db`` file
|
||||||
3 database.
|
which is a SQLite 3 database.
|
||||||
|
|
||||||
Next time you run ``bitrot`` it will add new files and update the index for
|
Next time you run ``bitrot`` it will add new files and update the index
|
||||||
files with a changed modification date. Most importantly however, it will
|
for files with a changed modification date. Most importantly however, it
|
||||||
report all errors, e.g. files that changed on the hard drive but still have the
|
will report all errors, e.g. files that changed on the hard drive but
|
||||||
same modification date.
|
still have the same modification date.
|
||||||
|
|
||||||
All paths stored in ``.bitrot.db`` are relative so it's safe to rescan a folder
|
All paths stored in ``.bitrot.db`` are relative so it's safe to rescan
|
||||||
after moving it to another drive.
|
a folder after moving it to another drive. Just remember to move it in
|
||||||
|
a way that doesn't touch modification dates. Otherwise the checksum
|
||||||
|
database is useless.
|
||||||
|
|
||||||
Performance
|
Performance
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
Obviously depends on how fast the underlying drive is. No rigorous performance
|
Obviously depends on how fast the underlying drive is. Historically
|
||||||
tests have been done. For informational purposes, on my typical 5400 RPM laptop
|
the script was single-threaded because back in 2013 checksum
|
||||||
hard drive scanning a 60+ GB music library takes around 15 minutes. On an OCZ
|
calculations on a single core still outran typical drives, including
|
||||||
Vertex 3 SSD drive ``bitrot`` is able to scan a 100 GB Aperture library in
|
the mobile SSDs of the day. In 2020 this is no longer the case so the
|
||||||
under 10 minutes. Both tests on HFS+.
|
script now uses a process pool to calculate SHA1 hashes and perform
|
||||||
|
``stat()`` calls.
|
||||||
|
|
||||||
|
No rigorous performance tests have been done. Scanning a ~1000 file
|
||||||
|
directory totalling ~5 GB takes 2.2s on a 2018 MacBook Pro 15" with
|
||||||
|
an AP0512M SSD. Back in 2013, that same feat on a 2015 MacBook Air with
|
||||||
|
a SM0256G SSD took over 20 seconds.
|
||||||
|
|
||||||
|
On that same 2018 MacBook Pro 15", scanning a 60+ GB music library takes
|
||||||
|
24 seconds. Back in 2013, with a typical 5400 RPM laptop hard drive
|
||||||
|
it took around 15 minutes. How times have changed!
|
||||||
|
|
||||||
|
Tests
|
||||||
|
-----
|
||||||
|
|
||||||
|
There's a simple but comprehensive test scenario using
|
||||||
|
`pytest <https://pypi.org/p/pytest>`_ and
|
||||||
|
`pytest-order <https://pypi.org/p/pytest-order>`_.
|
||||||
|
|
||||||
|
Install::
|
||||||
|
|
||||||
|
$ python3 -m venv .venv
|
||||||
|
$ . .venv/bin/activate
|
||||||
|
(.venv)$ pip install -e .[test]
|
||||||
|
|
||||||
|
Run::
|
||||||
|
|
||||||
|
(.venv)$ pytest -x
|
||||||
|
==================== test session starts ====================
|
||||||
|
platform darwin -- Python 3.10.12, pytest-7.4.0, pluggy-1.2.0
|
||||||
|
rootdir: /Users/ambv/Documents/Python/bitrot
|
||||||
|
plugins: order-1.1.0
|
||||||
|
collected 12 items
|
||||||
|
|
||||||
|
tests/test_bitrot.py ............ [100%]
|
||||||
|
|
||||||
|
==================== 12 passed in 15.05s ====================
|
||||||
|
|
||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
1.0.1
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* officially remove Python 2 support that was broken since 1.0.0
|
||||||
|
anyway; the package now requires Python 3.8+ because it uses a few newer
|
||||||
|
features
|
||||||
|
|
||||||
|
1.0.0
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* significantly sped up execution on solid state drives by using
|
||||||
|
a process pool executor to calculate SHA1 hashes and perform ``stat()``
|
||||||
|
calls; use ``-w1`` if your runs on slow magnetic drives were
|
||||||
|
negatively affected by this change
|
||||||
|
|
||||||
|
* sped up execution by pre-loading all SQLite-stored hashes to memory
|
||||||
|
and doing comparisons using Python sets
|
||||||
|
|
||||||
|
* all UTF-8 filenames are now normalized to NFKD in the database to
|
||||||
|
enable cross-operating system checks
|
||||||
|
|
||||||
|
* the SQLite database is now vacuumed to minimize its size
|
||||||
|
|
||||||
|
* bugfix: additional Python 3 fixes when Unicode names were encountered
|
||||||
|
|
||||||
|
0.9.2
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* bugfix: one place in the code incorrectly hardcoded UTF-8 as the
|
||||||
|
filesystem encoding
|
||||||
|
|
||||||
|
0.9.1
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* bugfix: print the path that failed to decode with FSENCODING
|
||||||
|
|
||||||
|
* bugfix: when using -q, don't hide warnings about files that can't be
|
||||||
|
statted or read
|
||||||
|
|
||||||
|
* bugfix: -s is no longer broken on Python 3
|
||||||
|
|
||||||
|
0.9.0
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* bugfix: bitrot.db checksum checking messages now obey --quiet
|
||||||
|
|
||||||
|
* Python 3 compatibility
|
||||||
|
|
||||||
|
0.8.0
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* bitrot now keeps track of its own database's bitrot by storing
|
||||||
|
a checksum of .bitrot.db in .bitrot.sha512
|
||||||
|
|
||||||
|
* bugfix: now properly uses the filesystem encoding to decode file names
|
||||||
|
for use with the .bitrot.db database. Report and original patch by
|
||||||
|
pallinger.
|
||||||
|
|
||||||
|
0.7.1
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* bugfix: SHA1 computation now works correctly on Windows; previously
|
||||||
|
opened files in text-mode. This fix will change hashes of files
|
||||||
|
containing some specific bytes like 0x1A.
|
||||||
|
|
||||||
|
0.7.0
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* when a file changes or is renamed, the timestamp of the last check is
|
||||||
|
updated, too
|
||||||
|
|
||||||
|
* bugfix: files that disappeared during the run are now properly ignored
|
||||||
|
|
||||||
|
* bugfix: files that are locked or with otherwise denied access are
|
||||||
|
skipped. If they were read before, they will be considered "missing"
|
||||||
|
in the report.
|
||||||
|
|
||||||
|
* bugfix: if there are multiple files with the same content in the
|
||||||
|
scanned directory tree, renames are now handled properly for them
|
||||||
|
|
||||||
|
* refactored some horrible code to be a little less horrible
|
||||||
|
|
||||||
|
0.6.0
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* more control over performance with ``--commit-interval`` and
|
||||||
|
``--chunk-size`` command-line arguments
|
||||||
|
|
||||||
|
* bugfix: symbolic links are now properly skipped (or can be followed if
|
||||||
|
``--follow-links`` is passed)
|
||||||
|
|
||||||
|
* bugfix: files that cannot be opened are now gracefully skipped
|
||||||
|
|
||||||
|
* bugfix: fixed a rare division by zero when run in an empty directory
|
||||||
|
|
||||||
|
0.5.1
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
* bugfix: warn about test mode only in test mode
|
||||||
|
|
||||||
0.5.0
|
0.5.0
|
||||||
~~~~~
|
~~~~~
|
||||||
|
|
||||||
* ``--test`` command-line argument for testing the state without updating the
|
* ``--test`` command-line argument for testing the state without
|
||||||
database on disk (works for testing databases you don't have write access to)
|
updating the database on disk (works for testing databases you don't
|
||||||
|
have write access to)
|
||||||
|
|
||||||
* size of the data read is reported upon finish
|
* size of the data read is reported upon finish
|
||||||
|
|
||||||
@ -51,19 +190,22 @@ Change Log
|
|||||||
|
|
||||||
* renames are now reported as such
|
* renames are now reported as such
|
||||||
|
|
||||||
* all non-regular files (e.g. symbolic links, pipes, sockets) are now skipped
|
* all non-regular files (e.g. symbolic links, pipes, sockets) are now
|
||||||
|
skipped
|
||||||
|
|
||||||
* progress presented in percentage
|
* progress presented in percentage
|
||||||
|
|
||||||
0.3.0
|
0.3.0
|
||||||
~~~~~
|
~~~~~
|
||||||
|
|
||||||
* ``--sum`` command-line argument for easy comparison of multiple databases
|
* ``--sum`` command-line argument for easy comparison of multiple
|
||||||
|
databases
|
||||||
|
|
||||||
0.2.1
|
0.2.1
|
||||||
~~~~~
|
~~~~~
|
||||||
|
|
||||||
* fixed regression from 0.2.0 where new files caused a ``KeyError`` exception
|
* fixed regression from 0.2.0 where new files caused a ``KeyError``
|
||||||
|
exception
|
||||||
|
|
||||||
0.2.0
|
0.2.0
|
||||||
~~~~~
|
~~~~~
|
||||||
@ -80,4 +222,15 @@ Change Log
|
|||||||
Authors
|
Authors
|
||||||
-------
|
-------
|
||||||
|
|
||||||
Glued together by `Łukasz Langa <mailto:lukasz@langa.pl>`_.
|
Glued together by `Łukasz Langa <mailto:lukasz@langa.pl>`_. Multiple
|
||||||
|
improvements by
|
||||||
|
`Ben Shepherd <mailto:bjashepherd@gmail.com>`_,
|
||||||
|
`Jean-Louis Fuchs <mailto:ganwell@fangorn.ch>`_,
|
||||||
|
`Marcus Linderoth <mailto:marcus@thingsquare.com>`_,
|
||||||
|
`p1r473 <mailto:subwayjared@gmail.com>`_,
|
||||||
|
`Peter Hofmann <mailto:scm@uninformativ.de>`_,
|
||||||
|
`Phil Lundrigan <mailto:philipbl@cs.utah.edu>`_,
|
||||||
|
`Reid Williams <mailto:rwilliams@ideo.com>`_,
|
||||||
|
`Stan Senotrusov <mailto:senotrusov@gmail.com>`_,
|
||||||
|
`Yang Zhang <mailto:yaaang@gmail.com>`_, and
|
||||||
|
`Zhuoyun Wei <mailto:wzyboy@wzyboy.org>`_.
|
||||||
|
30
bin/bitrot
30
bin/bitrot
@ -1,30 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright (C) 2013 by Łukasz Langa
|
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
|
||||||
# in the Software without restriction, including without limitation the rights
|
|
||||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
# copies of the Software, and to permit persons to whom the Software is
|
|
||||||
# furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
# The above copyright notice and this permission notice shall be included in
|
|
||||||
# all copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
||||||
# THE SOFTWARE.
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
from __future__ import division
|
|
||||||
from __future__ import print_function
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from bitrot import run_from_command_line
|
|
||||||
run_from_command_line()
|
|
34
pyproject.toml
Normal file
34
pyproject.toml
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["setuptools", "setuptools-scm[toml]"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "bitrot"
|
||||||
|
authors = [
|
||||||
|
{name = "Łukasz Langa", email = "lukasz@langa.pl"},
|
||||||
|
]
|
||||||
|
description = "Detects bit rotten files on the hard drive to save your precious photo and music collection from slow decay."
|
||||||
|
readme = "README.rst"
|
||||||
|
requires-python = ">=3.8"
|
||||||
|
keywords = ["file", "checksum", "database"]
|
||||||
|
license = {text = "MIT"}
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 5 - Production/Stable",
|
||||||
|
"Natural Language :: English",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Topic :: System :: Filesystems",
|
||||||
|
"Topic :: System :: Monitoring",
|
||||||
|
"Topic :: Software Development :: Libraries :: Python Modules",
|
||||||
|
|
||||||
|
]
|
||||||
|
dependencies = []
|
||||||
|
dynamic = ["version"]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
test = ["pytest", "pytest-order"]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
bitrot = "bitrot:run_from_command_line"
|
||||||
|
|
||||||
|
[tool.setuptools_scm]
|
||||||
|
tag_regex = "^(?P<version>v\\d+(?:\\.\\d+){0,2}[^\\+]*)(?:\\+.*)?$"
|
76
setup.py
76
setup.py
@ -1,76 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright (C) 2013 by Łukasz Langa
|
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
|
||||||
# in the Software without restriction, including without limitation the rights
|
|
||||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
# copies of the Software, and to permit persons to whom the Software is
|
|
||||||
# furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
# The above copyright notice and this permission notice shall be included in
|
|
||||||
# all copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
||||||
# THE SOFTWARE.
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from setuptools import setup, find_packages
|
|
||||||
|
|
||||||
reload(sys)
|
|
||||||
sys.setdefaultencoding('utf8')
|
|
||||||
|
|
||||||
current_dir = os.path.abspath(os.path.dirname(__file__))
|
|
||||||
ld_file = open(os.path.join(current_dir, 'README.rst'))
|
|
||||||
try:
|
|
||||||
long_description = ld_file.read()
|
|
||||||
finally:
|
|
||||||
ld_file.close()
|
|
||||||
# We let it die a horrible tracebacking death if reading the file fails.
|
|
||||||
# We couldn't sensibly recover anyway: we need the long description.
|
|
||||||
|
|
||||||
sys.path.insert(0, current_dir + os.sep + 'src')
|
|
||||||
from bitrot import VERSION
|
|
||||||
release = ".".join(str(num) for num in VERSION)
|
|
||||||
|
|
||||||
setup(
|
|
||||||
name = 'bitrot',
|
|
||||||
version = release,
|
|
||||||
author = 'Łukasz Langa',
|
|
||||||
author_email = 'lukasz@langa.pl',
|
|
||||||
description = ("Detects bit rotten files on the hard drive to save your "
|
|
||||||
"precious photo and music collection from slow decay."),
|
|
||||||
long_description = long_description,
|
|
||||||
url = 'https://github.com/ambv/bitrot/',
|
|
||||||
keywords = 'file checksum database',
|
|
||||||
platforms = ['any'],
|
|
||||||
license = 'MIT',
|
|
||||||
package_dir = {'': 'src'},
|
|
||||||
packages = find_packages('src'),
|
|
||||||
py_modules = ['bitrot'],
|
|
||||||
scripts = ['bin/bitrot'],
|
|
||||||
include_package_data = True,
|
|
||||||
zip_safe = False, # if only because of the readme file
|
|
||||||
install_requires = [
|
|
||||||
],
|
|
||||||
|
|
||||||
classifiers = [
|
|
||||||
'Development Status :: 3 - Alpha',
|
|
||||||
'License :: OSI Approved :: MIT License',
|
|
||||||
'Natural Language :: English',
|
|
||||||
'Programming Language :: Python :: 2.7',
|
|
||||||
'Programming Language :: Python :: 2 :: Only',
|
|
||||||
'Programming Language :: Python',
|
|
||||||
'Topic :: System :: Filesystems',
|
|
||||||
'Topic :: System :: Monitoring',
|
|
||||||
'Topic :: Software Development :: Libraries :: Python Modules',
|
|
||||||
]
|
|
||||||
)
|
|
581
src/bitrot.py
Normal file → Executable file
581
src/bitrot.py
Normal file → Executable file
@ -1,8 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright (C) 2013 by Łukasz Langa
|
# Copyright (C) 2013 by Łukasz Langa
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
# in the Software without restriction, including without limitation the rights
|
# in the Software without restriction, including without limitation the rights
|
||||||
@ -21,14 +20,12 @@
|
|||||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
# THE SOFTWARE.
|
# THE SOFTWARE.
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import annotations
|
||||||
from __future__ import division
|
|
||||||
from __future__ import print_function
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import atexit
|
import atexit
|
||||||
import datetime
|
import datetime
|
||||||
|
import errno
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
@ -36,24 +33,48 @@ import sqlite3
|
|||||||
import stat
|
import stat
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import time
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||||
|
from multiprocessing import freeze_support
|
||||||
|
from importlib.metadata import version, PackageNotFoundError
|
||||||
|
|
||||||
|
|
||||||
CHUNK_SIZE = 16384
|
DEFAULT_CHUNK_SIZE = 16384 # block size in HFS+; 4X the block size in ext4
|
||||||
DOT_THRESHOLD = 200
|
DOT_THRESHOLD = 200
|
||||||
VERSION = (0, 5, 0)
|
IGNORED_FILE_SYSTEM_ERRORS = {errno.ENOENT, errno.EACCES}
|
||||||
|
FSENCODING = sys.getfilesystemencoding()
|
||||||
|
try:
|
||||||
|
VERSION = version("bitrot")
|
||||||
|
except PackageNotFoundError:
|
||||||
|
VERSION = "1.0.1"
|
||||||
|
|
||||||
|
|
||||||
def sha1(path):
|
def normalize_path(path):
|
||||||
|
path_uni = path.decode(FSENCODING)
|
||||||
|
if FSENCODING in ('utf-8', 'UTF-8'):
|
||||||
|
return unicodedata.normalize('NFKD', path_uni)
|
||||||
|
|
||||||
|
return path_uni
|
||||||
|
|
||||||
|
|
||||||
|
def sha1(path, chunk_size):
|
||||||
digest = hashlib.sha1()
|
digest = hashlib.sha1()
|
||||||
with open(path) as f:
|
with open(path, 'rb') as f:
|
||||||
d = f.read(CHUNK_SIZE)
|
d = f.read(chunk_size)
|
||||||
while d:
|
while d:
|
||||||
digest.update(d)
|
digest.update(d)
|
||||||
d = f.read(CHUNK_SIZE)
|
d = f.read(chunk_size)
|
||||||
return digest.hexdigest()
|
return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def ts():
|
||||||
|
return datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S%z')
|
||||||
|
|
||||||
|
|
||||||
def get_sqlite3_cursor(path, copy=False):
|
def get_sqlite3_cursor(path, copy=False):
|
||||||
|
path = path.decode(FSENCODING)
|
||||||
if copy:
|
if copy:
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
raise ValueError("error: bitrot database at {} does not exist."
|
raise ValueError("error: bitrot database at {} does not exist."
|
||||||
@ -70,111 +91,282 @@ def get_sqlite3_cursor(path, copy=False):
|
|||||||
conn = sqlite3.connect(path)
|
conn = sqlite3.connect(path)
|
||||||
atexit.register(conn.close)
|
atexit.register(conn.close)
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
for name, in cur.execute('SELECT name FROM sqlite_master'):
|
tables = set(t for t, in cur.execute('SELECT name FROM sqlite_master'))
|
||||||
if name == 'bitrot':
|
if 'bitrot' not in tables:
|
||||||
break
|
|
||||||
else:
|
|
||||||
cur.execute('CREATE TABLE bitrot (path TEXT PRIMARY KEY, '
|
cur.execute('CREATE TABLE bitrot (path TEXT PRIMARY KEY, '
|
||||||
'mtime INTEGER, hash TEXT, timestamp TEXT)')
|
'mtime INTEGER, hash TEXT, timestamp TEXT)')
|
||||||
|
if 'bitrot_hash_idx' not in tables:
|
||||||
|
cur.execute('CREATE INDEX bitrot_hash_idx ON bitrot (hash)')
|
||||||
|
atexit.register(conn.commit)
|
||||||
return conn
|
return conn
|
||||||
|
|
||||||
|
|
||||||
def run(verbosity=1, test=False):
|
def list_existing_paths(directory, expected=(), ignored=(), follow_links=False):
|
||||||
current_dir = b'.' # sic, relative path
|
"""list_existing_paths(b'/dir') -> ([path1, path2, ...], total_size)
|
||||||
bitrot_db = os.path.join(current_dir, b'.bitrot.db')
|
|
||||||
conn = get_sqlite3_cursor(bitrot_db, copy=test)
|
Returns a tuple with a set of existing files in `directory` and its subdirectories
|
||||||
cur = conn.cursor()
|
and their `total_size`. If directory was a bytes object, so will be the returned
|
||||||
new_paths = []
|
paths.
|
||||||
updated_paths = []
|
|
||||||
renamed_paths = []
|
Doesn't add entries listed in `ignored`. Doesn't add symlinks if
|
||||||
error_count = 0
|
`follow_links` is False (the default). All entries present in `expected`
|
||||||
|
must be files (can't be directories or symlinks).
|
||||||
|
"""
|
||||||
|
paths = set()
|
||||||
total_size = 0
|
total_size = 0
|
||||||
current_size = 0
|
for path, _, files in os.walk(directory):
|
||||||
last_reported_size = ''
|
|
||||||
missing_paths = set()
|
|
||||||
cur.execute('SELECT path FROM bitrot')
|
|
||||||
row = cur.fetchone()
|
|
||||||
while row:
|
|
||||||
missing_paths.add(row[0])
|
|
||||||
row = cur.fetchone()
|
|
||||||
paths = []
|
|
||||||
for path, _, files in os.walk(current_dir):
|
|
||||||
for f in files:
|
for f in files:
|
||||||
p = os.path.join(path, f)
|
p = os.path.join(path, f)
|
||||||
st = os.stat(p)
|
try:
|
||||||
if not stat.S_ISREG(st.st_mode) or p == bitrot_db:
|
p_uni = p.decode(FSENCODING)
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
binary_stderr = getattr(sys.stderr, 'buffer', sys.stderr)
|
||||||
|
binary_stderr.write(b"warning: cannot decode file name: ")
|
||||||
|
binary_stderr.write(p)
|
||||||
|
binary_stderr.write(b"\n")
|
||||||
continue
|
continue
|
||||||
paths.append(p)
|
|
||||||
total_size += st.st_size
|
try:
|
||||||
paths.sort()
|
if follow_links or p_uni in expected:
|
||||||
for p in paths:
|
st = os.stat(p)
|
||||||
st = os.stat(p)
|
else:
|
||||||
new_mtime = int(st.st_mtime)
|
st = os.lstat(p)
|
||||||
current_size += st.st_size
|
except OSError as ex:
|
||||||
if verbosity:
|
if ex.errno not in IGNORED_FILE_SYSTEM_ERRORS:
|
||||||
size_fmt = '\r{:>6.1%}'.format(current_size/total_size)
|
raise
|
||||||
if size_fmt != last_reported_size:
|
else:
|
||||||
sys.stdout.write(size_fmt)
|
if not stat.S_ISREG(st.st_mode) or p in ignored:
|
||||||
sys.stdout.flush()
|
continue
|
||||||
last_reported_size = size_fmt
|
paths.add(p)
|
||||||
new_sha1 = sha1(p)
|
total_size += st.st_size
|
||||||
update_ts = datetime.datetime.utcnow().strftime(
|
return paths, total_size
|
||||||
'%Y-%m-%d %H:%M:%S%z'
|
|
||||||
|
|
||||||
|
def compute_one(path, chunk_size):
|
||||||
|
"""Return a tuple with (unicode path, size, mtime, sha1). Takes a binary path."""
|
||||||
|
p_uni = normalize_path(path)
|
||||||
|
try:
|
||||||
|
st = os.stat(path)
|
||||||
|
except OSError as ex:
|
||||||
|
if ex.errno in IGNORED_FILE_SYSTEM_ERRORS:
|
||||||
|
# The file disappeared between listing existing paths and
|
||||||
|
# this run or is (temporarily?) locked with different
|
||||||
|
# permissions. We'll just skip it for now.
|
||||||
|
print(
|
||||||
|
'\rwarning: `{}` is currently unavailable for '
|
||||||
|
'reading: {}'.format(
|
||||||
|
p_uni, ex,
|
||||||
|
),
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
raise BitrotException
|
||||||
|
|
||||||
|
raise # Not expected? https://github.com/ambv/bitrot/issues/
|
||||||
|
|
||||||
|
try:
|
||||||
|
new_sha1 = sha1(path, chunk_size)
|
||||||
|
except (IOError, OSError) as e:
|
||||||
|
print(
|
||||||
|
'\rwarning: cannot compute hash of {} [{}]'.format(
|
||||||
|
p_uni, errno.errorcode[e.args[0]],
|
||||||
|
),
|
||||||
|
file=sys.stderr,
|
||||||
)
|
)
|
||||||
p_uni = p.decode('utf8')
|
raise BitrotException
|
||||||
missing_paths.discard(p_uni)
|
|
||||||
cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE '
|
return p_uni, st.st_size, int(st.st_mtime), new_sha1
|
||||||
'path=?', (p_uni,))
|
|
||||||
row = cur.fetchone()
|
|
||||||
if not row:
|
class BitrotException(Exception):
|
||||||
cur.execute('SELECT mtime, path, timestamp FROM bitrot WHERE '
|
pass
|
||||||
'hash=?', (new_sha1,))
|
|
||||||
rows = cur.fetchall()
|
|
||||||
for row in rows:
|
class Bitrot(object):
|
||||||
stored_mtime, stored_path, update_ts = row
|
def __init__(
|
||||||
if not os.path.exists(stored_path):
|
self, verbosity=1, test=False, follow_links=False, commit_interval=300,
|
||||||
|
chunk_size=DEFAULT_CHUNK_SIZE, workers=os.cpu_count(),
|
||||||
|
):
|
||||||
|
self.verbosity = verbosity
|
||||||
|
self.test = test
|
||||||
|
self.follow_links = follow_links
|
||||||
|
self.commit_interval = commit_interval
|
||||||
|
self.chunk_size = chunk_size
|
||||||
|
self._last_reported_size = ''
|
||||||
|
self._last_commit_ts = 0
|
||||||
|
self.pool = ProcessPoolExecutor(max_workers=workers)
|
||||||
|
|
||||||
|
def maybe_commit(self, conn):
|
||||||
|
if time.time() < self._last_commit_ts + self.commit_interval:
|
||||||
|
# no time for commit yet!
|
||||||
|
return
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
self._last_commit_ts = time.time()
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
check_sha512_integrity(verbosity=self.verbosity)
|
||||||
|
|
||||||
|
bitrot_db = get_path()
|
||||||
|
bitrot_sha512 = get_path(ext=b'sha512')
|
||||||
|
try:
|
||||||
|
conn = get_sqlite3_cursor(bitrot_db, copy=self.test)
|
||||||
|
except ValueError:
|
||||||
|
raise BitrotException(
|
||||||
|
2,
|
||||||
|
'No database exists so cannot test. Run the tool once first.',
|
||||||
|
)
|
||||||
|
|
||||||
|
cur = conn.cursor()
|
||||||
|
new_paths = []
|
||||||
|
updated_paths = []
|
||||||
|
renamed_paths = []
|
||||||
|
errors = []
|
||||||
|
current_size = 0
|
||||||
|
missing_paths = self.select_all_paths(cur)
|
||||||
|
hashes = self.select_all_hashes(cur)
|
||||||
|
paths, total_size = list_existing_paths(
|
||||||
|
b'.', expected=missing_paths, ignored={bitrot_db, bitrot_sha512},
|
||||||
|
follow_links=self.follow_links,
|
||||||
|
)
|
||||||
|
paths_uni = set(normalize_path(p) for p in paths)
|
||||||
|
futures = [self.pool.submit(compute_one, p, self.chunk_size) for p in paths]
|
||||||
|
|
||||||
|
for future in as_completed(futures):
|
||||||
|
try:
|
||||||
|
p_uni, new_size, new_mtime, new_sha1 = future.result()
|
||||||
|
except BitrotException:
|
||||||
|
continue
|
||||||
|
|
||||||
|
current_size += new_size
|
||||||
|
if self.verbosity:
|
||||||
|
self.report_progress(current_size, total_size)
|
||||||
|
|
||||||
|
if p_uni not in missing_paths:
|
||||||
|
# We are not expecting this path, it wasn't in the database yet.
|
||||||
|
# It's either new or a rename. Let's handle that.
|
||||||
|
stored_path = self.handle_unknown_path(
|
||||||
|
cur, p_uni, new_mtime, new_sha1, paths_uni, hashes
|
||||||
|
)
|
||||||
|
self.maybe_commit(conn)
|
||||||
|
if p_uni == stored_path:
|
||||||
|
new_paths.append(p_uni)
|
||||||
|
missing_paths.discard(p_uni)
|
||||||
|
else:
|
||||||
renamed_paths.append((stored_path, p_uni))
|
renamed_paths.append((stored_path, p_uni))
|
||||||
missing_paths.discard(stored_path)
|
missing_paths.discard(stored_path)
|
||||||
cur.execute('UPDATE bitrot SET mtime=?, path=?, '
|
continue
|
||||||
'timestamp=? WHERE hash=?',
|
|
||||||
(new_mtime, p_uni, update_ts, new_sha1))
|
# At this point we know we're seeing an expected file.
|
||||||
conn.commit()
|
missing_paths.discard(p_uni)
|
||||||
break
|
cur.execute('SELECT mtime, hash, timestamp FROM bitrot WHERE path=?',
|
||||||
else:
|
(p_uni,))
|
||||||
new_paths.append(p)
|
row = cur.fetchone()
|
||||||
cur.execute('INSERT INTO bitrot VALUES (?, ?, ?, ?)',
|
if not row:
|
||||||
(p_uni, new_mtime, new_sha1, update_ts))
|
print(
|
||||||
conn.commit()
|
'\rwarning: path disappeared from the database while running:',
|
||||||
continue
|
p_uni,
|
||||||
stored_mtime, stored_sha1, update_ts = row
|
file=sys.stderr,
|
||||||
if int(stored_mtime) != new_mtime:
|
)
|
||||||
updated_paths.append(p)
|
continue
|
||||||
cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? '
|
|
||||||
'WHERE path=?',
|
stored_mtime, stored_sha1, stored_ts = row
|
||||||
(new_mtime, new_sha1, update_ts, p_uni))
|
if int(stored_mtime) != new_mtime:
|
||||||
conn.commit()
|
updated_paths.append(p_uni)
|
||||||
elif stored_sha1 != new_sha1:
|
cur.execute('UPDATE bitrot SET mtime=?, hash=?, timestamp=? '
|
||||||
error_count += 1
|
'WHERE path=?',
|
||||||
print('\rerror: SHA1 mismatch for {}: expected {}, got {}.'
|
(new_mtime, new_sha1, ts(), p_uni))
|
||||||
' Original info from {}.'.format(
|
self.maybe_commit(conn)
|
||||||
p, stored_sha1, new_sha1, update_ts
|
continue
|
||||||
),
|
|
||||||
file=sys.stderr,
|
if stored_sha1 != new_sha1:
|
||||||
)
|
errors.append(p_uni)
|
||||||
for path in missing_paths:
|
print(
|
||||||
cur.execute('DELETE FROM bitrot WHERE path=?', (path,))
|
'\rerror: SHA1 mismatch for {}: expected {}, got {}.'
|
||||||
|
' Last good hash checked on {}.'.format(
|
||||||
|
p_uni, stored_sha1, new_sha1, stored_ts
|
||||||
|
),
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
|
||||||
|
for path in missing_paths:
|
||||||
|
cur.execute('DELETE FROM bitrot WHERE path=?', (path,))
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
cur.execute('SELECT COUNT(path) FROM bitrot')
|
|
||||||
all_count = cur.fetchone()[0]
|
if not self.test:
|
||||||
if verbosity:
|
cur.execute('vacuum')
|
||||||
|
|
||||||
|
if self.verbosity:
|
||||||
|
cur.execute('SELECT COUNT(path) FROM bitrot')
|
||||||
|
all_count = cur.fetchone()[0]
|
||||||
|
self.report_done(
|
||||||
|
total_size,
|
||||||
|
all_count,
|
||||||
|
len(errors),
|
||||||
|
new_paths,
|
||||||
|
updated_paths,
|
||||||
|
renamed_paths,
|
||||||
|
missing_paths,
|
||||||
|
)
|
||||||
|
|
||||||
|
update_sha512_integrity(verbosity=self.verbosity)
|
||||||
|
|
||||||
|
if errors:
|
||||||
|
raise BitrotException(
|
||||||
|
1, 'There were {} errors found.'.format(len(errors)), errors,
|
||||||
|
)
|
||||||
|
|
||||||
|
def select_all_paths(self, cur):
    """Return the set of every `path` value stored in the `bitrot` table.

    `cur` is the database cursor. Paths are returned exactly as stored
    (expected to be Unicode, normalized if FSENCODING was UTF-8).
    """
    cur.execute('SELECT path FROM bitrot')
    # `fetchone()` yields None once the result set is exhausted.
    return {row[0] for row in iter(cur.fetchone, None)}
|
||||||
|
|
||||||
|
def select_all_hashes(self, cur):
    """Return a dict mapping each stored hash to the set of its paths.

    `cur` is the database cursor. Paths are returned exactly as stored
    (expected to be Unicode, normalized if FSENCODING was UTF-8).
    """
    paths_by_hash = {}
    cur.execute('SELECT hash, path FROM bitrot')
    # `fetchone()` yields None once the result set is exhausted.
    for stored_hash, stored_path in iter(cur.fetchone, None):
        paths_by_hash.setdefault(stored_hash, set()).add(stored_path)
    return paths_by_hash
|
||||||
|
|
||||||
|
def report_progress(self, current_size, total_size):
    """Write the completed fraction to stdout as a percentage.

    The text starts with a carriage return so it overwrites the previous
    progress value. Nothing is written when the formatted value is the same
    as the one reported last time.
    """
    # `or 1` avoids dividing by zero when there is nothing to read.
    fraction = current_size / (total_size or 1)
    progress = '\r{:>6.1%}'.format(fraction)
    if progress != self._last_reported_size:
        sys.stdout.write(progress)
        sys.stdout.flush()
        self._last_reported_size = progress
|
||||||
|
|
||||||
|
def report_done(
|
||||||
|
self, total_size, all_count, error_count, new_paths, updated_paths,
|
||||||
|
renamed_paths, missing_paths):
|
||||||
|
"""Print a report on what happened. All paths should be Unicode here."""
|
||||||
print('\rFinished. {:.2f} MiB of data read. {} errors found.'
|
print('\rFinished. {:.2f} MiB of data read. {} errors found.'
|
||||||
''.format(total_size/1024/1024, error_count))
|
''.format(total_size/1024/1024, error_count))
|
||||||
if verbosity == 1:
|
if self.verbosity == 1:
|
||||||
print('{} entries in the database, {} new, {} updated, '
|
print(
|
||||||
'{} renamed, {} missing.'.format(all_count, len(new_paths),
|
'{} entries in the database, {} new, {} updated, '
|
||||||
len(updated_paths), len(renamed_paths), len(missing_paths)
|
'{} renamed, {} missing.'.format(
|
||||||
))
|
all_count, len(new_paths), len(updated_paths),
|
||||||
elif verbosity > 1:
|
len(renamed_paths), len(missing_paths),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
elif self.verbosity > 1:
|
||||||
print('{} entries in the database.'.format(all_count), end=' ')
|
print('{} entries in the database.'.format(all_count), end=' ')
|
||||||
if new_paths:
|
if new_paths:
|
||||||
print('{} entries new:'.format(len(new_paths)))
|
print('{} entries new:'.format(len(new_paths)))
|
||||||
@ -190,7 +382,12 @@ def run(verbosity=1, test=False):
|
|||||||
print('{} entries renamed:'.format(len(renamed_paths)))
|
print('{} entries renamed:'.format(len(renamed_paths)))
|
||||||
renamed_paths.sort()
|
renamed_paths.sort()
|
||||||
for path in renamed_paths:
|
for path in renamed_paths:
|
||||||
print(' from', path[0], 'to', path[1])
|
print(
|
||||||
|
' from',
|
||||||
|
path[0],
|
||||||
|
'to',
|
||||||
|
path[1],
|
||||||
|
)
|
||||||
if missing_paths:
|
if missing_paths:
|
||||||
print('{} entries missing:'.format(len(missing_paths)))
|
print('{} entries missing:'.format(len(missing_paths)))
|
||||||
missing_paths = sorted(missing_paths)
|
missing_paths = sorted(missing_paths)
|
||||||
@ -198,52 +395,196 @@ def run(verbosity=1, test=False):
|
|||||||
print(' ', path)
|
print(' ', path)
|
||||||
if not any((new_paths, updated_paths, missing_paths)):
|
if not any((new_paths, updated_paths, missing_paths)):
|
||||||
print()
|
print()
|
||||||
print('warning: database file not updated on disk (test mode).')
|
if self.test and self.verbosity:
|
||||||
if error_count:
|
print('warning: database file not updated on disk (test mode).')
|
||||||
sys.exit(1)
|
|
||||||
|
def handle_unknown_path(self, cur, new_path, new_mtime, new_sha1, paths_uni, hashes):
    """Either add a new entry to the database or update the existing entry
    on rename.

    `cur` is the database cursor. `new_path` is the new Unicode path.
    `paths_uni` are Unicode paths seen on disk during this run of Bitrot.
    `hashes` is a dictionary selected from the database, keys are hashes, values
    are sets of Unicode paths that are stored in the DB under the given hash.
    When a rename is recorded, the entry for `new_sha1` in `hashes` is
    replaced so that it reflects the renamed row.

    Returns `new_path` if the entry was indeed new or the `old_path` (e.g.
    outdated path stored in the database for this hash) if there was a rename.
    """
    stored_paths = hashes.get(new_sha1, ())
    for old_path in stored_paths:
        if old_path in paths_uni:
            # That file still exists on disk, so it was not renamed.
            continue

        # File of the same hash used to exist but no longer does.
        # Let's treat `new_path` as a renamed version of that `old_path`.
        cur.execute(
            'UPDATE bitrot SET mtime=?, path=?, timestamp=? WHERE path=?',
            (new_mtime, new_path, ts(), old_path),
        )
        # Keep the in-memory index in step with the row just rewritten.
        # Otherwise another new file with the same hash would "rename" the
        # same stale `old_path` again: that UPDATE matches no row and the
        # file would never be stored.
        hashes[new_sha1] = (set(stored_paths) - {old_path}) | {new_path}
        return old_path

    # Either we haven't found `new_sha1` at all in the database, or all
    # currently stored paths for this hash still point to existing files.
    # Let's insert a new entry for what appears to be a new file.
    cur.execute(
        'INSERT INTO bitrot VALUES (?, ?, ?, ?)',
        (new_path, new_mtime, new_sha1, ts()),
    )
    return new_path
|
||||||
|
|
||||||
|
def get_path(directory=b'.', ext=b'db'):
    """Return the bytes path of the `.bitrot.<ext>` file inside `directory`."""
    filename = b'.bitrot.' + ext
    return os.path.join(directory, filename)
|
||||||
|
|
||||||
|
|
||||||
def stable_sum():
|
def stable_sum(bitrot_db=None):
|
||||||
current_dir = b'.' # sic, relative path
|
"""Calculates a stable SHA512 of all entries in the database.
|
||||||
bitrot_db = os.path.join(current_dir, b'.bitrot.db')
|
|
||||||
|
Useful for comparing if two directories hold the same data, as it ignores
|
||||||
|
timing information."""
|
||||||
|
if bitrot_db is None:
|
||||||
|
bitrot_db = get_path()
|
||||||
digest = hashlib.sha512()
|
digest = hashlib.sha512()
|
||||||
conn = get_sqlite3_cursor(bitrot_db)
|
conn = get_sqlite3_cursor(bitrot_db)
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
cur.execute('SELECT hash FROM bitrot ORDER BY path')
|
cur.execute('SELECT hash FROM bitrot ORDER BY path')
|
||||||
row = cur.fetchone()
|
row = cur.fetchone()
|
||||||
while row:
|
while row:
|
||||||
digest.update(row[0])
|
digest.update(row[0].encode('ascii'))
|
||||||
row = cur.fetchone()
|
row = cur.fetchone()
|
||||||
return digest.hexdigest()
|
return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def check_sha512_integrity(verbosity=1):
    """Verify the database file against the checksum stored next to it.

    Does nothing when the `.bitrot.sha512` file does not exist. Otherwise
    the SHA-512 of the whole database file is compared with the stored
    value. Progress and diagnostics are printed only when `verbosity` is
    truthy.

    Raises `BitrotException` (code 3) when the checksums differ.
    """
    checksum_path = get_path(ext=b'sha512')
    if not os.path.exists(checksum_path):
        return

    if verbosity:
        print('Checking bitrot.db integrity... ', end='')
        sys.stdout.flush()

    with open(checksum_path, 'rb') as checksum_file:
        expected = checksum_file.read().strip()
    with open(get_path(), 'rb') as db_file:
        actual = hashlib.sha512(db_file.read()).hexdigest().encode('ascii')

    if actual == expected:
        if verbosity:
            print('ok.')
        return

    if verbosity:
        # A SHA-512 hex digest is 128 characters long; any other length
        # means the checksum file itself is suspect.
        if len(expected) == 128:
            reason = (
                "error: SHA512 of the file is different, bitrot.db might "
                "be corrupt."
            )
        else:
            reason = (
                "error: SHA512 of the file is different but bitrot.sha512 "
                "has a suspicious length. It might be corrupt."
            )
        print(reason)
        print(
            "If you'd like to continue anyway, delete the .bitrot.sha512 "
            "file and try again.",
            file=sys.stderr,
        )
    raise BitrotException(
        3, 'bitrot.db integrity check failed, cannot continue.',
    )
|
||||||
|
|
||||||
|
def update_sha512_integrity(verbosity=1):
    """Rewrite `.bitrot.sha512` when the database file's SHA-512 changed.

    The checksum file is written only if its current content differs from
    the freshly computed digest (or if it does not exist yet). Progress is
    printed only when `verbosity` is truthy.
    """
    checksum_path = get_path(ext=b'sha512')
    previous = None  # never equal to a bytes digest, so a missing file forces a write
    if os.path.exists(checksum_path):
        with open(checksum_path, 'rb') as checksum_file:
            previous = checksum_file.read().strip()

    with open(get_path(), 'rb') as db_file:
        current = hashlib.sha512(db_file.read()).hexdigest().encode('ascii')

    if current == previous:
        return

    if verbosity:
        print('Updating bitrot.sha512... ', end='')
        sys.stdout.flush()
    with open(checksum_path, 'wb') as checksum_file:
        checksum_file.write(current)
    if verbosity:
        print('done.')
|
||||||
|
|
||||||
def run_from_command_line():
|
def run_from_command_line():
|
||||||
|
global FSENCODING
|
||||||
|
|
||||||
|
freeze_support()
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(prog='bitrot')
|
parser = argparse.ArgumentParser(prog='bitrot')
|
||||||
parser.add_argument('-q', '--quiet', action='store_true',
|
parser.add_argument(
|
||||||
|
'-l', '--follow-links', action='store_true',
|
||||||
|
help='follow symbolic links and store target files\' hashes. Once '
|
||||||
|
'a path is present in the database, it will be checked against '
|
||||||
|
'changes in content even if it becomes a symbolic link. In '
|
||||||
|
'other words, if you run `bitrot -l`, on subsequent runs '
|
||||||
|
'symbolic links registered during the first run will be '
|
||||||
|
'properly followed and checked even if you run without `-l`.')
|
||||||
|
parser.add_argument(
|
||||||
|
'-q', '--quiet', action='store_true',
|
||||||
help='don\'t print anything besides checksum errors')
|
help='don\'t print anything besides checksum errors')
|
||||||
parser.add_argument('-s', '--sum', action='store_true',
|
parser.add_argument(
|
||||||
|
'-s', '--sum', action='store_true',
|
||||||
help='using only the data already gathered, return a SHA-512 sum '
|
help='using only the data already gathered, return a SHA-512 sum '
|
||||||
'of hashes of all the entries in the database. No timestamps '
|
'of hashes of all the entries in the database. No timestamps '
|
||||||
'are used in calculation.')
|
'are used in calculation.')
|
||||||
parser.add_argument('-v', '--verbose', action='store_true',
|
parser.add_argument(
|
||||||
|
'-v', '--verbose', action='store_true',
|
||||||
help='list new, updated and missing entries')
|
help='list new, updated and missing entries')
|
||||||
parser.add_argument('-t', '--test', action='store_true',
|
parser.add_argument(
|
||||||
|
'-t', '--test', action='store_true',
|
||||||
help='just test against an existing database, don\'t update anything')
|
help='just test against an existing database, don\'t update anything')
|
||||||
parser.add_argument('--version', action='version',
|
parser.add_argument(
|
||||||
version='%(prog)s {}.{}.{}'.format(*VERSION))
|
'--version', action='version',
|
||||||
|
version=f"%(prog)s {VERSION}")
|
||||||
|
parser.add_argument(
|
||||||
|
'--commit-interval', type=float, default=300,
|
||||||
|
help='min time in seconds between commits '
|
||||||
|
'(0 commits on every operation)')
|
||||||
|
parser.add_argument(
|
||||||
|
'-w', '--workers', type=int, default=os.cpu_count(),
|
||||||
|
help='run this many workers (use -w1 for slow magnetic disks)')
|
||||||
|
parser.add_argument(
|
||||||
|
'--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
|
||||||
|
help='read files this many bytes at a time')
|
||||||
|
parser.add_argument(
|
||||||
|
'--fsencoding', default='',
|
||||||
|
help='override the codec to decode filenames, otherwise taken from '
|
||||||
|
'the LANG environment variables')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.sum:
|
if args.sum:
|
||||||
try:
|
try:
|
||||||
print(stable_sum())
|
print(stable_sum())
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
print(unicode(e).encode('utf8'), file=sys.stderr)
|
print(str(e).encode('utf8'), file=sys.stderr)
|
||||||
else:
|
else:
|
||||||
verbosity = 1
|
verbosity = 1
|
||||||
if args.quiet:
|
if args.quiet:
|
||||||
verbosity = 0
|
verbosity = 0
|
||||||
elif args.verbose:
|
elif args.verbose:
|
||||||
verbosity = 2
|
verbosity = 2
|
||||||
run(verbosity=verbosity, test=args.test)
|
bt = Bitrot(
|
||||||
|
verbosity=verbosity,
|
||||||
|
test=args.test,
|
||||||
|
follow_links=args.follow_links,
|
||||||
|
commit_interval=args.commit_interval,
|
||||||
|
chunk_size=args.chunk_size,
|
||||||
|
workers=args.workers,
|
||||||
|
)
|
||||||
|
if args.fsencoding:
|
||||||
|
FSENCODING = args.fsencoding
|
||||||
|
try:
|
||||||
|
bt.run()
|
||||||
|
except BitrotException as bre:
|
||||||
|
print('error:', bre.args[1], file=sys.stderr)
|
||||||
|
sys.exit(bre.args[0])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
348
tests/test_bitrot.py
Normal file
348
tests/test_bitrot.py
Normal file
@ -0,0 +1,348 @@
|
|||||||
|
"""
|
||||||
|
NOTE: these tests are ordered and require pytest-order to run correctly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import getpass
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import shlex
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from textwrap import dedent
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
TMP = Path("/tmp/")
|
||||||
|
|
||||||
|
|
||||||
|
ReturnCode = int
|
||||||
|
StdOut = list[str]
|
||||||
|
StdErr = list[str]
|
||||||
|
|
||||||
|
|
||||||
|
def bitrot(*args: str) -> tuple[ReturnCode, StdOut, StdErr]:
    """Run ``python -m bitrot`` with `args` in the current working directory.

    Returns the process return code together with the stdout and stderr
    contents, each decoded as UTF-8 and filtered through `lines()`.
    """
    cmd = [sys.executable, "-m", "bitrot", *args]
    # Hand the argument list straight to the OS: no shell is needed, and
    # skipping it avoids a quote/unquote round trip through `shlex.join`.
    res = subprocess.run(cmd, capture_output=True)
    stdout = (res.stdout or b"").decode("utf8")
    stderr = (res.stderr or b"").decode("utf8")
    return res.returncode, lines(stdout), lines(stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def bash(script: str, empty_dir: bool = False) -> bool:
    """Run `script` with bash inside the per-user scratch directory.

    The scratch directory ``TMP/bitrot-dir-<user>`` is created if needed and
    becomes the process working directory. With `empty_dir` set, an existing
    scratch directory is removed first.

    Returns True when the script exits with status 0. Otherwise the captured
    output is printed and False is returned.
    """
    username = getpass.getuser()
    test_dir = TMP / f"bitrot-dir-{username}"
    if empty_dir and test_dir.is_dir():
        # Step out of the directory before deleting it.
        os.chdir(TMP)
        shutil.rmtree(test_dir)
    test_dir.mkdir(exist_ok=True)
    os.chdir(test_dir)

    preamble = """
        set -euxo pipefail
        LC_ALL=en_US.UTF-8
        LANG=en_US.UTF-8
    """

    if script:
        # We need to wait a second for modification timestamps to differ so that
        # the ordering of the output stays the same every run of the tests.
        preamble += """
        sleep 1
        """

    # Per-user file name, like the scratch directory, so that two users
    # sharing TMP do not trip over each other's script file.
    script_path = TMP / f"bitrot-test-{username}.bash"
    script_path.write_text(dedent(preamble + script))
    script_path.chmod(0o755)

    out = subprocess.run(["bash", str(script_path)], capture_output=True)
    if out.returncode:
        print(f"Non-zero return code {out.returncode} when running {script_path}")
        # Decode so the failure output is readable, not a `bytes` repr.
        if out.stdout:
            print(out.stdout.decode("utf8", errors="replace"))
        if out.stderr:
            print(out.stderr.decode("utf8", errors="replace"))
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def lines(s: str) -> list[str]:
    r"""Split `s` into right-stripped lines, dropping blank ones and any
    segment that ends in a bare \r (text later overwritten on the terminal).
    """
    kept = []
    for raw in s.splitlines(keepends=True):
        if raw.endswith("\r"):
            continue
        text = raw.rstrip()
        if text:
            kept.append(text)
    return kept
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(1)
def test_command_exists() -> None:
    """`bitrot --help` runs cleanly; the scratch directory is then reset."""
    rc, out, err = bitrot("--help")
    assert rc == 0
    assert not err
    first_line = out[0]
    assert first_line.startswith("usage:")

    # An empty script with `empty_dir` gives the later tests a fresh directory.
    assert bash("", empty_dir=True)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(2)
def test_new_files_in_a_tree_dir() -> None:
    """Two freshly created files are both reported as new entries."""
    script = """
        mkdir -p nonemptydirs/dir2/
        touch nonemptydirs/dir2/new-file-{a,b}.txt
        echo $RANDOM >> nonemptydirs/dir2/new-file-b.txt
    """
    assert bash(script)
    rc, out, err = bitrot("-v")
    assert rc == 0
    assert not err
    # out[0] (the "Finished." summary line) is deliberately not asserted.
    assert out[1:5] == [
        "2 entries in the database. 2 entries new:",
        " ./nonemptydirs/dir2/new-file-a.txt",
        " ./nonemptydirs/dir2/new-file-b.txt",
        "Updating bitrot.sha512... done.",
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(3)
def test_modified_files_in_a_tree_dir() -> None:
    """Appending to a tracked file is reported as one updated entry."""
    script = """
        echo $RANDOM >> nonemptydirs/dir2/new-file-a.txt
    """
    assert bash(script)
    rc, out, err = bitrot("-v")
    assert rc == 0
    assert not err
    assert out[0] == "Checking bitrot.db integrity... ok."
    # out[1] (the "Finished." summary line) is deliberately not asserted.
    assert out[2:5] == [
        "2 entries in the database. 1 entries updated:",
        " ./nonemptydirs/dir2/new-file-a.txt",
        "Updating bitrot.sha512... done.",
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(4)
def test_renamed_files_in_a_tree_dir() -> None:
    """Moving a tracked file is reported as one renamed entry."""
    script = """
        mv nonemptydirs/dir2/new-file-a.txt nonemptydirs/dir2/new-file-a.txt2
    """
    assert bash(script)
    rc, out, err = bitrot("-v")
    assert rc == 0
    assert not err
    assert out[0] == "Checking bitrot.db integrity... ok."
    # out[1] (the "Finished." summary line) is deliberately not asserted.
    rename_line = " from ./nonemptydirs/dir2/new-file-a.txt to ./nonemptydirs/dir2/new-file-a.txt2"
    assert out[2:5] == [
        "2 entries in the database. 1 entries renamed:",
        rename_line,
        "Updating bitrot.sha512... done.",
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(5)
def test_deleted_files_in_a_tree_dir() -> None:
    """Removing a tracked file is reported as one missing entry."""
    script = """
        rm nonemptydirs/dir2/new-file-a.txt2
    """
    assert bash(script)
    rc, out, err = bitrot("-v")
    assert rc == 0
    assert not err
    assert out[0] == "Checking bitrot.db integrity... ok."
    # out[1] (the "Finished." summary line) is deliberately not asserted.
    assert out[2:5] == [
        "1 entries in the database. 1 entries missing:",
        " ./nonemptydirs/dir2/new-file-a.txt2",
        "Updating bitrot.sha512... done.",
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(5)
def test_new_files_and_modified_files_in_a_tree_dir() -> None:
    """Seven new files plus one appended-to file in a single run.

    NOTE(review): this test carries the same order index (5) as
    `test_deleted_files_in_a_tree_dir`; it presumably relies on definition
    order to run after it. Confirm against pytest-order's tie handling.
    """
    script = """
        for fil in {a,b,c,d,e,f,g}; do
            echo $fil >> more-files-$fil.txt
        done
        echo $RANDOM >> nonemptydirs/dir2/new-file-b.txt
    """
    assert bash(script)
    rc, out, err = bitrot("-v")
    assert rc == 0
    assert not err
    assert out[0] == "Checking bitrot.db integrity... ok."
    # out[1] (the "Finished." summary line) is deliberately not asserted.
    assert out[2] == "8 entries in the database. 7 entries new:"
    assert out[3:10] == [f" ./more-files-{letter}.txt" for letter in "abcdefg"]
    assert out[10:13] == [
        "1 entries updated:",
        " ./nonemptydirs/dir2/new-file-b.txt",
        "Updating bitrot.sha512... done.",
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(6)
def test_new_files_modified_deleted_and_moved_in_a_tree_dir() -> None:
    """New, updated, renamed and missing entries all show up in one run."""
    script = """
        for fil in {a,b,c,d,e,f,g}; do
            echo $fil $RANDOM >> nonemptydirs/pl-more-files-$fil.txt
        done
        echo $RANDOM >> nonemptydirs/dir2/new-file-b.txt
        mv more-files-a.txt more-files-a.txt2
        rm more-files-g.txt
    """
    assert bash(script)
    rc, out, err = bitrot("-v")
    assert rc == 0
    assert not err
    assert out[0] == "Checking bitrot.db integrity... ok."
    # out[1] (the "Finished." summary line) is deliberately not asserted.
    assert out[2] == "14 entries in the database. 7 entries new:"
    assert out[3:10] == [
        f" ./nonemptydirs/pl-more-files-{letter}.txt" for letter in "abcdefg"
    ]
    assert out[10:17] == [
        "1 entries updated:",
        " ./nonemptydirs/dir2/new-file-b.txt",
        "1 entries renamed:",
        " from ./more-files-a.txt to ./more-files-a.txt2",
        "1 entries missing:",
        " ./more-files-g.txt",
        "Updating bitrot.sha512... done.",
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(7)
def test_new_files_modified_deleted_and_moved_in_a_tree_dir_2() -> None:
    """Same mix as the previous test, with copies of files that still exist.

    The two `cp` targets are expected among the new entries, not as renames.
    """
    script = """
        for fil in {a,b,c,d,e,f,g}; do
            echo $RANDOM >> nonemptydirs/pl2-more-files-$fil.txt
        done
        echo $RANDOM >> nonemptydirs/pl-more-files-a.txt
        mv nonemptydirs/pl-more-files-b.txt nonemptydirs/pl-more-files-b.txt2
        cp nonemptydirs/pl-more-files-g.txt nonemptydirs/pl2-more-files-g.txt2
        cp nonemptydirs/pl-more-files-d.txt nonemptydirs/pl2-more-files-d.txt2
        rm more-files-f.txt nonemptydirs/pl-more-files-c.txt
    """
    assert bash(script)
    rc, out, err = bitrot("-v")
    assert rc == 0
    assert not err
    assert out[0] == "Checking bitrot.db integrity... ok."
    # out[1] (the "Finished." summary line) is deliberately not asserted.
    assert out[2] == "21 entries in the database. 9 entries new:"
    assert out[3:12] == [
        " ./nonemptydirs/pl2-more-files-a.txt",
        " ./nonemptydirs/pl2-more-files-b.txt",
        " ./nonemptydirs/pl2-more-files-c.txt",
        " ./nonemptydirs/pl2-more-files-d.txt",
        " ./nonemptydirs/pl2-more-files-d.txt2",
        " ./nonemptydirs/pl2-more-files-e.txt",
        " ./nonemptydirs/pl2-more-files-f.txt",
        " ./nonemptydirs/pl2-more-files-g.txt",
        " ./nonemptydirs/pl2-more-files-g.txt2",
    ]
    rename_line = " from ./nonemptydirs/pl-more-files-b.txt to ./nonemptydirs/pl-more-files-b.txt2"
    assert out[12:20] == [
        "1 entries updated:",
        " ./nonemptydirs/pl-more-files-a.txt",
        "1 entries renamed:",
        rename_line,
        "2 entries missing:",
        " ./more-files-f.txt",
        " ./nonemptydirs/pl-more-files-c.txt",
        "Updating bitrot.sha512... done.",
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(8)
def test_3278_files() -> None:
    """A default-verbosity run over 3278 freshly created files counts them all as new."""
    script = """
        mkdir -p alotfiles/here; cd alotfiles/here
        # create a 320KB file
        dd if=/dev/urandom of=masterfile bs=1 count=327680
        # split it in 3277 files (instantly) + masterfile = 3278
        split -b 100 -a 10 masterfile
    """
    assert bash(script)
    rc, out, err = bitrot()
    assert rc == 0
    assert not err
    assert out[0] == "Checking bitrot.db integrity... ok."
    # out[1] (the "Finished." summary line) is deliberately not asserted.
    summary = "3299 entries in the database, 3278 new, 0 updated, 0 renamed, 0 missing."
    assert out[2] == summary
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(9)
def test_3278_files_2() -> None:
    """Moving the directory of 3278 files is reported as 3278 renames."""
    script = """
        mv alotfiles/here alotfiles/here-moved
    """
    assert bash(script)
    rc, out, err = bitrot()
    assert rc == 0
    assert not err
    assert out[0] == "Checking bitrot.db integrity... ok."
    # out[1] (the "Finished." summary line) is deliberately not asserted.
    summary = "3299 entries in the database, 0 new, 0 updated, 3278 renamed, 0 missing."
    assert out[2] == summary
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(10)
def test_rotten_file() -> None:
    """Register two files that share a timestamp, as setup for the next test."""
    script = """
        touch non-rotten-file
        dd if=/dev/zero of=rotten-file bs=1k count=1000 &>/dev/null
        # let's make sure they share the same timestamp
        touch -r non-rotten-file rotten-file
    """
    assert bash(script)
    rc, out, err = bitrot("-v")
    assert rc == 0
    assert not err
    assert out[0] == "Checking bitrot.db integrity... ok."
    # out[1] (the "Finished." summary line) is deliberately not asserted.
    assert out[2:5] == [
        "3301 entries in the database. 2 entries new:",
        " ./non-rotten-file",
        " ./rotten-file",
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order(11)
def test_rotten_file_2() -> None:
    """Changed content under an unchanged mtime is reported as a SHA1 mismatch."""
    script = """
        # modify the rotten file...
        dd if=/dev/urandom of=rotten-file bs=1k count=10 seek=1k conv=notrunc &>/dev/null
        # ...but revert the modification date
        touch -r non-rotten-file rotten-file
    """
    assert bash(script)
    rc, out, err = bitrot("-q")
    assert rc == 1
    assert not out
    expected_prefix = (
        "error: SHA1 mismatch for ./rotten-file: expected"
        " 8fee1653e234fee8513245d3cb3e3c06d071493e, got"
    )
    mismatch_line, summary_line = err[0], err[1]
    assert mismatch_line.startswith(expected_prefix)
    assert summary_line == "error: There were 1 errors found."
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.order("last")
def test_cleanup() -> None:
    """Remove the per-user scratch directory once all other tests have run."""
    test_dir = TMP / f"bitrot-dir-{getpass.getuser()}"
    if not test_dir.is_dir():
        return
    # Step out of the directory before deleting it.
    os.chdir(TMP)
    shutil.rmtree(test_dir)
|
Loading…
x
Reference in New Issue
Block a user