commit c0872132851606646d12af51346ff0e612753862 from: vincent.delft date: Sat Jun 8 04:38:37 2019 UTC change some comments test if fpath is a file commit - 49b405563aee486d202e70daf3e3e8b9e7962b7f commit + c0872132851606646d12af51346ff0e612753862 blob - 45a6222f6b752b22c69e8dfc40ac643c95025c23 blob + cb316531820dbe7e8b830515d48eb65045e2d921 --- yabitrot +++ yabitrot @@ -4,16 +4,16 @@ """ Author : Vincent -Version : 0.4 +Version : 0.4.2 Licence : BSD Require : python >= 3.6 use sqlite3 DB embedded with python package Developed on: OpenBSD Tested on : OpenBSD 6.4, Windows 10, osx 10.14 -Description : This tool allow you to calculate a checksum for each files in the target folder - Those values are stored in an sqlite DB at the root of your targetted folder +Description : This tool allow you to calculate a check-sum for each files in the target folder + Those values are stored in an sqlite DB at the root of your target folder This program use INODE as key instead of filename, so it can manage hardlinks - Since that, the scrip does never go outside the targetted filesystem + Since that, the scrip does never go outside the target filesystem it works on openBSD, but should work on any systems (OSX, Windows and Linux) Typically, you must perform a first scan of the folder you want: @@ -58,7 +58,7 @@ VERBOSE = 0 LOGFILE = "" DRY_RUN = False DB_FILE_NAME = ".cksum.db" -BIGFILE_TTS = 10 # number of seconds after which the chksum informs (if verbose) that he is working on a big file +BIGFILE_TTS = 10 # number of seconds after which the cksum informs (if verbose) that he is working on a big file def log(text): @@ -97,7 +97,7 @@ def get_cksum(path, osstats, status, chunk_size=DEFAUL crc += zlib.adler32(d) d = f.read(chunk_size) if VERBOSE > 0 and time.time() - localtts > BIGFILE_TTS: - # for pur information, we log the big consumers + # only for information, we log the big consumers log("big file: %s, inode: %s, size: %.2f MB, %s" % (status, osstats.st_ino, osstats.st_size / 1024 / 1024, path)) localtts = time.time() except OSError as ex: @@ -115,6 +115,8 @@ def get_osstats(fpath, excludes): for excl_patt in excludes: if fnmatch.fnmatch(fpath, excl_patt): to_skip = True + if not os.path.isfile(fpath): + to_skip = True if to_skip: if VERBOSE > 0: log("Based on exclude rules, we skip: %s" % fpath) @@ -136,9 +138,9 @@ def get_osstats(fpath, excludes): class CRCDB: """The DB class with 2 tables: cksum and params. - cksum is the main DB where we store indes and associated chckesums + cksum is the main DB where we store inodes and associated cksums params is a key-pair values table. Currently we store: - rootpath: the path from where we perform an anlysis + rootpath: the path from where we perform an analysis filesystem id: the id of the targeted filesystem """ def __init__(self, fpathname, commitlimit=30): @@ -249,9 +251,9 @@ class CRCDB: def analyze(rootpath, excludes=[]): - """ananlyze rootath and all sub-folders. - if a DB exists, it compare the checksum of the associated inode with what we have in the DB for this inode - if a DB does not exists it store the checksum associated to the inode + """analyze rootath and all sub-folders. + if a DB exists, it compare the cksum of the associated inode with what we have in the DB for this inode + if a DB does not exists it store the check-sum associated to the inode """ dbpath = os.path.join(rootpath, DB_FILE_NAME) DB = CRCDB(dbpath, COMMIT_LIMIT) @@ -310,11 +312,11 @@ def analyze(rootpath, excludes=[]): log("bit ERROR for file %s" % (fpath)) log(" Previous:") log(" scan was on %s" % time.strftime("%c", time.localtime(db_rec[2]))) - log(" checksum was: %s" % db_rec[1]) + log(" cksum was: %s" % db_rec[1]) log(" mtime was: %s" % time.strftime("%c", time.localtime(db_rec[0]))) log(" Current:") log(" scan on %s" % time.strftime("%c", time.localtime(BATCHID))) - log(" checksum is: %s" % cksum['crc']) + log(" cksum is: %s" % cksum['crc']) log(" mtime is: %s" % time.strftime("%c", time.localtime(osstats.st_mtime))) counter_biterror += 1 if cksum and cksum['crc']: @@ -326,7 +328,7 @@ def analyze(rootpath, excludes=[]): log("%s files added" % counter_added) log("%s files updates" % counter_update) log("%s files error" % counter_biterror) - log("%s files analysed in %.2f sec, %.3f GB" % (counter, time.time() - BATCHID, total_size / 1024 / 1024 / 1024)) + log("%s files analyzed in %.2f sec, %.3f GB" % (counter, time.time() - BATCHID, total_size / 1024 / 1024 / 1024)) log("%s entries in the DB" % records) if os.name == 'posix' and not DRY_RUN: os.chmod(dbpath, stat.S_IRUSR | stat.S_IWUSR) @@ -337,7 +339,7 @@ def analyze(rootpath, excludes=[]): def force_db(fpath, rootpath, excludes=[]): - """This udate the DB record for this inode""" + """This update the DB record for this inode""" dbpath = os.path.join(rootpath, DB_FILE_NAME) DB = CRCDB(dbpath, COMMIT_LIMIT) log("DB stored on: %s" % (dbpath)) @@ -366,7 +368,7 @@ def force_db(fpath, rootpath, excludes=[]): return 2 stats = get_cksum(fpath, osstats, "update") DB.update_rec(osstats.st_ino, stats) - log("checkcum calculated and stored in the DB") + log("check-sum calculated and stored in the DB") DB.close() return 0 @@ -381,7 +383,7 @@ if __name__ == "__main__": help='number of DB actions before committing them. Default is %s' % COMMIT_LIMIT) parser.add_argument( '-p', '--path', type=str, default='.', - help='Path to analyse. Default is "."') + help='Path to analyze. Default is "."') parser.add_argument( '-e', '--exclude', type=str, default='', help='file types to exclude with the fnmath format. For example *.core,*.tmp. Default is ""') @@ -393,10 +395,10 @@ if __name__ == "__main__": help='perform the task, but do not update the DB') parser.add_argument( '-L', '--log', type=str, default='', - help='put mesage in the log instead to stdout') + help='put message in the log instead to stdout') parser.add_argument( '-f', '--force', type=str, default='', - help='Force checksum for a specific file') + help='Force check-sum for a specific file') args = parser.parse_args() path = args.path if args.log: