#!/usr/local/bin/python3.6 -u
# -*- coding:Utf-8 -*-
#
# Provenance: content of "yabitrot" as added by commit
# 68d6447833a86ac1016d7277248c45481ea1622b (vincent.delft,
# Fri Oct 19 19:13:51 2018 UTC, "add --force argument"), which replaced
# the former "yabitrot.py".


"""
Author      : Vincent
Version     : 0.2
Licence     : BSD
Require     : python >= 3.6
              use sqlite3 DB embedded with python package
Developed on: OpenBSD
Tested on   : OpenBSD, Windows
Description : This tool calculates a checksum for each file in the target folder.
              Those values are stored in an sqlite DB at the root of the targeted folder.
              This program uses the INODE as key instead of the filename, so it can
              manage hardlinks.
              Because of that, the script never goes outside the targeted filesystem.
              It works on OpenBSD, but should work on any system (OSX, Windows and Linux).


/*
 * Copyright (c) 2018 Vincent Delft
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
"""


import zlib
import time
import os.path
import sqlite3
import sys
import argparse
import fnmatch
import errno
import stat

DEFAULT_CHUNK_SIZE = 16384
COMMIT_LIMIT = 30           # seconds between two DB commits
BATCHID = time.time()       # identifies the current run; stored with each record
VERBOSE = 0
LOGFILE = ""
DRY_RUN = False
DB_FILENAME = ".cksum.db"   # DB file, stored at the root of the analysed folder


def _append_to_logfile(text):
    """Append one time-stamped line to LOGFILE."""
    tts = time.strftime("%c", time.localtime())
    with open(LOGFILE, 'a') as fid:
        fid.write("%s: %s\n" % (tts, text))


def log(text):
    """Write an informational message to LOGFILE if set, else to stdout."""
    if LOGFILE:
        _append_to_logfile(text)
    else:
        sys.stdout.write(text + "\n")
        sys.stdout.flush()


def print_err(text):
    """Write a message to stderr, and also to LOGFILE when one is set."""
    if LOGFILE:
        _append_to_logfile(text)
    sys.stderr.write(text + "\n")
    sys.stderr.flush()


def get_stats(path, osstats, status, chunk_size=DEFAULT_CHUNK_SIZE):
    """Compute the checksum of a file.

    path       -- file to read
    osstats    -- os.stat() result of that file (mtime, inode and size are used)
    status     -- free text ("new", "update") used only in progress messages
    chunk_size -- number of bytes read at a time

    Returns a dict {'crc': <str or None>, 'mtime': <float>}.  'crc' stays
    None when the file cannot be read (EACCES / EOPNOTSUPP), so callers
    never store a bogus checksum; any other OSError is re-raised.
    """
    localtts = time.time()
    crc = 0
    stats = {'crc': None, 'mtime': osstats.st_mtime}
    try:
        with open(path, 'rb') as f:
            d = f.read(chunk_size)
            while d:
                # NOTE: the checksum is the SUM of the adler32 of each chunk,
                # so its value depends on chunk_size.  Kept as is: changing
                # it would invalidate every checksum already stored.
                crc += zlib.adler32(d)
                d = f.read(chunk_size)
                if VERBOSE > 0 and time.time() - localtts > 10:
                    log("big file: %s, inode: %s, size: %.2f MB, %s" % (
                        status, osstats.st_ino, osstats.st_size / 1024 / 1024, path))
                    localtts = time.time()
    except OSError as ex:
        if ex.errno in (errno.EACCES, errno.EOPNOTSUPP):
            print_err("Failed to read:%s" % path)
            return stats
        raise
    stats['crc'] = "%s" % crc
    return stats


class CRCDB:
    """Checksum records stored in an sqlite DB, keyed by inode.

    Each record holds the file mtime, its checksum and the BATCHID of the
    last run that saw the inode.  Writes are skipped when DRY_RUN is set.
    """

    def __init__(self, fpathname, commitlimit=30):
        """Open (and create if needed) the DB stored in fpathname.

        commitlimit is the minimum number of seconds between two commits.
        """
        self.counter = 0
        self.tts = time.time()
        self.commitlimit = commitlimit
        # Inodes met during a dry run: their timestamp is not updated in the
        # DB, so cleanup() needs another way to know they still exist.
        self.seen = set()
        self.conn = sqlite3.connect(fpathname)
        self.cur = self.conn.cursor()
        self._create_db(fpathname)

    def _create_db(self, fpathname):
        """Create the cksum table when missing.

        fpathname is kept for interface compatibility; the connection
        opened by __init__ is used.
        """
        self.cur.execute("""CREATE TABLE IF NOT EXISTS cksum (
            inode INTEGER PRIMARY KEY,
            mtime REAL,
            hash TEXT,
            timestamp REAL)""")
        self.conn.commit()

    def get_rec(self, inode):
        """Return (mtime, hash, timestamp) for inode, or None if unknown.

        The returned timestamp is the one of the previous scan; the stored
        one is then set to BATCHID (unless DRY_RUN) to mark the inode as seen.
        """
        self.cur.execute('SELECT mtime, hash, timestamp FROM cksum WHERE '
                         'inode=?', (inode,))
        ret = self.cur.fetchone()
        if ret is None:
            return None
        if DRY_RUN:
            self.seen.add(inode)
        else:
            self.cur.execute('UPDATE cksum SET timestamp=? WHERE inode=?', (BATCHID, inode))
            self.commit()
        return ret

    def update_rec(self, inode, stats):
        """Replace mtime and checksum of an existing record."""
        if not DRY_RUN:
            self.cur.execute('UPDATE cksum SET mtime=?, hash=?, timestamp=? '
                             'WHERE inode=?',
                             (stats['mtime'], stats['crc'], BATCHID, inode))
            self.commit()

    def add_rec(self, inode, stats):
        """Insert a record for an inode not yet in the DB."""
        if not DRY_RUN:
            self.cur.execute('INSERT INTO cksum VALUES (?, ?, ?, ?)',
                             (inode, stats['mtime'], stats['crc'], BATCHID))
            self.commit()

    def remove_rec(self, inode):
        """Delete the record of inode."""
        if not DRY_RUN:
            self.cur.execute('DELETE FROM cksum WHERE inode=?', (inode,))
            self.commit()

    def commit(self):
        """Count one DB action and commit if commitlimit seconds have elapsed."""
        self.counter += 1
        if time.time() - self.tts > self.commitlimit:
            self.conn.commit()
            if VERBOSE > 0:
                log('commit %s files in %.2f sec' % (self.counter, time.time() - self.tts))
            self.tts = time.time()
            self.counter = 0

    def close(self):
        """Commit pending changes and close the connection."""
        self.conn.commit()
        self.conn.close()

    def cleanup(self):
        """Remove the records not seen during this run (only report them on dry run)."""
        self.cur.execute('SELECT inode FROM cksum WHERE timestamp != ?', (BATCHID,))
        stale = [inode for inode, in self.cur.fetchall()]
        if DRY_RUN:
            stale = [inode for inode in stale if inode not in self.seen]
        if not stale:
            log("No cleanup required")
        elif DRY_RUN:
            log("%s files could be removed" % (len(stale)))
        else:
            log("%s files removed from DB" % len(stale))
            self.cur.execute('DELETE from cksum WHERE timestamp !=?', (BATCHID,))

    def count(self):
        """Return the number of records in the DB."""
        self.cur.execute("SELECT count(*) from cksum")
        return self.cur.fetchone()[0]


def _is_excluded(name, patterns):
    """Tell if name matches at least one of the fnmatch patterns."""
    return any(fnmatch.fnmatch(name, patt) for patt in patterns)


def _stat_or_none(fpath):
    """Return os.stat(fpath), or None when the file is not accessible."""
    try:
        return os.stat(fpath)
    except OSError as ex:
        if ex.errno in (errno.EACCES, errno.EOPNOTSUPP, errno.ENOENT):
            return None
        raise


def analyze(rootpath, excludes=None):
    """Check every regular file below rootpath against the checksum DB.

    New files are added, files with a changed mtime are updated, and files
    whose checksum changed while the mtime did not are reported as bit
    errors.  Files matching one of the fnmatch patterns in excludes, and
    files living on another device than rootpath, are skipped.

    Exits the process with the number of bit errors (capped at 255) when
    at least one is found.
    """
    # Work on a copy: the caller's list must not be modified.
    patterns = list(excludes or []) + [DB_FILENAME]
    dbpath = os.path.join(rootpath, DB_FILENAME)
    DB = CRCDB(dbpath, COMMIT_LIMIT)
    log("DB stored on: %s" % (dbpath))
    counter = 0
    counter_added = 0
    counter_update = 0
    counter_biterror = 0
    total_size = 0
    filesystemid = os.stat(rootpath).st_dev
    log("Device ID:%s" % filesystemid)
    analyze_tts = time.time()
    for path, dummy, files in os.walk(rootpath):
        for elem in files:
            if _is_excluded(elem, patterns):
                continue
            fpath = os.path.join(path, elem)
            if VERBOSE > 1 and time.time() - analyze_tts > COMMIT_LIMIT:
                log("working with: %s" % fpath)
                analyze_tts = time.time()
            osstats = _stat_or_none(fpath)
            if osstats is None:
                log("os.stat fails for: %s" % fpath)
                continue
            if osstats.st_dev != filesystemid:
                # inodes are only unique within one filesystem
                continue
            if not stat.S_ISREG(osstats.st_mode):
                if VERBOSE > 0:
                    log("Not a regular file: %s" % fpath)
                continue
            counter += 1
            db_rec = DB.get_rec(osstats.st_ino)
            stats = None
            if db_rec is None:
                stats = get_stats(fpath, osstats, "new")
                if stats['crc'] is not None:
                    DB.add_rec(osstats.st_ino, stats)
                    counter_added += 1
            elif db_rec[2] != BATCHID:
                # A record already stamped with BATCHID is a hardlink to an
                # inode handled earlier in this run: nothing more to do.
                stats = get_stats(fpath, osstats, "update")
                if stats['crc'] is None:
                    pass  # unreadable file, already reported by get_stats
                elif db_rec[0] != osstats.st_mtime:
                    DB.update_rec(osstats.st_ino, stats)
                    counter_update += 1
                elif db_rec[1] != stats['crc']:
                    log("bit ERROR for file %s" % (fpath))
                    log("     Previous:")
                    # db_rec[2] is the BATCHID of the previous scan
                    log("         scan was on %s" % time.strftime("%c", time.localtime(db_rec[2])))
                    log("         checksum was: %s" % db_rec[1])
                    log("     Current:")
                    log("         scan on %s" % time.strftime("%c", time.localtime(BATCHID)))
                    log("         checksum is: %s" % stats['crc'])
                    counter_biterror += 1
            if stats and stats['crc'] is not None:
                total_size += osstats.st_size
    log("\n")
    DB.cleanup()
    records = DB.count()
    DB.close()
    print_err("%s files added" % counter_added)
    print_err("%s files updates" % counter_update)
    print_err("%s files error" % counter_biterror)
    print_err("%s files analysed in %.2f sec, %.3f GB" % (
        counter, time.time() - BATCHID, total_size / 1024 / 1024 / 1024))
    print_err("%s entries in the DB" % records)
    if os.name == 'posix' and not DRY_RUN:
        os.chmod(dbpath, stat.S_IRUSR | stat.S_IWUSR)
        os.chown(dbpath, os.getuid(), os.getgid())
    if counter_biterror > 0:
        print_err("Several bit error, plese check the log file")
        # Exit statuses are taken modulo 256 on POSIX: without the cap,
        # 256 errors would be reported as a success.
        sys.exit(min(counter_biterror, 255))


def force_db(fpath, rootpath, excludes=None):
    """Compute the checksum of fpath and store it in the DB of rootpath.

    The record is created when the inode is not yet known, replaced
    otherwise.  Returns 0 on success, 1 when fpath matches an exclude
    pattern, 2 when it cannot be stat'ed or read, 3 when it is not a
    regular file of the filesystem holding rootpath.
    """
    # Work on a copy: the caller's list must not be modified.
    patterns = list(excludes or []) + [DB_FILENAME]
    dbpath = os.path.join(rootpath, DB_FILENAME)
    log("DB stored on: %s" % (dbpath))
    filesystemid = os.stat(rootpath).st_dev
    log("Device ID:%s" % filesystemid)
    # analyze() matches the patterns against the bare file name; the full
    # path is tested too, as this function historically did.
    if _is_excluded(fpath, patterns) or _is_excluded(os.path.basename(fpath), patterns):
        print_err("The file you want is in the exclude list")
        print_err("File name is: %s" % fpath)
        print_err("Exclude list is: %s" % ",".join(patterns))
        return 1
    osstats = _stat_or_none(fpath)
    if osstats is None:
        log("os.stat fails for: %s" % fpath)
        return 2
    if osstats.st_dev != filesystemid or not stat.S_ISREG(osstats.st_mode):
        print_err("Not a regular file of the target filesystem: %s" % fpath)
        return 3
    stats = get_stats(fpath, osstats, "update")
    if stats['crc'] is None:
        return 2
    DB = CRCDB(dbpath, COMMIT_LIMIT)
    try:
        if DB.get_rec(osstats.st_ino) is None:
            DB.add_rec(osstats.st_ino, stats)
        else:
            DB.update_rec(osstats.st_ino, stats)
    finally:
        DB.close()
    if DRY_RUN:
        log("checksum calculated, DB not updated (dry run)")
    else:
        log("checkcum calculated and stored in the DB")
    return 0


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-s', '--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
        help='read files this many bytes at a time. Default is %s' % DEFAULT_CHUNK_SIZE)
    parser.add_argument(
        '-c', '--commit-limit', type=int, default=COMMIT_LIMIT,
        help='number of seconds between two DB commits. Default is %s' % COMMIT_LIMIT)
    parser.add_argument(
        '-p', '--path', type=str, default='.',
        help='Path to analyse. Default is "."')
    parser.add_argument(
        '-e', '--exclude', type=str, default='',
        help='file types to exclude with the fnmath format. For example *.core,*.tmp. Default is ""')
    parser.add_argument(
        '-v', '--verbose', type=int, default=0,
        help='verbosity level, currently from 0 to 2. Default is 0')
    parser.add_argument(
        '-n', '--dry-run', action='store_true',
        help='perform the task, but do not update the DB')
    parser.add_argument(
        '-L', '--log', type=str, default='',
        help='put mesage in the log instead to stdout')
    parser.add_argument(
        '-f', '--force', type=str, default='',
        help='Force checksum for a specific file')
    args = parser.parse_args()
    path = args.path
    if args.log:
        LOGFILE = args.log
    if args.verbose:
        VERBOSE = args.verbose
    if args.chunk_size:
        # NOTE(review): get_stats() bound its chunk_size default when it was
        # defined, so this assignment does not change the chunk size really
        # used.  Since the checksum depends on the chunk size, honouring the
        # option would make every stored checksum look corrupted; decide
        # whether to drop the option or to store the chunk size in the DB.
        DEFAULT_CHUNK_SIZE = args.chunk_size
    if args.commit_limit:
        COMMIT_LIMIT = args.commit_limit
    if args.dry_run:
        DRY_RUN = True
    to_exclude = []
    if args.exclude:
        to_exclude = args.exclude.split(",")
    if args.force:
        ret = force_db(args.force, path, to_exclude)
        sys.exit(ret)
    analyze(path, to_exclude)