commit - 8dcec1caa7894c07ae94278167d3a86ee6892589
commit + 68d6447833a86ac1016d7277248c45481ea1622b
blob - 6aae36deb5b4b79d0d89b9caf617e171b6be7791 (mode 644)
blob + /dev/null
--- yabitrot.py
+++ /dev/null
-#!/usr/local/bin/python3.6 -u
-# -*- coding:Utf-8 -*-
-
-
-"""
-Author : Vincent <vincent.delft@gmail.com>
-Version : 0.2
-Licence : BSD
-Require : OpenBSD
- python >= 3.6
- use sqlite3 DB embedded with python package
-
-Description : This tool allow you to calculate a checksum for each files in the target folder
- Those values are stored in an sqlite DB at the root of your targetted folder
- This program use INODE as key instead of filename, so it can manage hardlinks
- Since that, the scrip does never go outside the targetted filesystem
- it works on openBSD, but should work on any systems (OSX, Windows and Linux)
-
-
-/*
- * Copyright (c) 2018 Vincent Delft <vincent.delf@gmail.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-"""
-
-
-import zlib
-import time
-import os.path
-import sqlite3
-import sys
-import argparse
-import fnmatch
-import errno
-import stat
-
-DEFAULT_CHUNK_SIZE = 16384
-COMMIT_LIMIT = 30
-BATCHID = time.time()
-VERBOSE = 0
-LOGFILE = ""
-DRY_RUN = False
-
-
-def log(text):
- if LOGFILE:
- tts = time.strftime("%c", time.localtime())
- fid = open(LOGFILE, 'a')
- fid.write("%s: %s\n" % (tts, text))
- fid.close()
- else:
- sys.stdout.write(text + "\n")
- sys.stdout.flush()
-
-
-def print_err(text):
- if LOGFILE:
- tts = time.strftime("%c", time.localtime())
- fid = open(LOGFILE, 'a')
- fid.write("%s: %s\n" % (tts, text))
- fid.close()
- sys.stderr.write(text + "\n")
- sys.stderr.flush()
-
-
-def get_stats(path, osstats, status, chunk_size=DEFAULT_CHUNK_SIZE):
- localtts = time.time()
- crc = 0
- stats = {'crc': None, 'mtime': None}
- stats['mtime'] = osstats.st_mtime
- try:
- with open(path, 'rb') as f:
- d = f.read(chunk_size)
- while d:
- crc += zlib.adler32(d)
- d = f.read(chunk_size)
- if VERBOSE > 0 and time.time() - localtts > 10:
- log("big file: %s, inode: %s, size: %.2f MB, %s" % (status, osstats.st_ino, osstats.st_size / 1024 / 1024, path))
- localtts = time.time()
- except OSError as ex:
- if ex.errno in [errno.EACCES, errno.EOPNOTSUPP]:
- print_err("Failed to read:%s" % path)
- else:
- raise
- stats['crc'] = "%s" % crc
- return stats
-
-
-class CRCDB:
- def __init__(self, fpathname, commitlimit=30):
- self.counter = 0
- self.tts = time.time()
- self.commitlimit = commitlimit
- if os.path.exists(fpathname):
- self.conn = sqlite3.connect(fpathname)
- self.cur = self.conn.cursor()
- tables = set(t for t, in self.cur.execute('SELECT name FROM sqlite_master'))
- if 'cksum' not in tables:
- self._create_db(fpathname)
- else:
- self._create_db(fpathname)
-
- def _create_db(self, fpathname):
- self.conn = sqlite3.connect(fpathname)
- self.cur = self.conn.cursor()
- self.cur.execute("""CREATE TABLE cksum (
- inode INTEGER PRIMARY KEY,
- mtime REAL,
- hash TEXT,
- timestamp REAL)""")
- self.conn.commit()
-
- def get_rec(self, inode):
- self.cur.execute('SELECT mtime, hash, timestamp FROM cksum WHERE '
- 'inode=?', (inode,))
- ret = self.cur.fetchone()
- if ret and not DRY_RUN:
- self.cur.execute('UPDATE cksum SET timestamp=? WHERE inode=?', (BATCHID, inode))
- self.commit()
- return ret
- return None
-
- def update_rec(self, inode, stats):
- if not DRY_RUN:
- self.cur.execute('UPDATE cksum SET mtime=?, hash=?, timestamp=? '
- 'WHERE inode=?',
- (stats['mtime'], stats['crc'], BATCHID, inode))
- self.commit()
-
- def add_rec(self, inode, stats):
- if not DRY_RUN:
- self.cur.execute('INSERT INTO cksum VALUES (?, ?, ?, ?)',
- (inode, stats['mtime'], stats['crc'], BATCHID))
- self.commit()
-
- def remove_rec(self, inode):
- if not DRY_RUN:
- self.cur.execute('DELETE FROM cksum WHERE inode=?', (inode,))
- self.commit()
-
- def commit(self):
- self.counter += 1
- if time.time() - self.tts > self.commitlimit:
- self.conn.commit()
- if VERBOSE > 0:
- log('commit %s files in %.2f sec' % (self.counter, time.time() - self.tts))
- self.tts = time.time()
- self.counter = 0
-
- def close(self):
- self.conn.commit()
- self.conn.close()
-
- def cleanup(self):
- self.cur.execute('SELECT inode FROM cksum WHERE timestamp != ?', (BATCHID,))
- ret = self.cur.fetchall()
- if ret:
- if DRY_RUN:
- log("%s files could be removed" % (len(ret)))
- else:
- log("%s files removed from DB" % len(ret))
- self.cur.execute('DELETE from cksum WHERE timestamp !=?', (BATCHID,))
- else:
- log("No cleanup required")
-
- def count(self):
- self.cur.execute("SELECT count(*) from cksum")
- return self.cur.fetchone()
-
-
-def analyze(rootpath, excludes=[]):
- dbpath = os.path.join(rootpath, ".cksum.db")
- DB = CRCDB(dbpath, COMMIT_LIMIT)
- log("DB stored on: %s" % (dbpath))
- excludes.append('.cksum.db')
- counter = 0
- counter_added = 0
- counter_update = 0
- counter_biterror = 0
- total_size = 0
- filesystemid = os.stat(rootpath).st_dev
- log("Device ID:%s" % filesystemid)
- analyze_tts = time.time()
- for path, dummy, files in os.walk(rootpath):
- for elem in files:
- to_skip = False
- for excl_patt in excludes:
- if fnmatch.fnmatch(elem, excl_patt):
- to_skip = True
- if to_skip:
- continue
- fpath = os.path.join(path, elem)
- if VERBOSE > 1 and time.time() - analyze_tts > COMMIT_LIMIT:
- log("working with:", fpath)
- analyze_tts = time.time()
- try:
- osstats = os.stat(fpath)
- except OSError as ex:
- if ex.errno in [errno.EACCES, errno.EOPNOTSUPP, errno.ENOENT]:
- osstats = None
- else:
- raise
- if osstats is None:
- log("os.stat fails for: %s" % fpath)
- continue
- if osstats.st_dev != filesystemid:
- continue
- if not stat.S_ISREG(osstats.st_mode):
- if VERBOSE > 0:
- log("Not a regular file: %s" % fpath)
- continue
- counter += 1
- db_rec = DB.get_rec(osstats.st_ino)
- stats = None
- if db_rec is None:
- stats = get_stats(fpath, osstats, "new")
- DB.add_rec(osstats.st_ino, stats)
- counter_added += 1
- else:
- if db_rec[2] != BATCHID:
- stats = get_stats(fpath, osstats, "update")
- if db_rec[0] != osstats.st_mtime:
- DB.update_rec(osstats.st_ino, stats)
- counter_update += 1
- elif db_rec[1] != stats['crc']:
- log("bit ERROR for file %s" % (fpath))
- log("Previous scan was on %s" % time.strftime("%c", time.localtime(db_rec[0])))
- counter_biterror += 1
- if stats and stats['crc']:
- total_size += osstats.st_size
- log("\n")
- DB.cleanup()
- records = DB.count()
- DB.close()
- print_err("%s files added" % counter_added)
- print_err("%s files updates" % counter_update)
- print_err("%s files error" % counter_biterror)
- print_err("%s files analysed in %.2f sec, %.3f GB" % (counter, time.time() - BATCHID, total_size / 1024 / 1024 / 1024))
- print_err("%s entries in the DB" % records)
- if os.name == 'posix' and not DRY_RUN:
- os.chmod(dbpath, stat.S_IRUSR | stat.S_IWUSR)
- os.chown(dbpath, os.getuid(), os.getgid())
- if counter_biterror > 0:
- print_err("Several bit error, plese check the log file")
- sys.exit(counter_biterror)
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument(
- '-s', '--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
- help='read files this many bytes at a time. Default is %s' % DEFAULT_CHUNK_SIZE)
- parser.add_argument(
- '-c', '--commit-limit', type=int, default=COMMIT_LIMIT,
- help='number of DB actions before committing them. Default is %s' % COMMIT_LIMIT)
- parser.add_argument(
- '-p', '--path', type=str, default='.',
- help='Path to analyse. Default is "."')
- parser.add_argument(
- '-e', '--exclude', type=str, default='',
- help='file types to exclude with the fnmath format. For example *.core,*.tmp. Default is ""')
- parser.add_argument(
- '-v', '--verbose', type=int, default=0,
- help='verbosity level, currently from 0 to 2. Default is 0')
- parser.add_argument(
- '-n', '--dry-run', action='store_true',
- help='perform the task, but do not update the DB')
- parser.add_argument(
- '-L', '--log', type=str, default='',
- help='put mesage in the log instead to stdout')
- args = parser.parse_args()
- path = args.path
- if args.log:
- LOGFILE = args.log
- if args.verbose:
- VERBOSE = args.verbose
- if args.chunk_size:
- DEFAULT_CHUNK_SIZE = args.chunk_size
- if args.commit_limit:
- COMMIT_LIMIT = args.commit_limit
- if args.dry_run:
- DRY_RUN = True
- to_exclude = []
- if args.exclude:
- to_exclude = args.exclude.split(",")
- analyze(path, to_exclude)
blob - /dev/null
blob + 7e47e2712708a9d2cc809a340cb1d09117a0f037 (mode 755)
--- /dev/null
+++ yabitrot
+#!/usr/local/bin/python3.6 -u
+# -*- coding:Utf-8 -*-
+
+
+"""
+Author : Vincent <vincent.delft@gmail.com>
+Version : 0.2
+Licence : BSD
+Require : python >= 3.6
+ use sqlite3 DB embedded with python package
+Developed on: OpenBSD
+Tested on : OpenBSD, Windows
+Description : This tool allows you to calculate a checksum for each file in the target folder.
+              Those values are stored in an sqlite DB at the root of your targeted folder.
+              This program uses the INODE as key instead of the filename, so it can manage hardlinks.
+              Because of that, the script never goes outside the targeted filesystem.
+              It works on OpenBSD, but should work on any system (OSX, Windows and Linux).
+
+
+/*
+ * Copyright (c) 2018 Vincent Delft <vincent.delf@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+"""
+
+
+import zlib
+import time
+import os.path
+import sqlite3
+import sys
+import argparse
+import fnmatch
+import errno
+import stat
+
+# Default number of bytes read per chunk when a file is checksummed.
+DEFAULT_CHUNK_SIZE = 16384
+# Handed to CRCDB as commitlimit, which compares it with the seconds elapsed
+# since the previous intermediate commit.
+COMMIT_LIMIT = 30
+# Identifier of the current run; written as the `timestamp` of every record
+# touched during this run.
+BATCHID = time.time()
+# Verbosity level; messages are gated on VERBOSE > 0 and VERBOSE > 1.
+VERBOSE = 0
+# When non-empty, path of the file that log() and print_err() append to.
+LOGFILE = ""
+# When True, the CRCDB methods skip their write statements.
+DRY_RUN = False
+
+
+def log(text):
+    """Emit an informational message.
+
+    When LOGFILE is set, the message is appended to that file, prefixed with
+    the local time; otherwise it is written to stdout, which is then flushed.
+    """
+    if LOGFILE:
+        tts = time.strftime("%c", time.localtime())
+        # The context manager closes the handle even when write() raises.
+        with open(LOGFILE, 'a') as fid:
+            fid.write("%s: %s\n" % (tts, text))
+    else:
+        sys.stdout.write(text + "\n")
+        sys.stdout.flush()
+
+
+def print_err(text):
+    """Emit an error or summary message.
+
+    The message is always written to stderr; when LOGFILE is set it is also
+    appended to that file, prefixed with the local time.
+    """
+    if LOGFILE:
+        tts = time.strftime("%c", time.localtime())
+        # The context manager closes the handle even when write() raises.
+        with open(LOGFILE, 'a') as fid:
+            fid.write("%s: %s\n" % (tts, text))
+    sys.stderr.write(text + "\n")
+    sys.stderr.flush()
+
+
+def get_stats(path, osstats, status, chunk_size=DEFAULT_CHUNK_SIZE):
+    """Build the checksum record of one file.
+
+    path       -- file to read (opened in binary mode)
+    osstats    -- os.stat() result of that file; st_mtime, st_ino and st_size
+                  are read from it
+    status     -- label inserted in the "big file" progress message
+    chunk_size -- number of bytes requested per read
+
+    Returns a dict with the keys 'crc' (the checksum rendered as text) and
+    'mtime'.  EACCES and EOPNOTSUPP are reported through print_err() and the
+    checksum accumulated so far is still returned; any other OSError is
+    re-raised.
+
+    NOTE(review): the default of chunk_size is bound when the function is
+    defined, so reassigning DEFAULT_CHUNK_SIZE afterwards does not change it.
+    NOTE(review): the adler32 values of the individual chunks are added up,
+    so the resulting number depends on chunk_size.
+    """
+    result = {'crc': None, 'mtime': osstats.st_mtime}
+    last_report = time.time()
+    total = 0
+    try:
+        with open(path, 'rb') as handle:
+            block = handle.read(chunk_size)
+            while block:
+                total += zlib.adler32(block)
+                block = handle.read(chunk_size)
+                # Progress message at most once every 10 seconds.
+                if VERBOSE > 0 and time.time() - last_report > 10:
+                    log("big file: %s, inode: %s, size: %.2f MB, %s" % (status, osstats.st_ino, osstats.st_size / 1024 / 1024, path))
+                    last_report = time.time()
+    except OSError as ex:
+        if ex.errno not in (errno.EACCES, errno.EOPNOTSUPP):
+            raise
+        print_err("Failed to read:%s" % path)
+    result['crc'] = "%s" % total
+    return result
+
+
+class CRCDB:
+    """SQLite-backed store of per-inode checksum records.
+
+    Each row of the ``cksum`` table holds the inode (primary key), the file
+    mtime, the checksum as text and the BATCHID of the last run that touched
+    the record.  When the module-level DRY_RUN flag is set, the methods below
+    skip their write statements.
+    """
+
+    def __init__(self, fpathname, commitlimit=30):
+        """Open the database stored at fpathname, creating it when needed.
+
+        commitlimit is the number of seconds that must elapse before
+        commit() performs an intermediate commit.
+        """
+        self.counter = 0
+        self.tts = time.time()
+        self.commitlimit = commitlimit
+        # sqlite3.connect() creates a missing file, so one connection serves
+        # both the "new DB" and the "existing DB" case; no connection is left
+        # dangling when the table still has to be created.
+        self.conn = sqlite3.connect(fpathname)
+        self.cur = self.conn.cursor()
+        tables = set(t for t, in self.cur.execute('SELECT name FROM sqlite_master'))
+        if 'cksum' not in tables:
+            self._create_db(fpathname)
+
+    def _create_db(self, fpathname):
+        """Create the cksum table and commit it.
+
+        The connection opened by __init__ is reused; fpathname is only used
+        to connect when no connection exists yet.
+        """
+        if getattr(self, 'conn', None) is None:
+            self.conn = sqlite3.connect(fpathname)
+            self.cur = self.conn.cursor()
+        self.cur.execute("""CREATE TABLE cksum (
+            inode INTEGER PRIMARY KEY,
+            mtime REAL,
+            hash TEXT,
+            timestamp REAL)""")
+        self.conn.commit()
+
+    def get_rec(self, inode):
+        """Return the (mtime, hash, timestamp) row of inode, or None.
+
+        The returned row carries the timestamp as it was before this call.
+        Unless DRY_RUN is set, the stored timestamp is then refreshed to
+        BATCHID.  The row is returned in dry-run mode too, so a dry run can
+        still compare checksums.
+        """
+        self.cur.execute('SELECT mtime, hash, timestamp FROM cksum WHERE '
+                         'inode=?', (inode,))
+        ret = self.cur.fetchone()
+        if ret and not DRY_RUN:
+            self.cur.execute('UPDATE cksum SET timestamp=? WHERE inode=?', (BATCHID, inode))
+            self.commit()
+        return ret
+
+    def update_rec(self, inode, stats):
+        """Overwrite mtime, hash and timestamp of an existing record."""
+        if not DRY_RUN:
+            self.cur.execute('UPDATE cksum SET mtime=?, hash=?, timestamp=? '
+                             'WHERE inode=?',
+                             (stats['mtime'], stats['crc'], BATCHID, inode))
+            self.commit()
+
+    def add_rec(self, inode, stats):
+        """Insert a new record for inode."""
+        if not DRY_RUN:
+            self.cur.execute('INSERT INTO cksum VALUES (?, ?, ?, ?)',
+                             (inode, stats['mtime'], stats['crc'], BATCHID))
+            self.commit()
+
+    def remove_rec(self, inode):
+        """Delete the record of inode."""
+        if not DRY_RUN:
+            self.cur.execute('DELETE FROM cksum WHERE inode=?', (inode,))
+            self.commit()
+
+    def commit(self):
+        """Count one DB action and commit when commitlimit seconds elapsed.
+
+        The counter is only used for the verbose message and is reset after
+        each intermediate commit.
+        """
+        self.counter += 1
+        if time.time() - self.tts > self.commitlimit:
+            self.conn.commit()
+            if VERBOSE > 0:
+                log('commit %s files in %.2f sec' % (self.counter, time.time() - self.tts))
+            self.tts = time.time()
+            self.counter = 0
+
+    def close(self):
+        """Commit pending changes and close the connection."""
+        self.conn.commit()
+        self.conn.close()
+
+    def cleanup(self):
+        """Drop the records whose timestamp differs from BATCHID.
+
+        In dry-run mode nothing is deleted and only the count is logged.
+        No timestamp is refreshed during a dry run, so that count covers
+        every record written by earlier runs.
+        """
+        self.cur.execute('SELECT inode FROM cksum WHERE timestamp != ?', (BATCHID,))
+        ret = self.cur.fetchall()
+        if ret:
+            if DRY_RUN:
+                log("%s files could be removed" % (len(ret)))
+            else:
+                log("%s files removed from DB" % len(ret))
+                self.cur.execute('DELETE from cksum WHERE timestamp !=?', (BATCHID,))
+        else:
+            log("No cleanup required")
+
+    def count(self):
+        """Return the row produced by SELECT count(*), a one-element tuple."""
+        self.cur.execute("SELECT count(*) from cksum")
+        return self.cur.fetchone()
+
+
+def analyze(rootpath, excludes=None):
+    """Walk rootpath and check every regular file against the checksum DB.
+
+    rootpath -- folder to scan; the DB lives in <rootpath>/.cksum.db
+    excludes -- optional iterable of fnmatch patterns applied to file names;
+                the caller's list is not modified
+
+    For each file on the same device as rootpath:
+      * no record for its inode  -> checksum computed and record added
+      * record with another mtime -> checksum computed and record updated
+      * same mtime, other checksum -> reported as a bit error
+    Records not touched during this run are then cleaned up and a summary is
+    written through print_err().  When bit errors were found the process
+    exits with their number, capped at 255.
+    """
+    dbpath = os.path.join(rootpath, ".cksum.db")
+    DB = CRCDB(dbpath, COMMIT_LIMIT)
+    log("DB stored on: %s" % (dbpath))
+    # Work on a copy: neither a shared default nor the caller's list grows.
+    patterns = list(excludes or []) + ['.cksum.db']
+    counter = 0
+    counter_added = 0
+    counter_update = 0
+    counter_biterror = 0
+    total_size = 0
+    filesystemid = os.stat(rootpath).st_dev
+    log("Device ID:%s" % filesystemid)
+    analyze_tts = time.time()
+    for path, dummy, files in os.walk(rootpath):
+        for elem in files:
+            if any(fnmatch.fnmatch(elem, excl_patt) for excl_patt in patterns):
+                continue
+            fpath = os.path.join(path, elem)
+            if VERBOSE > 1 and time.time() - analyze_tts > COMMIT_LIMIT:
+                # log() takes a single string.
+                log("working with: %s" % fpath)
+                analyze_tts = time.time()
+            try:
+                osstats = os.stat(fpath)
+            except OSError as ex:
+                if ex.errno in [errno.EACCES, errno.EOPNOTSUPP, errno.ENOENT]:
+                    osstats = None
+                else:
+                    raise
+            if osstats is None:
+                log("os.stat fails for: %s" % fpath)
+                continue
+            # Inodes are only unique per device: stay on rootpath's one.
+            if osstats.st_dev != filesystemid:
+                continue
+            if not stat.S_ISREG(osstats.st_mode):
+                if VERBOSE > 0:
+                    log("Not a regular file: %s" % fpath)
+                continue
+            counter += 1
+            db_rec = DB.get_rec(osstats.st_ino)
+            stats = None
+            if db_rec is None:
+                stats = get_stats(fpath, osstats, "new")
+                DB.add_rec(osstats.st_ino, stats)
+                counter_added += 1
+            elif db_rec[2] != BATCHID:
+                # A record already stamped with BATCHID is an inode seen
+                # earlier in this run (hardlink): nothing to recompute.
+                stats = get_stats(fpath, osstats, "update")
+                if db_rec[0] != osstats.st_mtime:
+                    DB.update_rec(osstats.st_ino, stats)
+                    counter_update += 1
+                elif db_rec[1] != stats['crc']:
+                    log("bit ERROR for file %s" % (fpath))
+                    log(" Previous:")
+                    # db_rec[2] is the stored scan timestamp (db_rec[0] is
+                    # the file mtime).
+                    log(" scan was on %s" % time.strftime("%c", time.localtime(db_rec[2])))
+                    log(" checksum was: %s" % db_rec[1])
+                    log(" Current:")
+                    log(" scan on %s" % time.strftime("%c", time.localtime(BATCHID)))
+                    log(" checksum is: %s" % stats['crc'])
+                    counter_biterror += 1
+            if stats and stats['crc']:
+                total_size += osstats.st_size
+    log("\n")
+    DB.cleanup()
+    records = DB.count()
+    DB.close()
+    print_err("%s files added" % counter_added)
+    print_err("%s files updates" % counter_update)
+    print_err("%s files error" % counter_biterror)
+    print_err("%s files analysed in %.2f sec, %.3f GB" % (counter, time.time() - BATCHID, total_size / 1024 / 1024 / 1024))
+    print_err("%s entries in the DB" % records)
+    if os.name == 'posix' and not DRY_RUN:
+        os.chmod(dbpath, stat.S_IRUSR | stat.S_IWUSR)
+        os.chown(dbpath, os.getuid(), os.getgid())
+    if counter_biterror > 0:
+        print_err("Several bit error, plese check the log file")
+        # POSIX keeps only the low 8 bits of the status: without the cap,
+        # 256 errors would be reported as success.
+        sys.exit(min(counter_biterror, 255))
+
+def force_db(fpath, rootpath, excludes=None):
+    """Recompute the checksum of one file and store it in the DB.
+
+    fpath    -- file whose checksum must be (re)computed
+    rootpath -- folder holding the .cksum.db database
+    excludes -- optional iterable of fnmatch patterns applied to the file
+                name, as analyze() does; the caller's list is not modified
+
+    Returns 0 on success, 1 when the file matches an exclude pattern and 2
+    when os.stat() fails with EACCES, EOPNOTSUPP or ENOENT.  Other OSErrors
+    propagate.  The DB is closed on every path.
+    """
+    dbpath = os.path.join(rootpath, ".cksum.db")
+    DB = CRCDB(dbpath, COMMIT_LIMIT)
+    try:
+        log("DB stored on: %s" % (dbpath))
+        # Work on a copy: neither a shared default nor the caller's list grows.
+        patterns = list(excludes or []) + ['.cksum.db']
+        filesystemid = os.stat(rootpath).st_dev
+        log("Device ID:%s" % filesystemid)
+        # Match on the file name only, like analyze(); otherwise a pattern
+        # such as '.cksum.db' never matches a path like './.cksum.db'.
+        fname = os.path.basename(fpath)
+        if any(fnmatch.fnmatch(fname, excl_patt) for excl_patt in patterns):
+            print_err("The file you want is in the exclude list")
+            print_err("File name is: %s" % fpath)
+            print_err("Exclude list is: %s" % ",".join(patterns))
+            return 1
+        try:
+            osstats = os.stat(fpath)
+        except OSError as ex:
+            if ex.errno in [errno.EACCES, errno.EOPNOTSUPP, errno.ENOENT]:
+                osstats = None
+            else:
+                raise
+        if osstats is None:
+            log("os.stat fails for: %s" % fpath)
+            return 2
+        stats = get_stats(fpath, osstats, "update")
+        # update_rec() only issues an UPDATE; a file without a record yet
+        # needs an INSERT, otherwise nothing would be stored.
+        if DB.get_rec(osstats.st_ino) is None:
+            DB.add_rec(osstats.st_ino, stats)
+        else:
+            DB.update_rec(osstats.st_ino, stats)
+        log("checkcum calculated and stored in the DB")
+        return 0
+    finally:
+        DB.close()
+
+if __name__ == "__main__":
+    # Command line entry point: parse the options, copy them into the
+    # module-level settings, then either force one file or scan the tree.
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-s', '--chunk-size', type=int, default=DEFAULT_CHUNK_SIZE,
+        help='read files this many bytes at a time. Default is %s' % DEFAULT_CHUNK_SIZE)
+    # CRCDB.commit() compares this value with elapsed seconds, not with a
+    # number of actions, so the help text says seconds.
+    parser.add_argument(
+        '-c', '--commit-limit', type=int, default=COMMIT_LIMIT,
+        help='number of seconds between two DB commits. Default is %s' % COMMIT_LIMIT)
+    parser.add_argument(
+        '-p', '--path', type=str, default='.',
+        help='Path to analyse. Default is "."')
+    parser.add_argument(
+        '-e', '--exclude', type=str, default='',
+        help='file types to exclude with the fnmath format. For example *.core,*.tmp. Default is ""')
+    parser.add_argument(
+        '-v', '--verbose', type=int, default=0,
+        help='verbosity level, currently from 0 to 2. Default is 0')
+    parser.add_argument(
+        '-n', '--dry-run', action='store_true',
+        help='perform the task, but do not update the DB')
+    parser.add_argument(
+        '-L', '--log', type=str, default='',
+        help='put mesage in the log instead to stdout')
+    parser.add_argument(
+        '-f', '--force', type=str, default='',
+        help='Force checksum for a specific file')
+    args = parser.parse_args()
+    path = args.path
+    if args.log:
+        LOGFILE = args.log
+    if args.verbose:
+        VERBOSE = args.verbose
+    if args.chunk_size:
+        # NOTE(review): get_stats() bound its chunk_size default when it was
+        # defined, so this reassignment does not reach it.
+        DEFAULT_CHUNK_SIZE = args.chunk_size
+    if args.commit_limit:
+        COMMIT_LIMIT = args.commit_limit
+    if args.dry_run:
+        DRY_RUN = True
+    to_exclude = []
+    if args.exclude:
+        to_exclude = args.exclude.split(",")
+    if args.force:
+        # Single-file mode: the return code of force_db() is the exit status.
+        ret = force_db(args.force, path, to_exclude)
+        sys.exit(ret)
+    analyze(path, to_exclude)