X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~mdw/git/rsync-backup/blobdiff_plain/f6b4ffdc7265b79b945e2350efbd9d2a94df4450..HEAD:/fshash.in diff --git a/fshash.in b/fshash.in index 1dbd8be..dcfd229 100644 --- a/fshash.in +++ b/fshash.in @@ -27,6 +27,7 @@ from sys import argv, exit, stdin, stdout, stderr import os as OS import re as RX import time as T +import errno as E import stat as ST import optparse as OP import hashlib as H @@ -98,10 +99,15 @@ def enum_walk(file, func): dir([OS.path.join(d.name, e) for e in dirents(d.name)], dev) if file.endswith('/'): - OS.chdir(file) - fi = FileInfo('.') - func(fi) - dir(dirents('.'), fi.st.st_dev) + cwd = OS.open('.', OS.O_RDONLY) + try: + OS.chdir(file) + fi = FileInfo('.') + func(fi) + dir(dirents('.'), fi.st.st_dev) + finally: + OS.fchdir(cwd) + OS.close(cwd) else: fi = FileInfo(file) func(fi) @@ -326,6 +332,11 @@ class HashCache (object): if not me._db: die("no cache database") + def forget(me, ino): + me.need_db() + c = me._db.cursor() + c.execute('DELETE FROM hash WHERE ino = ?', [ino]) + def reset(me): me.need_db() c = me._db.cursor() @@ -348,7 +359,7 @@ class GenericFormatter (object): tm = T.gmtime(t) return T.strftime('%Y-%m-%dT%H:%M:%SZ', tm) def _enc_name(me, n): - return n.encode('string_escape') + return ' \\-> '.join(n.encode('string_escape').split(' -> ')) def name(me): return me._enc_name(me.fi.name) def info(me): @@ -438,12 +449,116 @@ class Reporter (object): suffix = '\0%d' % seq seq += 1 me._inomap[inoidx] = vino + if OPTS.compat >= 2: me._vinomap[vino] = inoidx if h: info = h else: info = '[%-*s]' % (2*me._hsz - 2, fmt.info()) print '%s %8s %6s %-12s %-20s %20s %s' % ( info, vino, fmt.mode(), fmt.owner(), fmt.mtime(), fmt.size(), fmt.name()) +###-------------------------------------------------------------------------- +### Database clearing from diff files. 
## Hunk header of a unified diff: `@@ -START[,COUNT] +START[,COUNT] @@'.  The
## two groups capture the old and new line counts.  A count may be omitted,
## in which case it is 1, and some diff producers append section text after
## the closing `@@'.
R_HUNK = RX.compile(r'^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@(?: .*)?$')

def clear_entry(db, lno, line):
  """
  Forget the cached hash for the file described by one report LINE.

  LINE is a report line with the diff marker already removed.  Its first
  field is either a bracketed type (`[symbolic-link]', say) or, failing
  that, a hash, in which case the entry is taken to be a regular file.  Six
  whitespace-separated fields follow, the last of which is the (escaped)
  file name; for a symbolic link the name is followed by ` -> TARGET'.

  The named file is examined with `lstat', relative to the current
  directory, and DB is asked to forget the resulting inode number.  LNO is
  the input line number, and is used only in diagnostics.

  Return true on success.  A file which no longer exists is reported but
  still counts as success; a parse failure or any other `lstat' error is
  reported and counts as failure.
  """

  ## Peel off the leading type-or-hash field.
  if line.startswith('['):
    pos = line.find(']')
    if pos < 0:
      moan("failed to parse file entry (type field; line %d)" % lno)
      return False
    ty = line[1:pos].strip()
    rest = line[pos + 1:]
  else:
    ff = line.split(None, 1)
    if len(ff) != 2:
      moan("failed to parse file entry (field split; line %d)" % lno)
      return False
    ty = 'regular-file'
    rest = ff[1]

  ## Only the name is wanted, but insist on the full set of fields so that
  ## a line which isn't a report entry is rejected rather than misread.
  ff = rest.split(None, 5)
  if len(ff) != 6:
    moan("failed to parse file entry (field split; line %d)" % lno)
    return False
  name = ff[5]

  ## Strip the link target from a symbolic link's name.
  if ty == 'symbolic-link':
    nn = name.split(' -> ', 1)
    if len(nn) != 2:
      moan("failed to parse file entry (name split; line %d)" % lno)
      return False
    name = nn[0]

  ## Undo the escaping.  A malformed escape sequence is just another kind
  ## of unparseable input, so report it rather than crashing.
  try:
    name = name.decode('string_escape')
  except ValueError:
    moan("failed to parse file entry (bad escape in name; line %d)" % lno)
    return False

  ## Find the file's current inode and drop its cache entry.
  try:
    st = OS.lstat(name)
  except OSError as e:
    moan("failed to stat `%s': %s" % (name, e.strerror))
    return e.errno == E.ENOENT
  print("Clear cache entry for `%s'" % name)
  db.forget(st.st_ino)
  return True

def clear_cache(db):
  """
  Read a unified diff from stdin and forget every changed entry in DB.

  Each added or removed line within a hunk is handed to `clear_entry';
  context lines, and anything outside of a hunk, are ignored.  Problems
  are reported as they are found and processing continues.

  Return true if the entire input was processed without error.
  """

  ## Work through the input diff file one line at a time.
  diffstate = 'gap'
  lno = 0
  good = True
  for line in stdin:
    if line.endswith('\n'): line = line[:-1]
    lno += 1

    ## We're in a gap between hunks.  Find a hunk header and extract the
    ## line counts; an omitted count means one line.
    if diffstate == 'gap':
      m = R_HUNK.match(line)
      if m:
        oldlines = int(m.group(1) or 1)
        newlines = int(m.group(2) or 1)
        diffstate = 'hunk'
        hdrlno = lno

    ## We're in a hunk.  Keep track of whether we've reached the end, and
    ## discard entries from the cache for mismatching lines.
    elif diffstate == 'hunk':
      ty = line[:1]
      if ty == '':
        ## Report entries are never empty, so this is an error.  Count it
        ## as a context line so that we stay in step with the header.
        moan("empty line in diff hunk (line %d)" % lno)
        good = False
        oldlines -= 1; newlines -= 1
      elif ty == ' ':
        oldlines -= 1; newlines -= 1
      elif ty == '+':
        newlines -= 1
        if not clear_entry(db, lno, line[1:]): good = False
      elif ty == '-':
        oldlines -= 1
        if not clear_entry(db, lno, line[1:]): good = False
      elif ty == '\\':
        ## `\ No newline at end of file': not counted by the header.
        pass
      else:
        moan("incomprehensible line in diff hunk (line %d)" % lno)
        good = False
      if oldlines < 0 or newlines < 0:
        moan("inconsistent lengths in diff hunk header (line %d)" % hdrlno)
        good = False
      if oldlines == newlines == 0:
        diffstate = 'gap'

  if diffstate == 'hunk':
    moan("truncated diff hunk (started at line %d)" % hdrlno)
    good = False

  return good

###--------------------------------------------------------------------------
### Main program.
OPTS, args = op.parse_args(argv)
if OPTS.compat not in (1, 2):
  die("unknown compatibility version %d" % OPTS.compat)

if OPTS.udiff:

  ## Cache-clearing mode: read a diff from stdin and forget the entries it
  ## mentions.  This needs a cache to work on, and makes no sense alongside
  ## the reporting options.  An optional argument names the directory
  ## against which the names in the diff are resolved.
  if OPTS.cache is None or OPTS.all or OPTS.files or len(args) > 2:
    die("incompatible options: `-u' requires `-c CACHE', forbids others")
  db = HashCache(OPTS.cache, OPTS.hash)
  if len(args) == 2:
    OS.chdir(args[1])

  ## Only commit the changes if the whole diff was processed cleanly.
  good = True if clear_cache(db) else False
  if not good:
    exit(2)
  db.flush()

else:

  ## Reporting mode: hash the files named by a file list and/or on the
  ## command line.
  if not OPTS.files and len(args) < 2:
    die("no filename sources: nothing to do")
  db = HashCache(OPTS.cache, OPTS.hash)
  if OPTS.all:
    db.reset()
  if OPTS.compat >= 2:
    print("## fshash report format version %d" % OPTS.compat)
  rep = Reporter(db)
  if OPTS.files:
    FMTMAP[OPTS.files](rep.file)
  for dir in args[1:]:
    enum_walk(dir, rep.file)
  if OPTS.all:
    db.prune()
  db.flush()

###----- That's all, folks --------------------------------------------------