Release 1.1.2.

[rsync-backup] / fshash.in
diff --git a/fshash.in b/fshash.in

index b6aa4cfc5f8ec9bd068943061c0baa56ac3e8b58..dcfd2294801260ccf7c35b03fb1a9175e61fcd5c 100644 (file)
--- a/fshash.in
+++ b/fshash.in
@@ -27,6 +27,7 @@ from sys import argv, exit, stdin, stdout, stderr
  import os as OS
  import re as RX
  import time as T
+import errno as E
  import stat as ST
  import optparse as OP
  import hashlib as H
@@ -98,10 +99,15 @@ def enum_walk(file, func):
          dir([OS.path.join(d.name, e) for e in dirents(d.name)], dev)
  
    if file.endswith('/'):
-    OS.chdir(file)
-    fi = FileInfo('.')
-    func(fi)
-    dir(dirents('.'), fi.st.st_dev)
+    cwd = OS.open('.', OS.O_RDONLY)
+    try:
+      OS.chdir(file)
+      fi = FileInfo('.')
+      func(fi)
+      dir(dirents('.'), fi.st.st_dev)
+    finally:
+      OS.fchdir(cwd)
+      OS.close(cwd)
    else:
      fi = FileInfo(file)
      func(fi)
@@ -326,6 +332,11 @@ class HashCache (object):
      if not me._db:
        die("no cache database")
  
+  def forget(me, ino):
+    """Delete the rows of the `hash' table whose `ino' column equals INO."""
+    me.need_db()
+    me._db.cursor().execute('DELETE FROM hash WHERE ino = ?', [ino])
+
    def reset(me):
      me.need_db()
      c = me._db.cursor()
@@ -438,12 +449,116 @@ class Reporter (object):
            suffix = '\0%d' % seq
            seq += 1
          me._inomap[inoidx] = vino
+        if OPTS.compat >= 2: me._vinomap[vino] = inoidx
      if h: info = h
      else: info = '[%-*s]' % (2*me._hsz - 2, fmt.info())
      print '%s %8s %6s %-12s %-20s %20s %s' % (
        info, vino, fmt.mode(), fmt.owner(),
        fmt.mtime(), fmt.size(), fmt.name())
  
+###--------------------------------------------------------------------------
+### Database clearing from diff files.
+
+## Hunk header; diff leaves out `,COUNT' when it's 1, and may append text.
+R_HUNK = RX.compile(r'^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@(?: .*)?$')
+
+def clear_entry(db, lno, line):
+  """Forget the cached hash for the file named on report line LINE.
+
+  LNO is the diff line number, for messages.  Return false if LINE can't be
+  parsed or the file can't be examined (a vanished file is merely noted).
+  """
+  ## An entry starts with a hash, or with a bracketed `[TYPE]' in its place.
+  if line.startswith('['):
+    pos = line.find(']')
+    if pos < 0:
+      moan("failed to parse file entry (type field; line %d)" % lno)
+      return False
+    ty = line[1:pos].strip()
+    rest = line[pos + 1:]
+  else:
+    ff = line.split(None, 1)
+    if len(ff) != 2:
+      moan("failed to parse file entry (field split; line %d)" % lno)
+      return False
+    ty = 'regular-file'
+    rest = ff[1]
+
+  ## Only the name is needed, but insist on all six remaining fields.
+  ff = rest.split(None, 5)
+  if len(ff) != 6:
+    moan("failed to parse file entry (field split; line %d)" % lno)
+    return False
+  name = ff[5]
+
+  ## A symbolic link is listed as `NAME -> TARGET'; keep just the name.
+  if ty == 'symbolic-link':
+    nn = name.split(' -> ', 1)
+    if len(nn) != 2:
+      moan("failed to parse file entry (name split; line %d)" % lno)
+      return False
+    name = nn[0]
+  name = name.decode('string_escape')
+
+  try:
+    st = OS.lstat(name)
+  except OSError, e:
+    moan("failed to stat `%s': %s" % (name, e.strerror))
+    return e.errno == E.ENOENT
+  print "Clear cache entry for `%s'" % name
+  db.forget(st.st_ino)
+  return True
+
+def clear_cache(db):
+  """Clear cache entries for files on changed lines of a diff on stdin.
+
+  Return true only if the diff parsed cleanly and every entry was handled.
+  """
+  diffstate = 'gap'
+  lno = 0
+  good = True
+  for line in stdin:
+    if line.endswith('\n'): line = line[:-1]
+    lno += 1
+
+    ## We're in a gap between hunks.  Find a hunk header and extract the line
+    ## counts; a count missing from the header stands for a single line.
+    if diffstate == 'gap':
+      m = R_HUNK.match(line)
+      if m:
+        oldlines = int(m.group(1) or 1)
+        newlines = int(m.group(2) or 1)
+        diffstate = 'hunk'
+        hdrlno = lno
+
+    ## We're in a hunk.  Keep track of whether we've reached the end, and
+    ## discard entries from the cache for mismatching lines.
+    elif diffstate == 'hunk':
+      ty = line[:1]                     # empty (not IndexError) if line is
+      if ty == '':
+        moan("empty line in diff hunk (line %d)" % lno)
+        good = False
+      elif ty == ' ':
+        oldlines -= 1; newlines -= 1
+      elif ty == '+' or ty == '-':
+        if ty == '+': newlines -= 1
+        else: oldlines -= 1
+        if not clear_entry(db, lno, line[1:]): good = False
+      else:
+        moan("incomprehensible line in diff hunk (line %d)" % lno)
+        good = False
+      if oldlines < 0 or newlines < 0:
+        moan("inconsistent lengths in diff hunk header (line %d)" % hdrlno)
+        good = False
+      if oldlines == newlines == 0:
+        diffstate = 'gap'
+
+  if diffstate == 'hunk':
+    moan("truncated diff hunk (started at line %d)" % hdrlno)
+    good = False
+
+  return good
+
  ###--------------------------------------------------------------------------
  ### Main program.
  
@@ -452,7 +567,7 @@ FMTMAP = {
    'find0': lambda f: enum_find0(stdin, f)
  }
  op = OP.OptionParser(
-  usage = '%prog [-a] [-c CACHE] [-f FORMAT] [-H HASH] [FILE ...]',
+  usage = '%prog [-au] [-c CACHE] [-f FORMAT] [-H HASH] [FILE ...]',
    version = '%%prog, version %s' % VERSION,
    description = '''\
  Print a digest of a filesystem (or a collection of specified files) to
@@ -470,24 +585,42 @@ for short, long, props in [
    ('-f', '--files', { 'dest': 'files', 'metavar': 'FORMAT',
                        'type': 'choice', 'choices': FMTMAP.keys(),
                        'help': 'read files to report in the given FORMAT' }),
+  ('-u', '--udiff', { 'action': 'store_true', 'dest': 'udiff',
+                      'help': 'read diff from stdin, clear cache entries' }),
+  ('-C', '--compat', { 'dest': 'compat', 'metavar': 'VERSION',
+                       'type': 'int', 'default': 2,
+                       'help': 'produce output with given compatibility VERSION' }),
    ('-H', '--hash', { 'dest': 'hash', 'metavar': 'HASH',
                       ##'type': 'choice', 'choices': H.algorithms,
                       'help': 'use HASH as the hash function' })]:
    op.add_option(short, long, **props)
-opts, args = op.parse_args(argv)
-
-if not opts.files and len(args) <= 1:
-  die("no filename sources: nothing to do")
-db = HashCache(opts.cache, opts.hash)
-if opts.all:
-  db.reset()
-rep = Reporter(db)
-if opts.files:
-  FMTMAP[opts.files](rep.file)
-for dir in args[1:]:
-  enum_walk(dir, rep.file)
-if opts.all:
-  db.prune()
-db.flush()
+## Parse the command line, then run in whichever mode was asked for.
+OPTS, args = op.parse_args(argv)
+if OPTS.compat not in (1, 2):
+  die("unknown compatibility version %d" % OPTS.compat)
+if OPTS.udiff:
+  ## Cache-clearing mode: needs a cache, and takes at most one argument,
+  ## a directory to change to before looking up the names in the diff.
+  if OPTS.cache is None or OPTS.all or OPTS.files or len(args) > 2:
+    die("incompatible options: `-u' requires `-c CACHE', forbids others")
+  db = HashCache(OPTS.cache, OPTS.hash)
+  if len(args) == 2: OS.chdir(args[1])
+  good = clear_cache(db)
+  if not good: exit(2)
+  db.flush()
+else:
+  ## Reporting mode: digest files named via `-f' and on the command line.
+  if not (OPTS.files or len(args) > 1):
+    die("no filename sources: nothing to do")
+  db = HashCache(OPTS.cache, OPTS.hash)
+  if OPTS.all: db.reset()
+  if OPTS.compat >= 2:
+    print "## fshash report format version %d" % OPTS.compat
+  rep = Reporter(db)
+  if OPTS.files: FMTMAP[OPTS.files](rep.file)
+  for dir in args[1:]:
+    enum_walk(dir, rep.file)
+  if OPTS.all: db.prune()
+  db.flush()
  
  ###----- That's all, folks --------------------------------------------------