produces manifests in the most recent format, but this option will force
it to be compatible with old versions. The original version was 1; all
later versions print a comment reporting the version number at the start
-of the manifest. The current version is 2.
+of the manifest. The current version is 3.
.TP
.B \-H, \-\-hash=\fIhash
Use the
.B fshash
writes filenames relative to the given directory.
.SS Output format
-Information about each filesystem object is written on a separate line.
-These lines can be quite long, and consist of a number of fields:
+For each filesystem object,
+.B fshash
+writes a summary line, followed by zero or more additional lines.
+A summary line can be quite long, and consists of a number of
+fields:
.hP 1.
For regular files, a cryptographic hash of the file's content, in
hexadecimal. For other kinds of filesystem object, a description of the
as appropriate, followed by the major and minor device numbers in
decimal, and separated by a colon.
.PP
+Additional lines begin with a tab character. The possible lines are as
+follows.
+.TP
+.I POSIX ACLs
+.RS
+If an object has nontrivial POSIX ACLs, then they are printed as additional
+lines of the form
+.IP
+.BI "acl posix-access " acl
+.PP
+or
+.IP
+.BI "acl posix-default " acl
+.PP
+for access and default ACLs respectively. The
+.I acl
+is the ACL in short text format, with numeric IDs, and with the entries
+sorted into a canonical order (owner, other users, mask, file group, other
+groups, others), or
+.BI <E nn \~ message >
+if there was an error retrieving the ACL.
+.PP
+An access ACL is nontrivial if it contains entries for
+other users, other groups, or a mask, i.e., it is not completely described by
+the file's traditional permissions. A default ACL is nontrivial if it is not
+empty.
+.PP
+POSIX ACLs are recognized at compatibility level 3 and above. ACL
+support requires the
+.B pylibacl
+library.
+.RE
+.TP
+.I Extended attributes
+.RS
+If an object has extended attributes then they are printed as additional lines
+of the form
+.IP
+.BI "xattr " name " " hash
+.PP
+where
+.I name
+is the attribute name, escaped in the same manner as filenames, and
+.I hash
+is the hash of the attribute's contents. Extended attribute hashes are not
+currently cached, because they're usually too small for it to be worthwhile.
+If errors occur, then
+.BI <E nn \~ message >
+is printed in place of the
+.I hash
+if the error occurred when trying to retrieve the attribute value,
+or in place of both
+.I name
+and
+.I hash
+if the error occurred when trying to list the attributes.
+.PP
+The
+.B fshash
+program does not print lines for extended attributes which are used to
+implement features which
+.B fshash
+handles explicitly: for example, on Linux, POSIX ACLs are stored in extended
+attributes named
+.B system.posix_acl_access
+and
+.BR system.posix_acl_default ,
+so these attributes are ignored if ACL support is available.
+.PP
+Extended attributes are recognized at compatibility level 3 and above.
+Extended attribute support requires the
+.B pyxattr
+or
+.B xattr
+library, or Python 3.3 or later.
+.RE
.SH BUGS
No attempt is made to sort filenames read in
.B find0
from sys import argv, exc_info, exit, stdin, stdout, stderr
import binascii as B
import errno as E
+import grp as GR
import hashlib as H
import optparse as OP
import os as OS
+import pwd as PW
import re as RX
import sqlite3 as DB
import stat as ST
def text(x): return x.decode(_FSENC, _FSENCERR)
def bytechr(x): return bytes([x])
def byteord(x): return x
+ def iterkeys(x): return x.keys()
else:
from cStringIO import StringIO; BytesIO = StringIO
def bin(x): return x
def text(x): return x
def bytechr(x): return chr(x)
def byteord(x): return ord(x)
+ def iterkeys(x): return x.iterkeys()
def excval(): return exc_info()[1]
QUIS = OS.path.basename(argv[0])
i = m.end(0)
return text(out.getvalue())
+## Decorator: wrap FUNC so that its results are remembered in a dictionary
+## keyed by the tuple of positional arguments. FUNC is only called the
+## first time a given argument tuple is seen; the wrapper accepts positional
+## arguments only, and they must be usable as dictionary keys. If FUNC
+## raises an exception then nothing is recorded for those arguments.
+def simple_memo(func):
+ memo = dict()
+ def _(*args):
+ try:
+ r = memo[args]
+ except KeyError:
+ r = func(*args)
+ memo[args] = r
+ return r
+ return _
+
+## Return the numeric user ID of the user called NAME, as found by
+## `PW.getpwnam'. Successful lookups are remembered by `simple_memo'.
+@simple_memo
+def name_uid(name):
+ pw = PW.getpwnam(name)
+ ## Item 2 of a password-database entry is its numeric user ID.
+ return pw[2]
+
+## Return the numeric group ID of the group called NAME, as found by
+## `GR.getgrnam'. Successful lookups are remembered by `simple_memo'.
+@simple_memo
+def name_gid(name):
+ gr = GR.getgrnam(name)
+ ## Item 2 of a group-database entry is its numeric group ID.
+ return gr[2]
+
+###--------------------------------------------------------------------------
+### Extended attributes.
+
+## Choose implementations of `listxattr' and `getxattr'. The default
+## `listxattr' reports that there are no attributes; it is replaced below if
+## a usable implementation is found. There is no default `getxattr': it is
+## only defined when one of the replacements below is installed, alongside a
+## `listxattr' which might actually return some names.
+def listxattr(f, follow_symlinks = True): return []
+if _PYVER >= (3, 3):
+ if hasattr(OS, "listxattr"):
+ getxattr, listxattr = OS.getxattr, OS.listxattr
+else:
+ try:
+ import xattr as _XA
+ except ImportError:
+ pass
+ else:
+ ## The imported module may offer either `list'/`get' or
+ ## `listxattr'/`getxattr'; wrap whichever pair is present so that callers
+ ## can pass `follow_symlinks', which is inverted to give `nofollow'.
+ if hasattr(_XA, "list"):
+ def listxattr(f, follow_symlinks = True):
+ return _XA.list(f, nofollow = not follow_symlinks)
+ def getxattr(f, a, follow_symlinks = True):
+ return _XA.get(f, a, nofollow = not follow_symlinks)
+ else:
+ def listxattr(f, follow_symlinks = True):
+ return _XA.listxattr(f, nofollow = not follow_symlinks)
+ def getxattr(f, a, follow_symlinks = True):
+ return _XA.getxattr(f, a, nofollow = not follow_symlinks)
+
+###--------------------------------------------------------------------------
+### Access control lists.
+
+## Set true below if the `posix1e' ACL library could be imported.
+HAVE_ACL_P = False
+
+## Selectors for the WHICH argument to `getacl': the access ACL or the
+## default ACL.
+ACL_ACC= 1
+ACL_DFLT = 2
+
+## Fallback used when the ACL library is unavailable: report no ACL.
+def getacl(f, which): return None
+try:
+ import posix1e as ACL
+except ImportError:
+ pass
+else:
+
+ ## Match a line from the standard ACL text format.
+ R_ACLENT = RX.compile(r"""^
+ \s*
+ (?: (u | user | g | group | m | mask | o | other)
+ \s* : \s*
+ (| [^:\s] | [^:\s] [^:]* [^:\s])
+ \s* : \s*
+ ([-rwx]*)
+ \s*) ?
+ (?: \# .*)? $
+ """, RX.VERBOSE)
+
+ ## Codes for the possible entry tag types. These are ordered so that we
+ ## can sort.
+ AT_OWNUID = 1
+ AT_USER = 2
+ AT_MASK = 3
+ AT_OWNGID = 4
+ AT_GROUP = 5
+ AT_OTHER = 6
+
+ ## Output tags corresponding to the codes.
+ ACL_TAGMAP = [None, "u", "u", "m", "g", "g", "o"]
+
+ HAVE_ACL_P = True
+
+ ## Return the ACL of the file F selected by WHICH (`ACL_ACC' or
+ ## `ACL_DFLT') as a string in canonical short text form, or `None' if the
+ ## ACL is trivial. Raises `ValueError' if WHICH is not recognized, or if
+ ## the text form of the ACL can't be parsed.
+ def getacl(f, which):
+
+ ## Fetch the file ACL.
+ if which == ACL_ACC: acl = ACL.ACL(file = f)
+ elif which == ACL_DFLT: acl = ACL.ACL(filedef = f)
+ else: raise ValueError("unexpected WHICH = %d" % which)
+
+ ## For maximum portability, only use the text format, which is guaranteed
+ ## to be supported if anything is. We'll have to parse this ourselves.
+ ## Honestly, an important part of what we're doing here is producing a
+ ## /canonical/ presentation of the ACL, which doesn't seem to be
+ ## something that even the less portable functions will do for us.
+ s = str(acl)
+ extp = False
+ entries = []
+
+ ## First pass: grind through the ACL entries and build a list of (TAG,
+ ## QUAL, MODE) triples, where the TAG is an `AT_...' code, the QUAL is
+ ## either `None' or a numeric ID, and the MODE is a bitmask of
+ ## permissions.
+ for line in s.split("\n"):
+ m = R_ACLENT.match(line)
+ if m is None: raise ValueError("unexpected ACL line `%s'" % line)
+ if not m.group(1): continue
+ tag, qual, perm = m.group(1), m.group(2), m.group(3)
+
+ if qual == "": qual = None
+
+ ## Convert the tag and qualifier.
+ if tag == "u" or tag == "user":
+ if qual is None: pass
+ elif qual.isdigit(): qual = int(qual, 10)
+ else: qual = name_uid(qual)
+ if qual is None: tag = AT_OWNUID
+ else: tag = AT_USER; extp = True
+ elif tag == "m" or tag == "mask":
+ if qual is not None:
+ raise ValueError("unexpected mask qualifier `%s'" % qual)
+ tag = AT_MASK; extp = True
+ elif tag == "g" or tag == "group":
+ if qual is None: pass
+ elif qual.isdigit(): qual = int(qual, 10)
+ else: qual = name_gid(qual)
+ if qual is None: tag = AT_OWNGID
+ else: tag = AT_GROUP; extp = True
+ elif tag == "o" or tag == "other":
+ if qual is not None:
+ raise ValueError("unexpected other qualifier `%s'" % qual)
+ tag = AT_OTHER
+ else:
+ raise ValueError("unexpected tag type `%s'" % tag)
+
+ ## Convert the permissions.
+ mode = 0
+ for ch in perm:
+ if ch == "r": mode |= 4
+ elif ch == "w": mode |= 2
+ elif ch == "x": mode |= 1
+ elif ch == "-": pass
+ else: raise ValueError("unexpected permission character `%s'" % ch)
+
+ ## Done.
+ entries.append((tag, qual, mode))
+
+ ## If the ACL is trivial then ignore it. An access ACL is trivial if it
+ ## contains only entries which are reflected in the traditional
+ ## permission bits. A default ACL is trivial if it's empty.
+ if (which == ACL_ACC and not extp) or \
+ (which == ACL_DFLT and not entries):
+ return None
+
+ ## Sort the entries. The tag codes are arranged so that this is a useful
+ ## ordering.
+ entries.sort()
+
+ ## Produce output. This happens to be the standard short text format,
+ ## with exclusively numeric IDs.
+ out = StringIO()
+ firstp = True
+ for tag, qual, mode in entries:
+ if firstp: firstp = False
+ else: out.write(",")
+ out.write(ACL_TAGMAP[tag])
+ out.write(":")
+ if qual is not None: out.write(str(qual))
+ out.write(":")
+ if mode&4: out.write("r")
+ else: out.write("-")
+ if mode&2: out.write("w")
+ else: out.write("-")
+ if mode&1: out.write("x")
+ else: out.write("-")
+
+ return out.getvalue()
+
###--------------------------------------------------------------------------
### File system enumeration.
+## The outcome of fetching extended attribute ATTR of FILE, without
+## following symbolic links. On success, `value' holds the attribute value
+## and `err' is `None'; if fetching raised `OSError' or `IOError', then
+## `value' is `None' and `err' holds the exception.
+class FileAttr (object):
+ def __init__(me, file, attr):
+ try: value = getxattr(file, attr, follow_symlinks = False)
+ except (OSError, IOError): me.value, me.err = None, excval()
+ else: me.value, me.err = value, None
+
class FileInfo (object):
def __init__(me, file, st = None):
me.name = file
me.st = None
me.err = excval()
+ me.xa, me.xa_err = dict(), None
+ me.acl_acc = me.aclerr_acc = None
+ me.acl_dflt = me.aclerr_dflt = None
+
+ if me.st is not None:
+
+ def collect_acl(which):
+ try:
+ return getacl(file, which), None
+ except (OSError, IOError):
+ err = excval()
+ if err.errno == E.ENOTSUP: return None, None
+ else: return None, excval()
+
+ if not ST.S_ISLNK(me.st.st_mode):
+ me.acl_acc, me.aclerr_acc = collect_acl(ACL_ACC)
+ if ST.S_ISDIR(me.st.st_mode):
+ me.acl_dflt, me.aclerr_dflt = collect_acl(ACL_DFLT)
+
+ try: names = listxattr(file, follow_symlinks = False)
+ except (OSError, IOError): me.xa_err = excval()
+ else:
+ for name in names:
+ if HAVE_ACL_P and (name == "system.posix_acl_access" or
+ name == "system.posix_acl_default"):
+ continue
+ me.xa[name] = FileAttr(file, name)
+
def enum_walk(file, func):
def dirents(name):
me._db = db
me._pend = 0
+ ## Return the hash of the byte string BLOB, computed using the hash
+ ## function named by `me.hash', as hexadecimal text.
+ def hashblob(me, blob):
+ h = H.new(me.hash)
+ h.update(blob)
+ return text(B.hexlify(h.digest()))
+
def hashfile(me, fi):
## If this isn't a proper file then don't try to hash it.
(info, vino, fmt.mode(), fmt.owner(),
fmt.mtime(), fmt.size(), fmt.name()))
+ if OPTS.compat >= 3:
+
+ for which, acl, err in \
+ [("posix-access", fi.acl_acc, fi.aclerr_acc),
+ ("posix-default", fi.acl_dflt, fi.aclerr_dflt)]:
+ if acl is not None:
+ print("\tacl %s %s" % (which, acl))
+ elif err is not None:
+ print("\tacl %s <E%d %s>" % (which, err.errno, err.strerror))
+
+ if fi.xa_err is not None:
+ print("\txattr <E%d %s>" % (fi.xa_err.errno, fi.xa_err.strerror))
+ else:
+ for name in sorted(iterkeys(fi.xa)):
+ attr = fi.xa[name]
+ if attr.err is None:
+ print("\txattr %s %s" %
+ (escapify(name), me._db.hashblob(attr.value)))
+ else:
+ print("\txattr %s <E%d %s>" %
+ (escapify(name), attr.err.errno, attr.err.strerror))
+
###--------------------------------------------------------------------------
### Database clearing from diff files.
('-u', '--udiff', { 'action': 'store_true', 'dest': 'udiff',
'help': 'read diff from stdin, clear cache entries' }),
('-C', '--compat', { 'dest': 'compat', 'metavar': 'VERSION',
- 'type': 'int', 'default': 2,
+ 'type': 'int', 'default': 3,
'help': 'produce output with given compatibility VERSION' }),
('-H', '--hash', { 'dest': 'hash', 'metavar': 'HASH',
##'type': 'choice', 'choices': H.algorithms,
'help': 'use HASH as the hash function' })]:
op.add_option(short, long, **props)
OPTS, args = op.parse_args(argv)
-if not 1 <= OPTS.compat <= 2:
+if not 1 <= OPTS.compat <= 3:
die("unknown compatibility version %d" % OPTS.compat)
if OPTS.udiff:
if OPTS.cache is None or OPTS.all or OPTS.files or len(args) > 2: