From 0153cc39f12cf4020fa9875e55dbdb0b06bb33ca Mon Sep 17 00:00:00 2001 Message-Id: <0153cc39f12cf4020fa9875e55dbdb0b06bb33ca.1746308375.git.mdw@distorted.org.uk> From: Mark Wooding Date: Sat, 1 Jun 2024 13:23:33 +0100 Subject: [PATCH] fshash.in: Support POSIX ACLs and extended attributes. Organization: Straylight/Edgeware From: Mark Wooding This is compatibility level 3. --- fshash.1 | 85 ++++++++++++++++++- fshash.in | 250 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 330 insertions(+), 5 deletions(-) diff --git a/fshash.1 b/fshash.1 index 8577d65..0c4ee9d 100644 --- a/fshash.1 +++ b/fshash.1 @@ -109,7 +109,7 @@ were made with the bugs. By default, produces manifests in the most recent format, but this option will force it to be compatible with old versions. The original version was 1; all later versions print a comment reporting the version number at the start -of the manifest. The current version is 2. +of the manifest. The current version is 3. .TP .B \-H, \-\-hash=\fIhash Use the @@ -134,8 +134,11 @@ is treated specially: .B fshash writes filenames relative to the given directory. .SS Output format -Information about each filesystem object is written on a separate line. -These lines can be quite long, and consist of a number of fields: +For each filesystem object, +.B fshash +writes a summary line, followed by zero or more additional lines. +A summary line can be quite long, and consists of a number of +fields: .hP 1. For regular files, a cryptographic hash of the file's content, in hexadecimal. For other kinds of filesystem object, a description of the @@ -229,6 +232,82 @@ or as appropriate, followed by the major and minor device numbers in decimal, and separated by a colon. .PP +Additional lines begin with a tab character. The possible lines are as +follows. 
+.TP +.I POSIX ACLs +.RS +If an object has nontrivial POSIX ACLs, then they are printed as additional +lines of the form +.IP +.BI "acl posix-access " acl +.PP +or +.IP +.BI "acl posix-default " acl +.PP +for access and default ACLs respectively. The +.I acl +is the ACL in short text format, with numeric IDs, and with the entries +sorted into a canonical order (owner, other users, mask, file group, other +groups, others), or +.BI <E errno " " message > +if there was an error retrieving the ACL. +.PP +An access ACL is nontrivial if it contains entries for +other users, other groups, or a mask, i.e., it is not completely described by +the file's traditional permissions. A default ACL is nontrivial if it is not +empty. +.PP +POSIX ACLs are recognized at compatibility level 3 and above. ACL +support requires the +.B pylibacl +library. +.RE +.TP +.I Extended attributes +.RS +If a file has extended attributes then they are printed as additional lines +of the form +.IP +.BI "xattr " name " " hash +.PP +where +.I name +is the attribute name, escaped in the same manner as filenames, and +.I hash +is the hash of the attribute's contents. Extended attribute hashes are not +currently cached, because they're usually too small for it to be worthwhile. +If errors occur, then +.BI <E errno " " message > +is printed in place of the +.I hash +if the error occurred when trying to retrieve the attribute value, +or in place of both +.I name +and +.I hash +if the error occurred when trying to list the attributes. +.PP +The +.B fshash +program does not print lines for extended attributes which are used to +implement features which +.B fshash +handles explicitly: for example, on Linux, POSIX ACLs are stored in extended +attributes named +.B system.posix_acl_access +and +.BR system.posix_acl_default , +so these attributes are ignored if ACL support is available. +.PP +Extended attributes are recognized at compatibility level 3 and above. +Extended attribute support requires the +.B pyxattr +or +.B xattr +library, or Python 3.3 or later. 
+.RE .SH BUGS No attempt is made to sort filenames read in .B find0 diff --git a/fshash.in b/fshash.in index f19a81e..1ffbf28 100644 --- a/fshash.in +++ b/fshash.in @@ -26,9 +26,11 @@ from sys import argv, exc_info, exit, stdin, stdout, stderr import binascii as B import errno as E +import grp as GR import hashlib as H import optparse as OP import os as OS +import pwd as PW import re as RX import sqlite3 as DB import stat as ST @@ -52,12 +54,14 @@ if _PYVER >= (3,): def text(x): return x.decode(_FSENC, _FSENCERR) def bytechr(x): return bytes([x]) def byteord(x): return x + def iterkeys(x): return x.keys() else: from cStringIO import StringIO; BytesIO = StringIO def bin(x): return x def text(x): return x def bytechr(x): return chr(x) def byteord(x): return ord(x) + def iterkeys(x): return x.iterkeys() def excval(): return exc_info()[1] QUIS = OS.path.basename(argv[0]) @@ -113,9 +117,196 @@ def unescapify(x): i = m.end(0) return text(out.getvalue()) +def simple_memo(func): + memo = dict() + def _(*args): + try: + r = memo[args] + except KeyError: + r = func(*args) + memo[args] = r + return r + return _ + +@simple_memo +def name_uid(name): + pw = PW.getpwnam(name) + return pw[2] + +@simple_memo +def name_gid(name): + gr = GR.getgrnam(name) + return gr[2] + +###-------------------------------------------------------------------------- +### Extended attributes. 
+ +def listxattr(f, follow_symlinks = True): return [] +if _PYVER >= (3, 3): + if hasattr(OS, "listxattr"): + getxattr, listxattr = OS.getxattr, OS.listxattr +else: + try: + import xattr as _XA + except ImportError: + pass + else: + if hasattr(_XA, "list"): + def listxattr(f, follow_symlinks = True): + return _XA.list(f, nofollow = not follow_symlinks) + def getxattr(f, a, follow_symlinks = True): + return _XA.get(f, a, nofollow = not follow_symlinks) + else: + def listxattr(f, follow_symlinks = True): + return _XA.listxattr(f, nofollow = not follow_symlinks) + def getxattr(f, a, follow_symlinks = True): + return _XA.getxattr(f, a, nofollow = not follow_symlinks) + +###-------------------------------------------------------------------------- +### Access control lists. + +HAVE_ACL_P = False + +ACL_ACC= 1 +ACL_DFLT = 2 + +def getacl(f, which): return None +try: + import posix1e as ACL +except ImportError: + pass +else: + + ## Match a line from the standard ACL text format. + R_ACLENT = RX.compile(r"""^ + \s* + (?: (u | user | g | group | m | mask | o | other) + \s* : \s* + (| [^:\s] | [^:\s] [^:]* [^:\s]) + \s* : \s* + ([-rwx]*) + \s*) ? + (?: \# .*)? $ + """, RX.VERBOSE) + + ## Codes for the possible entry tag types. These are ordered so that we + ## can sort. + AT_OWNUID = 1 + AT_USER = 2 + AT_MASK = 3 + AT_OWNGID = 4 + AT_GROUP = 5 + AT_OTHER = 6 + + ## Output tags corresponding to the codes. + ACL_TAGMAP = [None, "u", "u", "m", "g", "g", "o"] + + HAVE_ACL_P = True + + def getacl(f, which): + + ## Fetch the file ACL. + if which == ACL_ACC: acl = ACL.ACL(file = f) + elif which == ACL_DFLT: acl = ACL.ACL(filedef = f) + else: raise ValueError("unexpected WHICH = %d" % which) + + ## For maximum portability, only use the text format, which is guaranteed + ## to be supported if anything is. We'll have to parse this ourselves. 
+ ## Honestly, an important part of what we're doing here is producing a + ## /canonical/ presentation of the ACL, which doesn't seem to be + ## something that even the less portable functions will do for us. + s = str(acl) + extp = False + entries = [] + + ## First pass: grind through the ACL entries and build a list of (TAG, + ## QUAL, MODE) triples, where the TAG is an `AT_...' code, the QUAL is + ## either `None' or a numeric ID, and the MODE is a bitmask of + ## permissions. + for line in s.split("\n"): + m = R_ACLENT.match(line) + if m is None: raise ValueError("unexpected ACL line `%s'" % line) + if not m.group(1): continue + tag, qual, perm = m.group(1), m.group(2), m.group(3) + + if qual == "": qual = None + + ## Convert the tag and qualifier. + if tag == "u" or tag == "user": + if qual is None: pass + elif qual.isdigit(): qual = int(qual, 10) + else: qual = name_uid(qual) + if qual is None: tag = AT_OWNUID + else: tag = AT_USER; extp = True + elif tag == "m" or tag == "mask": + if qual is not None: + raise ValueError("unexpected mask qualifier `%s'" % qual) + tag = AT_MASK; extp = True + elif tag == "g" or tag == "group": + if qual is None: pass + elif qual.isdigit(): qual = int(qual, 10) + else: qual = name_gid(qual) + if qual is None: tag = AT_OWNGID + else: tag = AT_GROUP; extp = True + elif tag == "o" or tag == "other": + if qual is not None: + raise ValueError("unexpected other qualifier `%s'" % qual) + tag = AT_OTHER + else: + raise ValueError("unexpected tag type `%s'" % tag) + + ## Convert the permissions. + mode = 0 + for ch in perm: + if ch == "r": mode |= 4 + elif ch == "w": mode |= 2 + elif ch == "x": mode |= 1 + elif ch == "-": pass + else: raise ValueError("unexpected permission character `%s'" % ch) + + ## Done. + entries.append((tag, qual, mode)) + + ## If the ACL is trivial then ignore it. An access ACL is trivial if it + ## contains only entries which are reflected in the traditional + ## permission bits. 
A default ACL is trivial if it's empty. + if (which == ACL_ACC and not extp) or \ + (which == ACL_DFLT and not entries): + return None + + ## Sort the entries. The tag codes are arranged so that this is a useful + ## ordering. + entries.sort() + + ## Produce output. This happens to be the standard short text format, + ## with exclusively numeric IDs. + out = StringIO() + firstp = True + for tag, qual, mode in entries: + if firstp: firstp = False + else: out.write(",") + out.write(ACL_TAGMAP[tag]) + out.write(":") + if qual is not None: out.write(str(qual)) + out.write(":") + if mode&4: out.write("r") + else: out.write("-") + if mode&2: out.write("w") + else: out.write("-") + if mode&1: out.write("x") + else: out.write("-") + + return out.getvalue() + ###-------------------------------------------------------------------------- ### File system enumeration. +class FileAttr (object): + def __init__(me, file, attr): + try: value = getxattr(file, attr, follow_symlinks = False) + except (OSError, IOError): me.value, me.err = None, excval() + else: me.value, me.err = value, None + class FileInfo (object): def __init__(me, file, st = None): me.name = file @@ -130,6 +321,34 @@ class FileInfo (object): me.st = None me.err = excval() + me.xa, me.xa_err = dict(), None + me.acl_acc = me.aclerr_acc = None + me.acl_dflt = me.aclerr_dflt = None + + if me.st is not None: + + def collect_acl(which): + try: + return getacl(file, which), None + except (OSError, IOError): + err = excval() + if err.errno == E.ENOTSUP: return None, None + else: return None, excval() + + if not ST.S_ISLNK(me.st.st_mode): + me.acl_acc, me.aclerr_acc = collect_acl(ACL_ACC) + if ST.S_ISDIR(me.st.st_mode): + me.acl_dflt, me.aclerr_dflt = collect_acl(ACL_DFLT) + + try: names = listxattr(file, follow_symlinks = False) + except (OSError, IOError): me.xa_err = excval() + else: + for name in names: + if HAVE_ACL_P and (name == "system.posix_acl_access" or + name == "system.posix_acl_default"): + continue + 
+ me.xa[name] = FileAttr(file, name) + def enum_walk(file, func): def dirents(name): @@ -315,6 +534,11 @@ class HashCache (object): me._db = db me._pend = 0 + def hashblob(me, blob): + h = H.new(me.hash) + h.update(blob) + return text(B.hexlify(h.digest())) + def hashfile(me, fi): ## If this isn't a proper file then don't try to hash it. @@ -515,6 +739,28 @@ class Reporter (object): (info, vino, fmt.mode(), fmt.owner(), fmt.mtime(), fmt.size(), fmt.name())) + if OPTS.compat >= 3: + + for which, acl, err in \ + [("posix-access", fi.acl_acc, fi.aclerr_acc), + ("posix-default", fi.acl_dflt, fi.aclerr_dflt)]: + if acl is not None: + print("\tacl %s %s" % (which, acl)) + elif err is not None: + print("\tacl %s <E%d %s>" % (which, err.errno, err.strerror)) + + if fi.xa_err is not None: + print("\txattr <E%d %s>" % (fi.xa_err.errno, fi.xa_err.strerror)) + else: + for name in sorted(iterkeys(fi.xa)): + attr = fi.xa[name] + if attr.err is None: + print("\txattr %s %s" % + (escapify(name), me._db.hashblob(attr.value))) + else: + print("\txattr %s <E%d %s>" % + (escapify(name), attr.err.errno, attr.err.strerror)) + ###-------------------------------------------------------------------------- ### Database clearing from diff files. @@ -648,14 +894,14 @@ for short, long, props in [ ('-u', '--udiff', { 'action': 'store_true', 'dest': 'udiff', 'help': 'read diff from stdin, clear cache entries' }), ('-C', '--compat', { 'dest': 'compat', 'metavar': 'VERSION', - 'type': 'int', 'default': 2, + 'type': 'int', 'default': 3, 'help': 'produce output with given compatibility VERSION' }), ('-H', '--hash', { 'dest': 'hash', 'metavar': 'HASH', ##'type': 'choice', 'choices': H.algorithms, 'help': 'use HASH as the hash function' })]: op.add_option(short, long, **props) OPTS, args = op.parse_args(argv) -if not 1 <= OPTS.compat <= 2: +if not 1 <= OPTS.compat <= 3: die("unknown compatibility version %d" % OPTS.compat) if OPTS.udiff: if OPTS.cache is None or OPTS.all or OPTS.files or len(args) > 2: -- [mdw]