#! @PYTHON@ ### ### Shamir secret sharing ### ### (c) 2011 Mark Wooding ### ###----- Licensing notice --------------------------------------------------- ### ### This file is part of the distorted.org.uk key management suite. ### ### distorted-keys is free software; you can redistribute it and/or modify ### it under the terms of the GNU General Public License as published by ### the Free Software Foundation; either version 2 of the License, or ### (at your option) any later version. ### ### distorted-keys is distributed in the hope that it will be useful, ### but WITHOUT ANY WARRANTY; without even the implied warranty of ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ### GNU General Public License for more details. ### ### You should have received a copy of the GNU General Public License ### along with distorted-keys; if not, write to the Free Software Foundation, ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from __future__ import with_statement import sys as SYS import os as OS import hashlib as H import base64 as B from cStringIO import StringIO import optparse as OPT PACKAGE = '@PACKAGE@' VERSION = '@VERSION@' ###-------------------------------------------------------------------------- ### Arithmetic in GF(2^8). ### ### These functions implement arithmetic in a field containing exactly 256 ### elements. This is convenient, because there's exactly one field element ### for each one-byte value. ### ### Actually, we only implement multiplication and division, because addition ### and subtraction are trivial -- they're the XOR operation that Python ### already has. Multiplication and (especially) division are fiddly if you ### do them directly, but the field is easily small enough that log tables ### are a feasible alternative. ### ### There are many (isomorphic) representations of the field GF(2^8). The ### one we use here is a field of residue classes of binary polynomials: ### GF(2)[x]/(x^8 + x^4 + x^3 + x^2 + 1). Usefully, the element [x] (i.e., ### the residue class containing x) generates the multiplicative group of the ### field: every nonzero element can be written in the form x^k for some ### integer 0 <= k < 255. Multiplication by x is especially easy, so this is ### a good choice as a base of logarithms. ### ### More precisely, we use an integer encoding of the residue classes. Each ### class contains exactly one polynomial of degree less than 8, called the ### principal representative. If the principal representative of a residue ### class is a_7 x^7 + ... + a_1 x + a_0, with each a_i either 0 or 1, then ### the encoding of the representative is 2^7 a_7 + ... + 2 a_1 + a_0. It is ### easily verified that this mapping is invertible. def gf28_log_tables(): """Build the log tables used to implement GF(2^8) arithmetic.""" global GF28_LOG, GF28_EXP log = [0] * 256 exp = [0] * 256 x = 1 for i in xrange(255): exp[i] = x log[x] = i x <<= 1 if x & 0x100: x ^= 0x11d GF28_LOG = log GF28_EXP = exp[:255] * 2 gf28_log_tables() def gf28_mul(x, y): """Multiply two elements X and Y of GF(2^8), retturning their product.""" if x == 0 or y == 0: return 0 else: return GF28_EXP[GF28_LOG[x] + GF28_LOG[y]] def gf28_div(x, y): """ Divide element X by Y in GF(2^8), returning the quotient. Raise `ZeroDivisionError' if Y is zero. """ if y == 0: raise ZeroDivisionError elif x == 0: return 0 else: return GF28_EXP[GF28_LOG[x] + 255 - GF28_LOG[y]] ###-------------------------------------------------------------------------- ### Secret sharing machinery. ### ### Shamir's secret sharing scheme uses polynomial interpolation. Let K be a ### finite field, and let s -- the `secret' -- be an element of K. Let t -- ### the `threshold' -- be a positive integer. A degree-(t - 1) polynomial ### p(x) over K has the general form a_{t-1} x^{t-1} + ... + a_1 x + a_0; we ### then have p(0) = a_0. If we choose a_0 = s, and the other a_i uniformly ### at random from K, then we have chosen a degree-(t - 1) polynomial ### uniformly at random, subject to the restrictiion that it evaluates to s ### at 0. We can issue shares of the form (x, p(x)) for 0 < x < #K. ### ### Any collection of t shares can be used to reconstruct the polynomial. We ### can use Lagrange interpolation to do this. Suppose that we're given t ### different shares (x_i, y_i) for 0 <= i < t. Firstly, set ### ### ------- x - x_j ### l_i(x) = | | --------- ### | | x_i - x_j ### 0 <= j < t ### j /= i ### ### Now notice that l_i(x_i) = 1 and l_i(x_j) = 0 if j /= i. Then ### ### ---- ### p(x) = > l_i(x) y_i ### ---- ### 0 <= i < t ### ### To see this, it suffices to note that (a) l_i(x) (and therefore the sum) ### is a degree-(t - 1) polynomial, and (b) it agrees with p(x) at t distinct ### points, namely each of the x_i. So, with t shares, we can recover the ### original polynomial, and hence p(0) = s. With t - 1 shares, we can add a ### fake `share' (0, s') for any guessed s' in K and interpolate ### corresponding values for the remaining shares; we therefore have perfect ### secrecy. ### ### We use K = GF(2^8), as implemented above. Each byte of a long secret is ### shared independently. ### ### All of this machinery is the same as in my Catacomb library and Daniel ### Silverstone's `gfshare' package. class SecretSplit (object): """ Create shares of a secret. Initialize with a secret S and a threshold T; then issue shares with the `issue' method. """ def __init__(me, s, t): """ Initialize secret sharing. S is a string containing the secret; T is the threshold, i.e., the number of shares requierd to recover the secret. """ ## The number of bytes in the secret is a useful thing to store. me._n = len(s) ## Build an array of coefficients chosen at random. me._coeffs = [OS.urandom(me._n) for i in xrange(t - 1)] + [s] def issue(me, x): """ Issue the share with index X, returning it as a string. The share will be binary, even if the original secret is plain text. The index must be in the range 1 <= X <= 255. """ ## Check that the index is in range. if x < 1 or x > 255: raise ValueError, 'share index out of range' ## Evaluate the polynomial at the chosen index. o = StringIO() for i in xrange(me._n): y = 0 for j in me._coeffs: y = gf28_mul(x, y) ^ ord(j[i]) o.write(chr(y)) return o.getvalue() class SecretJoin (object): """ Recover a secret given a number of shares. """ def __init__(me): me._n = None me._m = {} def insert(me, x, p): """ Insert a share P with index X. All the shares must have the same length. It doesn't matter what the length of the first share is, but the others have to match it. """ ## Check that the share index is in range. if x < 1 or x > 255: raise ValueError, 'share index out of range' ## Make sure that we haven't been given this one already. if x in me._m: raise ValueError, 'duplicate share index' ## Check the length of the share. Store the length if this is the first ## share. if me._n is None: me._n = len(p) elif len(p) != me._n: raise ValueError, 'bad share length' ## Store the share. me._m[x] = p def recover(me): """ Recover the secret from the shares provided, and return it as a string. It's assumed that the caller provided the correct number of shares. """ ## Iniitialize a map of l_i(0) values. These depend only on the share ## indices provided, and are therefore independent of the secret length. ll = {} kk = me._m.keys() for xi in kk: l = 1 for xj in kk: if xi != xj: l = gf28_mul(l, gf28_div(xj, xi ^ xj)) ll[xi] = l ## Now do the interpolation. o = StringIO() for i in xrange(me._n): y = 0 for xi in kk: y = y ^ gf28_mul(ll[xi], ord(me._m[xi][i])) o.write(chr(y)) return o.getvalue() ###-------------------------------------------------------------------------- ### Key/value encoding. ### ### We use a simple encoding for the data structures we handle. The encoding ### looks like this: ### ### LABEL:KEY=VALUE;KEY=VALUE;... ### ### The LABEL just says what kind of structure this is meant to be. The KEYs ### are short strings which identify the structure slots. We're doing this ### on the cheap, so order is significant, and there are no creature comforts ### like optional slots. The VALUEs are encodings of the slot values; the ### details of the encoding depend on the VALUE type, which is statically ### determined by the structure. class StructureSlot (object): """ Base class for structure slot types. Subclasses must implement `encode' and `decode' methods. """ def __init__(me, name): """Initialize a structure slot, remembering its name.""" me._name = name class IntegerSlot (StructureSlot): """ An integer slot. """ TYPE = 'integer' def encode(me, value): return '%d' % value def decode(me, string): return int(string) class StringSlot (StructureSlot): """ A string slot, unencoded. Be careful. """ TYPE = 'string' def encode(me, value): return value def decode(me, string): return string class BinarySlot (StructureSlot): """ A binary slot, encoded as Base64. """ TYPE = 'base64' def encode(me, value): return B.b64encode(value) def decode(me, string): return B.b64decode(string) class StructureType (object): """ Base class for classes which can encode and decode their slots. """ def __init__(me, *args, **kw): """ Initialize a structure object. The arguments correspond to slots. Positional arguments are read first, and stored in the appropriate attributes. Keywords arguments are used to set arbitrary attributes, but it is an error to set an attribute which already has a value. The `decoded' method is called after argument processing. """ _magic = False ## Decode the arguments. if len(args) > len(me.SLOTS): raise ValueError, 'too many positional arguments' for i in xrange(len(args)): slotdef, attr = me.SLOTS[i] setattr(me, attr, args[i]) for k, v in kw.iteritems(): if k == '_magic': _magic = v continue if hasattr(me, k): raise ValueError, 'already set attribute' setattr(me, k, v) ## Initialize the object. (There's a hack here for the `decode' method.) if not _magic: me.decoded() def encode(me): """ Encode an object as a string. """ o = StringIO() o.write(me.LABEL) o.write(':') sep = '' for slotdef, attr in me.SLOTS: o.write(sep) o.write('%s=%s' % (slotdef._name, slotdef.encode(getattr(me, attr)))) sep = ';' return o.getvalue() def save(me, path): """ Write an encoded description of the object to PATH. """ with open(path + '.new', 'w') as f: f.write(me.encode()) f.write('\n') OS.rename(path + '.new', path) @classmethod def decode(cls, string): """ Decode a STRING, returning a new object. The `decoded' method is called on the new object. """ if not string.startswith(cls.LABEL + ':'): raise SyntaxError, 'incorrect label' slots = string[len(cls.LABEL) + 1:].split(';') if len(slots) != len(cls.SLOTS): raise SyntaxError, 'wrong number of slots' obj = cls(_magic = True) for i in xrange(len(slots)): slot = slots[i] slotdef, attr = cls.SLOTS[i] if not slot.startswith(slotdef._name + '='): raise SyntaxError, 'incorrect slot name' try: val = slotdef.decode(slot[len(slotdef._name) + 1:]) except (ValueError, SyntaxError): raise SyntaxError, 'invalid %s value' % slotdef.TYPE setattr(obj, attr, val) obj.decoded() return obj @classmethod def load(cls, path): """ Read an encoded description of a structure and return the decoded object. """ with open(path, 'r') as f: cont = f.read() if cont and cont[-1] == '\n': cont = cont[:-1] if '\n' in cont: raise SyntaxError, 'encoding must be a single line' return cls.decode(cont) def decoded(me): """ Hook for object initialization. This is called by `__init__' and `decode'. """ pass ###-------------------------------------------------------------------------- ### Higher-level share management. ### ### We store shares together with a bunch of metadata about the sharing ### parameters, and a hash of all of that together with the final secret. ### Theoretically, storing a hash weakens the whole scheme from providing ### unconditional security to merely providing computational security (based ### on the assupmption that the hash function is one-way). In practice, the ### secret will be used as a symmetric encryption key or similar, so it's ### already only secure against computationally bounded adversaries. class Secret (StructureType): """ Represents a secret, either about to be shared or recently recovered. """ LABEL = 'shamir-secret' SLOTS = [(IntegerSlot('n'), 'count'), (IntegerSlot('t'), 'thresh'), (BinarySlot('s'), 'secret')] def hash(me, hashfn): """ Hash the secret and its parameters using the hash function HASHFN. The hash is returned as a raw binary string. The HASHFN must be one of the hash functions known to Python's `hashlib' module (essentially, hash functions known to OpenSSL). """ h = H.new(hashfn) h.update(me.encode()) return h.digest() class Params (StructureType): """ Represents secret sharing parameters. """ LABEL = 'shamir-params' SLOTS = [(IntegerSlot('n'), 'count'), (IntegerSlot('t'), 'thresh'), (StringSlot('f'), 'hashfn'), (BinarySlot('h'), 'hash')] class Share (StructureType): """ Represents a share of a secret. """ LABEL = 'shamir-share' SLOTS = [(IntegerSlot('i'), 'index'), (BinarySlot('y'), 'share')] ###-------------------------------------------------------------------------- ### Utilities. QUIS = OS.path.basename(SYS.argv[0]) def moan(message): SYS.stderr.write('%s: %s\n' % (QUIS, message)) def die(message, rc = 1): moan(message) SYS.exit(rc) class struct (object): def __init__(me, **kw): me.__dict__.update(kw) class Subcommand (struct): pass class SubcommandOptionParser (OPT.OptionParser, object): def __init__(me, usage = '%prog [-OPTIONS] COMMAND [ARGS ...]', *args, **kw): super(SubcommandOptionParser, me).__init__(usage = usage, *args, **kw) me.subcommands = [ Subcommand(name = 'help', usage = '[COMMAND ...]', opts = [], func = me.cmd_help, desc = 'Show help for %prog, or for the COMMANDs.') ] me.disable_interspersed_args() def add_subcommand(me, name, func, usage, desc = None, opts = []): subcmd = Subcommand(name = name, func = func, usage = usage, desc = desc, opts = opts) me.subcommands.append(subcmd) def lookup_subcommand(me, name): match = [] for sub in me.subcommands: if sub.name == name: return sub elif sub.name.startswith(name): match.append(sub) if len(match) == 1: return match[0] elif len(match) == 0: me.error("unknown command `%s'"% name) else: me.error("ambiguous command `%s': could be any of %s" % (name, ', '.join(["`%s'" % sub.name for sub in match]))) def subcmd_usage(me, sub): if sub.usage: return '%s %s' % (sub.name, sub.usage) else: return sub.name def print_help(me): super(SubcommandOptionParser, me).print_help() print print 'Commands:' for sub in me.subcommands: if sub.usage is None: continue print ' ' + me.subcmd_usage(sub) def make_subcommand_optparser(me, sub): op = OPT.OptionParser(usage = 'usage: %%prog %s' % me.subcmd_usage(sub), description = sub.desc) for opt in sub.opts: op.add_option(opt) return op def cmd_help(me, gopts, opts, args): if not args: me.print_help() else: for name in args: sub = me.lookup_subcommand(name) op = me.make_subcommand_optparser(sub) op.print_help() def dispatch(me, args = None): if args is None: args = SYS.argv[1:] gopts, args = me.parse_args(args) if not args: me.error('missing command') sub = me.lookup_subcommand(args[0]) op = me.make_subcommand_optparser(sub) opts, args = op.parse_args(args[1:]) sub.func(gopts, opts, args) def subcommand(name, usage, desc, opts = []): def _(func): OPTPARSE.add_subcommand(name, func, usage, desc, opts) return func return _ OPTPARSE = SubcommandOptionParser( usage = '%prog SUBCOMMAND [ARGS ...]', version = '%%prog, %s version %s' % (PACKAGE, VERSION), description = """\ Split and recombine secrets using Shamir's secret sharing system. """) ###-------------------------------------------------------------------------- ### Issue shares given a secret. def parse_sharing(string): slash = string.find('/') if slash == -1: raise ValueError, "no `/'" return int(string[slash + 1:]), int(string[:slash]) @subcommand('issue', '[-H HASHFN] THRESH/COUNT [SECRET-FILE]', """\ Issue shares of a secret. The secret (as a binary lump) is read from SECRET-FILE, or standard input if SECRET-FILE is omitted. COUNT + 1 lines are written to standard output: the first line contains the sharing parameters, and does not need to be kept especially secret (though it contains a hash of the secret, so it compromises the unconditional security of the secret sharing scheme); the remaining lines are the shares, one per line. All of the lines are plain text.""", [OPT.make_option('-H', '--hash-function', type = 'string', action = 'store', dest = 'hashfn', default = 'sha256', help = 'Hash function for verifying the share.')]) def cmd_issue(gopts, opts, args): if len(args) < 1 or len(args) > 2: OPTPARSE.error('bad arguments') try: count, thresh = parse_sharing(args[0]) except ValueError, err: OPTPARSE.error("bad sharing parameters `%s': %s" % (args[0], err.message)) if len(args) < 2 or args[1] == '-': sec = SYS.stdin.read() else: with open(args[1], 'rb') as f: sec = f.read() secret = Secret(count, thresh, sec) h = secret.hash(opts.hashfn) params = Params(count, thresh, opts.hashfn, h) print params.encode() ss = SecretSplit(sec, thresh) for i in xrange(count): share = Share(i, ss.issue(i + 1)) print share.encode() ###-------------------------------------------------------------------------- ### Recover a secret from shares. @subcommand('recover', '', """\ Standard input should contain a number of lines. The first line should contain the secret sharing parameters (the first line output by `issue'); the remaining lines should contain shares. The shares may be supplied in any order. The secret is written to the OUTPUT file, or standard output.""", [OPT.make_option('-o', '--output', action = 'store', type = 'string', dest = 'file', default = '-', help = 'Write the secret to FILE.')]) def cmd_recover(gopts, opts, args): ## Keep track of how well we've done so far. Report all of the things ## which go wrong. lose = False lineno = 1 ## Read the system parameters. line = SYS.stdin.readline() if line.endswith('\n'): line = line[:-1] try: params = Params.decode(line) except ValueError, err: moan("parameters (line %d) invalid: %s" % (lineno, err.message)) lose = True ## Now read the shares. Pay attention to syntax errors, but not to much ## else. Build the maps for the sharing parameters. sj = SecretJoin() shmap = {} for line in SYS.stdin: lineno += 1 if line.endswith('\n'): line = line[:-1] try: share = Share.decode(line) except ValueError, err: moan("share invalid (line %d): %s" % (lineno, err.message)) lose = True continue share.lineno = lineno if share.index in shmap: moan("duplicate share index " "(line %d, index %d previously on line %d)" % (lineno, share.index, shmap[share.index].lineno)) lose = True elif share.index < 0 or share.index >= params.count: moan("share index %d out of range (line %d)" % (share.index, lineno)) lose = True elif len(shmap) >= params.thresh: moan("enough shares already (line %d)" % lineno) lose = True else: sj.insert(share.index + 1, share.share) shmap[share.index] = share if len(shmap) < params.thresh: moan("not enough shares") lose = True if lose: SYS.exit(1) ## Reassemble the secret. secret = Secret(params.count, params.thresh, sj.recover()) if params.hash != secret.hash(params.hashfn): die('secret doesn\'t match hash') if opts.file == '-': SYS.stdout.write(secret.secret) else: with open(opts.file, 'wb') as f: f.write(secret.secret) ###----- That's all, folks -------------------------------------------------- if __name__ == '__main__': OPTPARSE.dispatch()