+#! @PYTHON@
+###
+### Shamir secret sharing
+###
+### (c) 2011 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from __future__ import with_statement
+
+import sys as SYS
+import os as OS
+import hashlib as H
+import base64 as B
+from cStringIO import StringIO
+import optparse as OPT
+
+###--------------------------------------------------------------------------
+### Arithmetic in GF(2^8).
+###
+### These functions implement arithmetic in a field containing exactly 256
+### elements. This is convenient, because there's exactly one field element
+### for each one-byte value.
+###
+### Actually, we only implement multiplication and division, because addition
+### and subtraction are trivial -- they're the XOR operation that Python
+### already has. Multiplication and (especially) division are fiddly if you
+### do them directly, but the field is easily small enough that log tables
+### are a feasible alternative.
+###
+### There are many (isomorphic) representations of the field GF(2^8). The
+### one we use here is a field of residue classes of binary polynomials:
+### GF(2)[x]/(x^8 + x^4 + x^3 + x^2 + 1). Usefully, the element [x] (i.e.,
+### the residue class containing x) generates the multiplicative group of the
+### field: every nonzero element can be written in the form x^k for some
+### integer 0 <= k < 255. Multiplication by x is especially easy, so this is
+### a good choice as a base of logarithms.
+###
+### More precisely, we use an integer encoding of the residue classes. Each
+### class contains exactly one polynomial of degree less than 8, called the
+### principal representative. If the principal representative of a residue
+### class is a_7 x^7 + ... + a_1 x + a_0, with each a_i either 0 or 1, then
+### the encoding of the representative is 2^7 a_7 + ... + 2 a_1 + a_0. It is
+### easily verified that this mapping is invertible.
+
+def gf28_log_tables():
+ """Build the log tables used to implement GF(2^8) arithmetic."""
+ global GF28_LOG, GF28_EXP
+ log = [0] * 256
+ exp = [0] * 256
+ x = 1
+ for i in xrange(255):
+ exp[i] = x
+ log[x] = i
+ x <<= 1
+ if x & 0x100:
+ x ^= 0x11d
+ GF28_LOG = log
+ GF28_EXP = exp[:255] * 2
+gf28_log_tables()
+
+def gf28_mul(x, y):
+ """Multiply two elements X and Y of GF(2^8), retturning their product."""
+ if x == 0 or y == 0: return 0
+ else: return GF28_EXP[GF28_LOG[x] + GF28_LOG[y]]
+
+def gf28_div(x, y):
+ """
+ Divide element X by Y in GF(2^8), returning the quotient.
+
+ Raise `ZeroDivisionError' if Y is zero.
+ """
+ if y == 0: raise ZeroDivisionError
+ elif x == 0: return 0
+ else: return GF28_EXP[GF28_LOG[x] + 255 - GF28_LOG[y]]
+
+###--------------------------------------------------------------------------
+### Secret sharing machinery.
+###
+### Shamir's secret sharing scheme uses polynomial interpolation. Let K be a
+### finite field, and let s -- the `secret' -- be an element of K. Let t --
+### the `threshold' -- be a positive integer. A degree-(t - 1) polynomial
+### p(x) over K has the general form a_{t-1} x^{t-1} + ... + a_1 x + a_0; we
+### then have p(0) = a_0. If we choose a_0 = s, and the other a_i uniformly
+### at random from K, then we have chosen a degree-(t - 1) polynomial
+### uniformly at random, subject to the restrictiion that it evaluates to s
+### at 0. We can issue shares of the form (x, p(x)) for 0 < x < #K.
+###
+### Any collection of t shares can be used to reconstruct the polynomial. We
+### can use Lagrange interpolation to do this. Suppose that we're given t
+### different shares (x_i, y_i) for 0 <= i < t. Firstly, set
+###
+### ------- x - x_j
+### l_i(x) = | | ---------
+### | | x_i - x_j
+### 0 <= j < t
+### j /= i
+###
+### Now notice that l_i(x_i) = 1 and l_i(x_j) = 0 if j /= i. Then
+###
+### ----
+### p(x) = > l_i(x) y_i
+### ----
+### 0 <= i < t
+###
+### To see this, it suffices to note that (a) l_i(x) (and therefore the sum)
+### is a degree-(t - 1) polynomial, and (b) it agrees with p(x) at t distinct
+### points, namely each of the x_i. So, with t shares, we can recover the
+### original polynomial, and hence p(0) = s. With t - 1 shares, we can add a
+### fake `share' (0, s') for any guessed s' in K and interpolate
+### corresponding values for the remaining shares; we therefore have perfect
+### secrecy.
+###
+### We use K = GF(2^8), as implemented above. Each byte of a long secret is
+### shared independently.
+###
+### All of this machinery is the same as in my Catacomb library and Daniel
+### Silverstone's `gfshare' package.
+
+class SecretSplit (object):
+ """
+ Create shares of a secret.
+
+ Initialize with a secret S and a threshold T; then issue shares with the
+ `issue' method.
+ """
+
+ def __init__(me, s, t):
+ """
+ Initialize secret sharing.
+
+ S is a string containing the secret; T is the threshold, i.e., the number
+ of shares requierd to recover the secret.
+ """
+
+ ## The number of bytes in the secret is a useful thing to store.
+ me._n = len(s)
+
+ ## Build an array of coefficients chosen at random.
+ me._coeffs = [OS.urandom(me._n) for i in xrange(t - 1)] + [s]
+
+ def issue(me, x):
+ """
+ Issue the share with index X, returning it as a string.
+
+ The share will be binary, even if the original secret is plain text. The
+ index must be in the range 1 <= X <= 255.
+ """
+
+ ## Check that the index is in range.
+ if x < 1 or x > 255:
+ raise ValueError, 'share index out of range'
+
+ ## Evaluate the polynomial at the chosen index.
+ o = StringIO()
+ for i in xrange(me._n):
+ y = 0
+ for j in me._coeffs:
+ y = gf28_mul(x, y) ^ ord(j[i])
+ o.write(chr(y))
+ return o.getvalue()
+
+class SecretJoin (object):
+ """
+ Recover a secret given a number of shares.
+ """
+ def __init__(me):
+ me._n = None
+ me._m = {}
+
+ def insert(me, x, p):
+ """
+ Insert a share P with index X.
+
+ All the shares must have the same length. It doesn't matter what the
+ length of the first share is, but the others have to match it.
+ """
+
+ ## Check that the share index is in range.
+ if x < 1 or x > 255:
+ raise ValueError, 'share index out of range'
+
+ ## Make sure that we haven't been given this one already.
+ if x in me._m:
+ raise ValueError, 'duplicate share index'
+
+ ## Check the length of the share. Store the length if this is the first
+ ## share.
+ if me._n is None:
+ me._n = len(p)
+ elif len(p) != me._n:
+ raise ValueError, 'bad share length'
+
+ ## Store the share.
+ me._m[x] = p
+
+ def recover(me):
+ """
+ Recover the secret from the shares provided, and return it as a string.
+
+ It's assumed that the caller provided the correct number of shares.
+ """
+
+ ## Iniitialize a map of l_i(0) values. These depend only on the share
+ ## indices provided, and are therefore independent of the secret length.
+ ll = {}
+ kk = me._m.keys()
+ for xi in kk:
+ l = 1
+ for xj in kk:
+ if xi != xj:
+ l = gf28_mul(l, gf28_div(xj, xi ^ xj))
+ ll[xi] = l
+
+ ## Now do the interpolation.
+ o = StringIO()
+ for i in xrange(me._n):
+ y = 0
+ for xi in kk:
+ y = y ^ gf28_mul(ll[xi], ord(me._m[xi][i]))
+ o.write(chr(y))
+ return o.getvalue()
+
+###--------------------------------------------------------------------------
+### Key/value encoding.
+###
+### We use a simple encoding for the data structures we handle. The encoding
+### looks like this:
+###
+### LABEL:KEY=VALUE;KEY=VALUE;...
+###
+### The LABEL just says what kind of structure this is meant to be. The KEYs
+### are short strings which identify the structure slots. We're doing this
+### on the cheap, so order is significant, and there are no creature comforts
+### like optional slots. The VALUEs are encodings of the slot values; the
+### details of the encoding depend on the VALUE type, which is statically
+### determined by the structure.
+
+class StructureSlot (object):
+ """
+ Base class for structure slot types.
+
+ Subclasses must implement `encode' and `decode' methods.
+ """
+ def __init__(me, name):
+ """Initialize a structure slot, remembering its name."""
+ me._name = name
+
+class IntegerSlot (StructureSlot):
+ """
+ An integer slot.
+ """
+ TYPE = 'integer'
+ def encode(me, value):
+ return '%d' % value
+ def decode(me, string):
+ return int(string)
+
+class StringSlot (StructureSlot):
+ """
+ A string slot, unencoded. Be careful.
+ """
+ TYPE = 'string'
+ def encode(me, value):
+ return value
+ def decode(me, string):
+ return string
+
+class BinarySlot (StructureSlot):
+ """
+ A binary slot, encoded as Base64.
+ """
+ TYPE = 'base64'
+ def encode(me, value):
+ return B.b64encode(value)
+ def decode(me, string):
+ return B.b64decode(string)
+
+class StructureType (object):
+ """
+ Base class for classes which can encode and decode their slots.
+ """
+
+ def __init__(me, *args, **kw):
+ """
+ Initialize a structure object.
+
+ The arguments correspond to slots. Positional arguments are read first,
+ and stored in the appropriate attributes. Keywords arguments are used to
+ set arbitrary attributes, but it is an error to set an attribute which
+ already has a value.
+
+ The `decoded' method is called after argument processing.
+ """
+
+ _magic = False
+
+ ## Decode the arguments.
+ if len(args) > len(me.SLOTS):
+ raise ValueError, 'too many positional arguments'
+ for i in xrange(len(args)):
+ slotdef, attr = me.SLOTS[i]
+ setattr(me, attr, args[i])
+ for k, v in kw.iteritems():
+ if k == '_magic':
+ _magic = v
+ continue
+ if hasattr(me, k):
+ raise ValueError, 'already set attribute'
+ setattr(me, k, v)
+
+ ## Initialize the object. (There's a hack here for the `decode' method.)
+ if not _magic:
+ me.decoded()
+
+ def encode(me):
+ """
+ Encode an object as a string.
+ """
+ o = StringIO()
+ o.write(me.LABEL)
+ o.write(':')
+ sep = ''
+ for slotdef, attr in me.SLOTS:
+ o.write(sep)
+ o.write('%s=%s' % (slotdef._name, slotdef.encode(getattr(me, attr))))
+ sep = ';'
+ return o.getvalue()
+
+ def save(me, path):
+ """
+ Write an encoded description of the object to PATH.
+ """
+ with open(path + '.new', 'w') as f:
+ f.write(me.encode())
+ f.write('\n')
+ OS.rename(path + '.new', path)
+
+ @classmethod
+ def decode(cls, string):
+ """
+ Decode a STRING, returning a new object.
+
+ The `decoded' method is called on the new object.
+ """
+ if not string.startswith(cls.LABEL + ':'):
+ raise SyntaxError, 'incorrect label'
+ slots = string[len(cls.LABEL) + 1:].split(';')
+ if len(slots) != len(cls.SLOTS):
+ raise SyntaxError, 'wrong number of slots'
+ obj = cls(_magic = True)
+ for i in xrange(len(slots)):
+ slot = slots[i]
+ slotdef, attr = cls.SLOTS[i]
+ if not slot.startswith(slotdef._name + '='):
+ raise SyntaxError, 'incorrect slot name'
+ try:
+ val = slotdef.decode(slot[len(slotdef._name) + 1:])
+ except (ValueError, SyntaxError):
+ raise SyntaxError, 'invalid %s value' % slotdef.TYPE
+ setattr(obj, attr, val)
+ obj.decoded()
+ return obj
+
+ @classmethod
+ def load(cls, path):
+ """
+ Read an encoded description of a structure and return the decoded object.
+ """
+ with open(path, 'r') as f:
+ cont = f.read()
+ if cont and cont[-1] == '\n':
+ cont = cont[:-1]
+ if '\n' in cont:
+ raise SyntaxError, 'encoding must be a single line'
+ return cls.decode(cont)
+
+ def decoded(me):
+ """
+ Hook for object initialization.
+
+ This is called by `__init__' and `decode'.
+ """
+ pass
+
+###--------------------------------------------------------------------------
+### Higher-level share management.
+###
+### We store shares together with a bunch of metadata about the sharing
+### parameters, and a hash of all of that together with the final secret.
+### Theoretically, storing a hash weakens the whole scheme from providing
+### unconditional security to merely providing computational security (based
+### on the assupmption that the hash function is one-way). In practice, the
+### secret will be used as a symmetric encryption key or similar, so it's
+### already only secure against computationally bounded adversaries.
+
+class Secret (StructureType):
+ """
+ Represents a secret, either about to be shared or recently recovered.
+ """
+ LABEL = 'shamir-secret'
+ SLOTS = [(IntegerSlot('n'), 'count'),
+ (IntegerSlot('t'), 'thresh'),
+ (BinarySlot('s'), 'secret')]
+
+ def hash(me, hashfn):
+ """
+ Hash the secret and its parameters using the hash function HASHFN.
+
+ The hash is returned as a raw binary string. The HASHFN must be one of
+ the hash functions known to Python's `hashlib' module (essentially, hash
+ functions known to OpenSSL).
+ """
+ h = H.new(hashfn)
+ h.update(me.encode())
+ return h.digest()
+
+class Params (StructureType):
+ """
+ Represents secret sharing parameters.
+ """
+ LABEL = 'shamir-params'
+ SLOTS = [(IntegerSlot('n'), 'count'),
+ (IntegerSlot('t'), 'thresh'),
+ (StringSlot('f'), 'hashfn'),
+ (BinarySlot('h'), 'hash')]
+
+class Share (StructureType):
+ """
+ Represents a share of a secret.
+ """
+ LABEL = 'shamir-share'
+ SLOTS = [(IntegerSlot('i'), 'index'),
+ (BinarySlot('y'), 'share')]
+
+###--------------------------------------------------------------------------
+### Utilities.
+
+QUIS = OS.path.basename(SYS.argv[0])
+
+def moan(message):
+ SYS.stderr.write('%s: %s\n' % (QUIS, message))
+
+def die(message, rc = 1):
+ moan(message)
+ SYS.exit(rc)
+
+class struct (object):
+ def __init__(me, **kw):
+ me.__dict__.update(kw)
+
+class Subcommand (struct):
+ pass
+
+class SubcommandOptionParser (OPT.OptionParser, object):
+
+ def __init__(me, usage = '%prog [-OPTIONS] COMMAND [ARGS ...]',
+ *args, **kw):
+ super(SubcommandOptionParser, me).__init__(usage = usage, *args, **kw)
+ me.subcommands = [
+ Subcommand(name = 'help', usage = '[COMMAND ...]', opts = [],
+ func = me.cmd_help,
+ desc = 'Show help for %prog, or for the COMMANDs.')
+ ]
+ me.disable_interspersed_args()
+
+ def add_subcommand(me, name, func, usage, desc = None, opts = []):
+ subcmd = Subcommand(name = name, func = func, usage = usage,
+ desc = desc, opts = opts)
+ me.subcommands.append(subcmd)
+
+ def lookup_subcommand(me, name):
+ match = []
+ for sub in me.subcommands:
+ if sub.name == name:
+ return sub
+ elif sub.name.startswith(name):
+ match.append(sub)
+ if len(match) == 1:
+ return match[0]
+ elif len(match) == 0:
+ me.error("unknown command `%s'"% name)
+ else:
+ me.error("ambiguous command `%s': could be any of %s"
+ % (name, ', '.join(["`%s'" % sub.name for sub in match])))
+
+ def subcmd_usage(me, sub):
+ if sub.usage:
+ return '%s %s' % (sub.name, sub.usage)
+ else:
+ return sub.name
+
+ def print_help(me):
+ super(SubcommandOptionParser, me).print_help()
+ print
+ print 'Commands:'
+ for sub in me.subcommands:
+ if sub.usage is None:
+ continue
+ print ' ' + me.subcmd_usage(sub)
+
+ def make_subcommand_optparser(me, sub):
+ op = OPT.OptionParser(usage = 'usage: %%prog %s' % me.subcmd_usage(sub),
+ description = sub.desc)
+ for opt in sub.opts:
+ op.add_option(opt)
+ return op
+
+ def cmd_help(me, gopts, opts, args):
+ if not args:
+ me.print_help()
+ else:
+ for name in args:
+ sub = me.lookup_subcommand(name)
+ op = me.make_subcommand_optparser(sub)
+ op.print_help()
+
+ def dispatch(me, args = None):
+ if args is None:
+ args = SYS.argv[1:]
+ gopts, args = me.parse_args(args)
+ if not args:
+ me.error('missing command')
+ sub = me.lookup_subcommand(args[0])
+ op = me.make_subcommand_optparser(sub)
+ opts, args = op.parse_args(args[1:])
+ sub.func(gopts, opts, args)
+
+OPTPARSE = SubcommandOptionParser(description = """\
+Split and recombine secrets using Shamir's secret sharing system.
+""")
+
+###--------------------------------------------------------------------------
+### Issue shares given a secret.
+
+def parse_sharing(string):
+ slash = string.find('/')
+ if slash == -1:
+ raise ValueError, "no `/'"
+ return int(string[slash + 1:]), int(string[:slash])
+
+def cmd_issue(gopts, opts, args):
+
+ if len(args) < 1 or len(args) > 2:
+ OPTPARSE.error('bad arguments')
+
+ try:
+ count, thresh = parse_sharing(args[0])
+ except ValueError, err:
+ OPTPARSE.error("bad sharing parameters `%s': %s"
+ % (args[0], err.message))
+
+ if len(args) < 2 or args[1] == '-':
+ sec = SYS.stdin.read()
+ else:
+ with open(args[1], 'rb') as f:
+ sec = f.read()
+
+ secret = Secret(count, thresh, sec)
+ h = secret.hash(opts.hashfn)
+ params = Params(count, thresh, opts.hashfn, h)
+ print params.encode()
+
+ ss = SecretSplit(sec, thresh)
+ for i in xrange(count):
+ share = Share(i, ss.issue(i + 1))
+ print share.encode()
+
+OPTPARSE.add_subcommand(
+ 'issue', cmd_issue,
+ '[-H HASHFN] THRESH/COUNT [SECRET-FILE]',
+ """\
+Issue shares of a secret. The secret (as a binary lump) is read from
+SECRET-FILE, or standard input if SECRET-FILE is omitted. COUNT + 1 lines
+are written to standard output: the first line contains the sharing
+parameters, and does not need to be kept especially secret (though it
+contains a hash of the secret, so it compromises the unconditional security
+of the secret sharing scheme); the remaining lines are the shares, one per
+line. All of the lines are plain text.""",
+ [OPT.make_option('-H', '--hash-function', type = 'string',
+ action = 'store', dest = 'hashfn', default = 'sha256',
+ help = 'Hash function for verifying the share.')])
+
+###--------------------------------------------------------------------------
+### Recover a secret from shares.
+
+def cmd_recover(gopts, opts, args):
+
+ ## Keep track of how well we've done so far. Report all of the things
+ ## which go wrong.
+ lose = False
+ lineno = 1
+
+ ## Read the system parameters.
+ line = SYS.stdin.readline()
+ if line.endswith('\n'):
+ line = line[:-1]
+ try:
+ params = Params.decode(line)
+ except ValueError, err:
+ moan("parameters (line %d) invalid: %s" % (lineno, err.message))
+ lose = True
+
+ ## Now read the shares. Pay attention to syntax errors, but not to much
+ ## else. Build the maps for the sharing parameters.
+ sj = SecretJoin()
+ shmap = {}
+ for line in SYS.stdin:
+ lineno += 1
+ if line.endswith('\n'):
+ line = line[:-1]
+ try:
+ share = Share.decode(line)
+ except ValueError, err:
+ moan("share invalid (line %d): %s" % (lineno, err.message))
+ lose = True
+ continue
+ share.lineno = lineno
+ if share.index in shmap:
+ moan("duplicate share index "
+ "(line %d, index %d previously on line %d)" %
+ (lineno, share.index, shmap[share.index].lineno))
+ lose = True
+ elif share.index < 0 or share.index >= params.count:
+ moan("share index %d out of range (line %d)" % (share.index, lineno))
+ lose = True
+ elif len(shmap) >= params.thresh:
+ moan("enough shares already (line %d)" % lineno)
+ lose = True
+ else:
+ sj.insert(share.index + 1, share.share)
+ shmap[share.index] = share
+
+ if len(shmap) < params.thresh:
+ moan("not enough shares")
+ lose = True
+
+ if lose:
+ SYS.exit(1)
+
+ ## Reassemble the secret.
+ secret = Secret(params.count, params.thresh, sj.recover())
+
+ if params.hash != secret.hash(params.hashfn):
+ die('secret doesn\'t match hash')
+
+ if opts.file == '-':
+ SYS.stdout.write(secret.secret)
+ else:
+ with open(opts.file, 'wb') as f:
+ f.write(secret.secret)
+
+OPTPARSE.add_subcommand(
+ 'recover', cmd_recover,
+ '',
+ """\
+Standard input should contain a number of lines. The first line should
+contain the secret sharing parameters (the first line output by `issue');
+the remaining lines should contain shares. The shares may be supplied in
+any order. The secret is written to the OUTPUT file, or standard output.""",
+ [OPT.make_option('-o', '--output', action = 'store', type = 'string',
+ dest = 'file', default = '-',
+ help = 'Write the secret to FILE.')])
+
+###----- That's all, folks --------------------------------------------------
+
+if __name__ == '__main__':
+ OPTPARSE.dispatch()