3 ### Shamir secret sharing
5 ### (c) 2011 Mark Wooding
8 ###----- Licensing notice ---------------------------------------------------
10 ### This program is free software; you can redistribute it and/or modify
11 ### it under the terms of the GNU General Public License as published by
12 ### the Free Software Foundation; either version 2 of the License, or
13 ### (at your option) any later version.
15 ### This program is distributed in the hope that it will be useful,
16 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ### GNU General Public License for more details.
20 ### You should have received a copy of the GNU General Public License
21 ### along with this program; if not, write to the Free Software Foundation,
22 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 from __future__ import with_statement
30 from cStringIO import StringIO
31 import optparse as OPT
33 ###--------------------------------------------------------------------------
34 ### Arithmetic in GF(2^8).
36 ### These functions implement arithmetic in a field containing exactly 256
37 ### elements. This is convenient, because there's exactly one field element
38 ### for each one-byte value.
40 ### Actually, we only implement multiplication and division, because addition
41 ### and subtraction are trivial -- they're the XOR operation that Python
42 ### already has. Multiplication and (especially) division are fiddly if you
43 ### do them directly, but the field is easily small enough that log tables
44 ### are a feasible alternative.
46 ### There are many (isomorphic) representations of the field GF(2^8). The
47 ### one we use here is a field of residue classes of binary polynomials:
48 ### GF(2)[x]/(x^8 + x^4 + x^3 + x^2 + 1). Usefully, the element [x] (i.e.,
49 ### the residue class containing x) generates the multiplicative group of the
50 ### field: every nonzero element can be written in the form x^k for some
51 ### integer 0 <= k < 255. Multiplication by x is especially easy, so this is
52 ### a good choice as a base of logarithms.
54 ### More precisely, we use an integer encoding of the residue classes. Each
55 ### class contains exactly one polynomial of degree less than 8, called the
56 ### principal representative. If the principal representative of a residue
57 ### class is a_7 x^7 + ... + a_1 x + a_0, with each a_i either 0 or 1, then
58 ### the encoding of the representative is 2^7 a_7 + ... + 2 a_1 + a_0. It is
59 ### easily verified that this mapping is invertible.
61 def gf28_log_tables():
62 """Build the log tables used to implement GF(2^8) arithmetic."""
63 global GF28_LOG, GF28_EXP
74 GF28_EXP = exp[:255] * 2
78 """Multiply two elements X and Y of GF(2^8), retturning their product."""
79 if x == 0 or y == 0: return 0
80 else: return GF28_EXP[GF28_LOG[x] + GF28_LOG[y]]
84 Divide element X by Y in GF(2^8), returning the quotient.
86 Raise `ZeroDivisionError' if Y is zero.
88 if y == 0: raise ZeroDivisionError
90 else: return GF28_EXP[GF28_LOG[x] + 255 - GF28_LOG[y]]
92 ###--------------------------------------------------------------------------
93 ### Secret sharing machinery.
95 ### Shamir's secret sharing scheme uses polynomial interpolation. Let K be a
96 ### finite field, and let s -- the `secret' -- be an element of K. Let t --
97 ### the `threshold' -- be a positive integer. A degree-(t - 1) polynomial
98 ### p(x) over K has the general form a_{t-1} x^{t-1} + ... + a_1 x + a_0; we
99 ### then have p(0) = a_0. If we choose a_0 = s, and the other a_i uniformly
100 ### at random from K, then we have chosen a degree-(t - 1) polynomial
101 ### uniformly at random, subject to the restrictiion that it evaluates to s
102 ### at 0. We can issue shares of the form (x, p(x)) for 0 < x < #K.
104 ### Any collection of t shares can be used to reconstruct the polynomial. We
105 ### can use Lagrange interpolation to do this. Suppose that we're given t
106 ### different shares (x_i, y_i) for 0 <= i < t. Firstly, set
109 ### l_i(x) = | | ---------
114 ### Now notice that l_i(x_i) = 1 and l_i(x_j) = 0 if j /= i. Then
117 ### p(x) = > l_i(x) y_i
121 ### To see this, it suffices to note that (a) l_i(x) (and therefore the sum)
122 ### is a degree-(t - 1) polynomial, and (b) it agrees with p(x) at t distinct
123 ### points, namely each of the x_i. So, with t shares, we can recover the
124 ### original polynomial, and hence p(0) = s. With t - 1 shares, we can add a
125 ### fake `share' (0, s') for any guessed s' in K and interpolate
126 ### corresponding values for the remaining shares; we therefore have perfect
129 ### We use K = GF(2^8), as implemented above. Each byte of a long secret is
130 ### shared independently.
132 ### All of this machinery is the same as in my Catacomb library and Daniel
133 ### Silverstone's `gfshare' package.
135 class SecretSplit (object):
137 Create shares of a secret.
139 Initialize with a secret S and a threshold T; then issue shares with the
143 def __init__(me, s, t):
145 Initialize secret sharing.
147 S is a string containing the secret; T is the threshold, i.e., the number
148 of shares requierd to recover the secret.
151 ## The number of bytes in the secret is a useful thing to store.
154 ## Build an array of coefficients chosen at random.
155 me._coeffs = [OS.urandom(me._n) for i in xrange(t - 1)] + [s]
159 Issue the share with index X, returning it as a string.
161 The share will be binary, even if the original secret is plain text. The
162 index must be in the range 1 <= X <= 255.
165 ## Check that the index is in range.
167 raise ValueError, 'share index out of range'
169 ## Evaluate the polynomial at the chosen index.
171 for i in xrange(me._n):
174 y = gf28_mul(x, y) ^ ord(j[i])
178 class SecretJoin (object):
180 Recover a secret given a number of shares.
186 def insert(me, x, p):
188 Insert a share P with index X.
190 All the shares must have the same length. It doesn't matter what the
191 length of the first share is, but the others have to match it.
194 ## Check that the share index is in range.
196 raise ValueError, 'share index out of range'
198 ## Make sure that we haven't been given this one already.
200 raise ValueError, 'duplicate share index'
202 ## Check the length of the share. Store the length if this is the first
206 elif len(p) != me._n:
207 raise ValueError, 'bad share length'
214 Recover the secret from the shares provided, and return it as a string.
216 It's assumed that the caller provided the correct number of shares.
219 ## Iniitialize a map of l_i(0) values. These depend only on the share
220 ## indices provided, and are therefore independent of the secret length.
227 l = gf28_mul(l, gf28_div(xj, xi ^ xj))
230 ## Now do the interpolation.
232 for i in xrange(me._n):
235 y = y ^ gf28_mul(ll[xi], ord(me._m[xi][i]))
239 ###--------------------------------------------------------------------------
240 ### Key/value encoding.
242 ### We use a simple encoding for the data structures we handle. The encoding
245 ### LABEL:KEY=VALUE;KEY=VALUE;...
247 ### The LABEL just says what kind of structure this is meant to be. The KEYs
248 ### are short strings which identify the structure slots. We're doing this
249 ### on the cheap, so order is significant, and there are no creature comforts
250 ### like optional slots. The VALUEs are encodings of the slot values; the
251 ### details of the encoding depend on the VALUE type, which is statically
252 ### determined by the structure.
254 class StructureSlot (object):
256 Base class for structure slot types.
258 Subclasses must implement `encode' and `decode' methods.
260 def __init__(me, name):
261 """Initialize a structure slot, remembering its name."""
264 class IntegerSlot (StructureSlot):
269 def encode(me, value):
271 def decode(me, string):
274 class StringSlot (StructureSlot):
276 A string slot, unencoded. Be careful.
279 def encode(me, value):
281 def decode(me, string):
284 class BinarySlot (StructureSlot):
286 A binary slot, encoded as Base64.
289 def encode(me, value):
290 return B.b64encode(value)
291 def decode(me, string):
292 return B.b64decode(string)
294 class StructureType (object):
296 Base class for classes which can encode and decode their slots.
299 def __init__(me, *args, **kw):
301 Initialize a structure object.
303 The arguments correspond to slots. Positional arguments are read first,
304 and stored in the appropriate attributes. Keywords arguments are used to
305 set arbitrary attributes, but it is an error to set an attribute which
308 The `decoded' method is called after argument processing.
313 ## Decode the arguments.
314 if len(args) > len(me.SLOTS):
315 raise ValueError, 'too many positional arguments'
316 for i in xrange(len(args)):
317 slotdef, attr = me.SLOTS[i]
318 setattr(me, attr, args[i])
319 for k, v in kw.iteritems():
324 raise ValueError, 'already set attribute'
327 ## Initialize the object. (There's a hack here for the `decode' method.)
333 Encode an object as a string.
339 for slotdef, attr in me.SLOTS:
341 o.write('%s=%s' % (slotdef._name, slotdef.encode(getattr(me, attr))))
347 Write an encoded description of the object to PATH.
349 with open(path + '.new', 'w') as f:
352 OS.rename(path + '.new', path)
355 def decode(cls, string):
357 Decode a STRING, returning a new object.
359 The `decoded' method is called on the new object.
361 if not string.startswith(cls.LABEL + ':'):
362 raise SyntaxError, 'incorrect label'
363 slots = string[len(cls.LABEL) + 1:].split(';')
364 if len(slots) != len(cls.SLOTS):
365 raise SyntaxError, 'wrong number of slots'
366 obj = cls(_magic = True)
367 for i in xrange(len(slots)):
369 slotdef, attr = cls.SLOTS[i]
370 if not slot.startswith(slotdef._name + '='):
371 raise SyntaxError, 'incorrect slot name'
373 val = slotdef.decode(slot[len(slotdef._name) + 1:])
374 except (ValueError, SyntaxError):
375 raise SyntaxError, 'invalid %s value' % slotdef.TYPE
376 setattr(obj, attr, val)
383 Read an encoded description of a structure and return the decoded object.
385 with open(path, 'r') as f:
387 if cont and cont[-1] == '\n':
390 raise SyntaxError, 'encoding must be a single line'
391 return cls.decode(cont)
395 Hook for object initialization.
397 This is called by `__init__' and `decode'.
401 ###--------------------------------------------------------------------------
402 ### Higher-level share management.
404 ### We store shares together with a bunch of metadata about the sharing
405 ### parameters, and a hash of all of that together with the final secret.
406 ### Theoretically, storing a hash weakens the whole scheme from providing
407 ### unconditional security to merely providing computational security (based
408 ### on the assupmption that the hash function is one-way). In practice, the
409 ### secret will be used as a symmetric encryption key or similar, so it's
410 ### already only secure against computationally bounded adversaries.
412 class Secret (StructureType):
414 Represents a secret, either about to be shared or recently recovered.
416 LABEL = 'shamir-secret'
417 SLOTS = [(IntegerSlot('n'), 'count'),
418 (IntegerSlot('t'), 'thresh'),
419 (BinarySlot('s'), 'secret')]
421 def hash(me, hashfn):
423 Hash the secret and its parameters using the hash function HASHFN.
425 The hash is returned as a raw binary string. The HASHFN must be one of
426 the hash functions known to Python's `hashlib' module (essentially, hash
427 functions known to OpenSSL).
430 h.update(me.encode())
433 class Params (StructureType):
435 Represents secret sharing parameters.
437 LABEL = 'shamir-params'
438 SLOTS = [(IntegerSlot('n'), 'count'),
439 (IntegerSlot('t'), 'thresh'),
440 (StringSlot('f'), 'hashfn'),
441 (BinarySlot('h'), 'hash')]
443 class Share (StructureType):
445 Represents a share of a secret.
447 LABEL = 'shamir-share'
448 SLOTS = [(IntegerSlot('i'), 'index'),
449 (BinarySlot('y'), 'share')]
451 ###--------------------------------------------------------------------------
454 QUIS = OS.path.basename(SYS.argv[0])
457 SYS.stderr.write('%s: %s\n' % (QUIS, message))
459 def die(message, rc = 1):
463 class struct (object):
464 def __init__(me, **kw):
465 me.__dict__.update(kw)
467 class Subcommand (struct):
470 class SubcommandOptionParser (OPT.OptionParser, object):
472 def __init__(me, usage = '%prog [-OPTIONS] COMMAND [ARGS ...]',
474 super(SubcommandOptionParser, me).__init__(usage = usage, *args, **kw)
476 Subcommand(name = 'help', usage = '[COMMAND ...]', opts = [],
478 desc = 'Show help for %prog, or for the COMMANDs.')
480 me.disable_interspersed_args()
482 def add_subcommand(me, name, func, usage, desc = None, opts = []):
483 subcmd = Subcommand(name = name, func = func, usage = usage,
484 desc = desc, opts = opts)
485 me.subcommands.append(subcmd)
487 def lookup_subcommand(me, name):
489 for sub in me.subcommands:
492 elif sub.name.startswith(name):
496 elif len(match) == 0:
497 me.error("unknown command `%s'"% name)
499 me.error("ambiguous command `%s': could be any of %s"
500 % (name, ', '.join(["`%s'" % sub.name for sub in match])))
502 def subcmd_usage(me, sub):
504 return '%s %s' % (sub.name, sub.usage)
509 super(SubcommandOptionParser, me).print_help()
512 for sub in me.subcommands:
513 if sub.usage is None:
515 print ' ' + me.subcmd_usage(sub)
517 def make_subcommand_optparser(me, sub):
518 op = OPT.OptionParser(usage = 'usage: %%prog %s' % me.subcmd_usage(sub),
519 description = sub.desc)
524 def cmd_help(me, gopts, opts, args):
529 sub = me.lookup_subcommand(name)
530 op = me.make_subcommand_optparser(sub)
533 def dispatch(me, args = None):
536 gopts, args = me.parse_args(args)
538 me.error('missing command')
539 sub = me.lookup_subcommand(args[0])
540 op = me.make_subcommand_optparser(sub)
541 opts, args = op.parse_args(args[1:])
542 sub.func(gopts, opts, args)
544 OPTPARSE = SubcommandOptionParser(description = """\
545 Split and recombine secrets using Shamir's secret sharing system.
548 ###--------------------------------------------------------------------------
549 ### Issue shares given a secret.
551 def parse_sharing(string):
552 slash = string.find('/')
554 raise ValueError, "no `/'"
555 return int(string[slash + 1:]), int(string[:slash])
557 def cmd_issue(gopts, opts, args):
559 if len(args) < 1 or len(args) > 2:
560 OPTPARSE.error('bad arguments')
563 count, thresh = parse_sharing(args[0])
564 except ValueError, err:
565 OPTPARSE.error("bad sharing parameters `%s': %s"
566 % (args[0], err.message))
568 if len(args) < 2 or args[1] == '-':
569 sec = SYS.stdin.read()
571 with open(args[1], 'rb') as f:
574 secret = Secret(count, thresh, sec)
575 h = secret.hash(opts.hashfn)
576 params = Params(count, thresh, opts.hashfn, h)
577 print params.encode()
579 ss = SecretSplit(sec, thresh)
580 for i in xrange(count):
581 share = Share(i, ss.issue(i + 1))
584 OPTPARSE.add_subcommand(
586 '[-H HASHFN] THRESH/COUNT [SECRET-FILE]',
588 Issue shares of a secret. The secret (as a binary lump) is read from
589 SECRET-FILE, or standard input if SECRET-FILE is omitted. COUNT + 1 lines
590 are written to standard output: the first line contains the sharing
591 parameters, and does not need to be kept especially secret (though it
592 contains a hash of the secret, so it compromises the unconditional security
593 of the secret sharing scheme); the remaining lines are the shares, one per
594 line. All of the lines are plain text.""",
595 [OPT.make_option('-H', '--hash-function', type = 'string',
596 action = 'store', dest = 'hashfn', default = 'sha256',
597 help = 'Hash function for verifying the share.')])
599 ###--------------------------------------------------------------------------
600 ### Recover a secret from shares.
602 def cmd_recover(gopts, opts, args):
604 ## Keep track of how well we've done so far. Report all of the things
609 ## Read the system parameters.
610 line = SYS.stdin.readline()
611 if line.endswith('\n'):
614 params = Params.decode(line)
615 except ValueError, err:
616 moan("parameters (line %d) invalid: %s" % (lineno, err.message))
619 ## Now read the shares. Pay attention to syntax errors, but not to much
620 ## else. Build the maps for the sharing parameters.
623 for line in SYS.stdin:
625 if line.endswith('\n'):
628 share = Share.decode(line)
629 except ValueError, err:
630 moan("share invalid (line %d): %s" % (lineno, err.message))
633 share.lineno = lineno
634 if share.index in shmap:
635 moan("duplicate share index "
636 "(line %d, index %d previously on line %d)" %
637 (lineno, share.index, shmap[share.index].lineno))
639 elif share.index < 0 or share.index >= params.count:
640 moan("share index %d out of range (line %d)" % (share.index, lineno))
642 elif len(shmap) >= params.thresh:
643 moan("enough shares already (line %d)" % lineno)
646 sj.insert(share.index + 1, share.share)
647 shmap[share.index] = share
649 if len(shmap) < params.thresh:
650 moan("not enough shares")
656 ## Reassemble the secret.
657 secret = Secret(params.count, params.thresh, sj.recover())
659 if params.hash != secret.hash(params.hashfn):
660 die('secret doesn\'t match hash')
663 SYS.stdout.write(secret.secret)
665 with open(opts.file, 'wb') as f:
666 f.write(secret.secret)
668 OPTPARSE.add_subcommand(
669 'recover', cmd_recover,
672 Standard input should contain a number of lines. The first line should
673 contain the secret sharing parameters (the first line output by `issue');
674 the remaining lines should contain shares. The shares may be supplied in
675 any order. The secret is written to the OUTPUT file, or standard output.""",
676 [OPT.make_option('-o', '--output', action = 'store', type = 'string',
677 dest = 'file', default = '-',
678 help = 'Write the secret to FILE.')])
680 ###----- That's all, folks --------------------------------------------------
682 if __name__ == '__main__':