chiark / gitweb /
Makefile.am: Distinctive `SUBST' indicator for `confsubst' rules.
[distorted-keys] / shamir.in
CommitLineData
53263601
MW
1#! @PYTHON@
2###
3### Shamir secret sharing
4###
5### (c) 2011 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
599c8f75
MW
10### This file is part of the distorted.org.uk key management suite.
11###
12### distorted-keys is free software; you can redistribute it and/or modify
53263601
MW
13### it under the terms of the GNU General Public License as published by
14### the Free Software Foundation; either version 2 of the License, or
15### (at your option) any later version.
16###
599c8f75 17### distorted-keys is distributed in the hope that it will be useful,
53263601
MW
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU General Public License for more details.
21###
22### You should have received a copy of the GNU General Public License
599c8f75 23### along with distorted-keys; if not, write to the Free Software Foundation,
53263601
MW
24### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26from __future__ import with_statement
27
28import sys as SYS
29import os as OS
30import hashlib as H
31import base64 as B
32from cStringIO import StringIO
33import optparse as OPT
34
35###--------------------------------------------------------------------------
36### Arithmetic in GF(2^8).
37###
38### These functions implement arithmetic in a field containing exactly 256
39### elements. This is convenient, because there's exactly one field element
40### for each one-byte value.
41###
42### Actually, we only implement multiplication and division, because addition
43### and subtraction are trivial -- they're the XOR operation that Python
44### already has. Multiplication and (especially) division are fiddly if you
45### do them directly, but the field is easily small enough that log tables
46### are a feasible alternative.
47###
48### There are many (isomorphic) representations of the field GF(2^8). The
49### one we use here is a field of residue classes of binary polynomials:
50### GF(2)[x]/(x^8 + x^4 + x^3 + x^2 + 1). Usefully, the element [x] (i.e.,
51### the residue class containing x) generates the multiplicative group of the
52### field: every nonzero element can be written in the form x^k for some
53### integer 0 <= k < 255. Multiplication by x is especially easy, so this is
54### a good choice as a base of logarithms.
55###
56### More precisely, we use an integer encoding of the residue classes. Each
57### class contains exactly one polynomial of degree less than 8, called the
58### principal representative. If the principal representative of a residue
59### class is a_7 x^7 + ... + a_1 x + a_0, with each a_i either 0 or 1, then
60### the encoding of the representative is 2^7 a_7 + ... + 2 a_1 + a_0. It is
61### easily verified that this mapping is invertible.
62
63def gf28_log_tables():
64 """Build the log tables used to implement GF(2^8) arithmetic."""
65 global GF28_LOG, GF28_EXP
66 log = [0] * 256
67 exp = [0] * 256
68 x = 1
69 for i in xrange(255):
70 exp[i] = x
71 log[x] = i
72 x <<= 1
73 if x & 0x100:
74 x ^= 0x11d
75 GF28_LOG = log
76 GF28_EXP = exp[:255] * 2
77gf28_log_tables()
78
79def gf28_mul(x, y):
80 """Multiply two elements X and Y of GF(2^8), retturning their product."""
81 if x == 0 or y == 0: return 0
82 else: return GF28_EXP[GF28_LOG[x] + GF28_LOG[y]]
83
84def gf28_div(x, y):
85 """
86 Divide element X by Y in GF(2^8), returning the quotient.
87
88 Raise `ZeroDivisionError' if Y is zero.
89 """
90 if y == 0: raise ZeroDivisionError
91 elif x == 0: return 0
92 else: return GF28_EXP[GF28_LOG[x] + 255 - GF28_LOG[y]]
93
94###--------------------------------------------------------------------------
95### Secret sharing machinery.
96###
97### Shamir's secret sharing scheme uses polynomial interpolation. Let K be a
98### finite field, and let s -- the `secret' -- be an element of K. Let t --
99### the `threshold' -- be a positive integer. A degree-(t - 1) polynomial
100### p(x) over K has the general form a_{t-1} x^{t-1} + ... + a_1 x + a_0; we
101### then have p(0) = a_0. If we choose a_0 = s, and the other a_i uniformly
102### at random from K, then we have chosen a degree-(t - 1) polynomial
103### uniformly at random, subject to the restrictiion that it evaluates to s
104### at 0. We can issue shares of the form (x, p(x)) for 0 < x < #K.
105###
106### Any collection of t shares can be used to reconstruct the polynomial. We
107### can use Lagrange interpolation to do this. Suppose that we're given t
108### different shares (x_i, y_i) for 0 <= i < t. Firstly, set
109###
110### ------- x - x_j
111### l_i(x) = | | ---------
112### | | x_i - x_j
113### 0 <= j < t
114### j /= i
115###
116### Now notice that l_i(x_i) = 1 and l_i(x_j) = 0 if j /= i. Then
117###
118### ----
119### p(x) = > l_i(x) y_i
120### ----
121### 0 <= i < t
122###
123### To see this, it suffices to note that (a) l_i(x) (and therefore the sum)
124### is a degree-(t - 1) polynomial, and (b) it agrees with p(x) at t distinct
125### points, namely each of the x_i. So, with t shares, we can recover the
126### original polynomial, and hence p(0) = s. With t - 1 shares, we can add a
127### fake `share' (0, s') for any guessed s' in K and interpolate
128### corresponding values for the remaining shares; we therefore have perfect
129### secrecy.
130###
131### We use K = GF(2^8), as implemented above. Each byte of a long secret is
132### shared independently.
133###
134### All of this machinery is the same as in my Catacomb library and Daniel
135### Silverstone's `gfshare' package.
136
137class SecretSplit (object):
138 """
139 Create shares of a secret.
140
141 Initialize with a secret S and a threshold T; then issue shares with the
142 `issue' method.
143 """
144
145 def __init__(me, s, t):
146 """
147 Initialize secret sharing.
148
149 S is a string containing the secret; T is the threshold, i.e., the number
150 of shares requierd to recover the secret.
151 """
152
153 ## The number of bytes in the secret is a useful thing to store.
154 me._n = len(s)
155
156 ## Build an array of coefficients chosen at random.
157 me._coeffs = [OS.urandom(me._n) for i in xrange(t - 1)] + [s]
158
159 def issue(me, x):
160 """
161 Issue the share with index X, returning it as a string.
162
163 The share will be binary, even if the original secret is plain text. The
164 index must be in the range 1 <= X <= 255.
165 """
166
167 ## Check that the index is in range.
168 if x < 1 or x > 255:
169 raise ValueError, 'share index out of range'
170
171 ## Evaluate the polynomial at the chosen index.
172 o = StringIO()
173 for i in xrange(me._n):
174 y = 0
175 for j in me._coeffs:
176 y = gf28_mul(x, y) ^ ord(j[i])
177 o.write(chr(y))
178 return o.getvalue()
179
180class SecretJoin (object):
181 """
182 Recover a secret given a number of shares.
183 """
184 def __init__(me):
185 me._n = None
186 me._m = {}
187
188 def insert(me, x, p):
189 """
190 Insert a share P with index X.
191
192 All the shares must have the same length. It doesn't matter what the
193 length of the first share is, but the others have to match it.
194 """
195
196 ## Check that the share index is in range.
197 if x < 1 or x > 255:
198 raise ValueError, 'share index out of range'
199
200 ## Make sure that we haven't been given this one already.
201 if x in me._m:
202 raise ValueError, 'duplicate share index'
203
204 ## Check the length of the share. Store the length if this is the first
205 ## share.
206 if me._n is None:
207 me._n = len(p)
208 elif len(p) != me._n:
209 raise ValueError, 'bad share length'
210
211 ## Store the share.
212 me._m[x] = p
213
214 def recover(me):
215 """
216 Recover the secret from the shares provided, and return it as a string.
217
218 It's assumed that the caller provided the correct number of shares.
219 """
220
221 ## Iniitialize a map of l_i(0) values. These depend only on the share
222 ## indices provided, and are therefore independent of the secret length.
223 ll = {}
224 kk = me._m.keys()
225 for xi in kk:
226 l = 1
227 for xj in kk:
228 if xi != xj:
229 l = gf28_mul(l, gf28_div(xj, xi ^ xj))
230 ll[xi] = l
231
232 ## Now do the interpolation.
233 o = StringIO()
234 for i in xrange(me._n):
235 y = 0
236 for xi in kk:
237 y = y ^ gf28_mul(ll[xi], ord(me._m[xi][i]))
238 o.write(chr(y))
239 return o.getvalue()
240
241###--------------------------------------------------------------------------
242### Key/value encoding.
243###
244### We use a simple encoding for the data structures we handle. The encoding
245### looks like this:
246###
247### LABEL:KEY=VALUE;KEY=VALUE;...
248###
249### The LABEL just says what kind of structure this is meant to be. The KEYs
250### are short strings which identify the structure slots. We're doing this
251### on the cheap, so order is significant, and there are no creature comforts
252### like optional slots. The VALUEs are encodings of the slot values; the
253### details of the encoding depend on the VALUE type, which is statically
254### determined by the structure.
255
256class StructureSlot (object):
257 """
258 Base class for structure slot types.
259
260 Subclasses must implement `encode' and `decode' methods.
261 """
262 def __init__(me, name):
263 """Initialize a structure slot, remembering its name."""
264 me._name = name
265
266class IntegerSlot (StructureSlot):
267 """
268 An integer slot.
269 """
270 TYPE = 'integer'
271 def encode(me, value):
272 return '%d' % value
273 def decode(me, string):
274 return int(string)
275
276class StringSlot (StructureSlot):
277 """
278 A string slot, unencoded. Be careful.
279 """
280 TYPE = 'string'
281 def encode(me, value):
282 return value
283 def decode(me, string):
284 return string
285
286class BinarySlot (StructureSlot):
287 """
288 A binary slot, encoded as Base64.
289 """
290 TYPE = 'base64'
291 def encode(me, value):
292 return B.b64encode(value)
293 def decode(me, string):
294 return B.b64decode(string)
295
296class StructureType (object):
297 """
298 Base class for classes which can encode and decode their slots.
299 """
300
301 def __init__(me, *args, **kw):
302 """
303 Initialize a structure object.
304
305 The arguments correspond to slots. Positional arguments are read first,
306 and stored in the appropriate attributes. Keywords arguments are used to
307 set arbitrary attributes, but it is an error to set an attribute which
308 already has a value.
309
310 The `decoded' method is called after argument processing.
311 """
312
313 _magic = False
314
315 ## Decode the arguments.
316 if len(args) > len(me.SLOTS):
317 raise ValueError, 'too many positional arguments'
318 for i in xrange(len(args)):
319 slotdef, attr = me.SLOTS[i]
320 setattr(me, attr, args[i])
321 for k, v in kw.iteritems():
322 if k == '_magic':
323 _magic = v
324 continue
325 if hasattr(me, k):
326 raise ValueError, 'already set attribute'
327 setattr(me, k, v)
328
329 ## Initialize the object. (There's a hack here for the `decode' method.)
330 if not _magic:
331 me.decoded()
332
333 def encode(me):
334 """
335 Encode an object as a string.
336 """
337 o = StringIO()
338 o.write(me.LABEL)
339 o.write(':')
340 sep = ''
341 for slotdef, attr in me.SLOTS:
342 o.write(sep)
343 o.write('%s=%s' % (slotdef._name, slotdef.encode(getattr(me, attr))))
344 sep = ';'
345 return o.getvalue()
346
347 def save(me, path):
348 """
349 Write an encoded description of the object to PATH.
350 """
351 with open(path + '.new', 'w') as f:
352 f.write(me.encode())
353 f.write('\n')
354 OS.rename(path + '.new', path)
355
356 @classmethod
357 def decode(cls, string):
358 """
359 Decode a STRING, returning a new object.
360
361 The `decoded' method is called on the new object.
362 """
363 if not string.startswith(cls.LABEL + ':'):
364 raise SyntaxError, 'incorrect label'
365 slots = string[len(cls.LABEL) + 1:].split(';')
366 if len(slots) != len(cls.SLOTS):
367 raise SyntaxError, 'wrong number of slots'
368 obj = cls(_magic = True)
369 for i in xrange(len(slots)):
370 slot = slots[i]
371 slotdef, attr = cls.SLOTS[i]
372 if not slot.startswith(slotdef._name + '='):
373 raise SyntaxError, 'incorrect slot name'
374 try:
375 val = slotdef.decode(slot[len(slotdef._name) + 1:])
376 except (ValueError, SyntaxError):
377 raise SyntaxError, 'invalid %s value' % slotdef.TYPE
378 setattr(obj, attr, val)
379 obj.decoded()
380 return obj
381
382 @classmethod
383 def load(cls, path):
384 """
385 Read an encoded description of a structure and return the decoded object.
386 """
387 with open(path, 'r') as f:
388 cont = f.read()
389 if cont and cont[-1] == '\n':
390 cont = cont[:-1]
391 if '\n' in cont:
392 raise SyntaxError, 'encoding must be a single line'
393 return cls.decode(cont)
394
395 def decoded(me):
396 """
397 Hook for object initialization.
398
399 This is called by `__init__' and `decode'.
400 """
401 pass
402
403###--------------------------------------------------------------------------
404### Higher-level share management.
405###
406### We store shares together with a bunch of metadata about the sharing
407### parameters, and a hash of all of that together with the final secret.
408### Theoretically, storing a hash weakens the whole scheme from providing
409### unconditional security to merely providing computational security (based
410### on the assupmption that the hash function is one-way). In practice, the
411### secret will be used as a symmetric encryption key or similar, so it's
412### already only secure against computationally bounded adversaries.
413
414class Secret (StructureType):
415 """
416 Represents a secret, either about to be shared or recently recovered.
417 """
418 LABEL = 'shamir-secret'
419 SLOTS = [(IntegerSlot('n'), 'count'),
420 (IntegerSlot('t'), 'thresh'),
421 (BinarySlot('s'), 'secret')]
422
423 def hash(me, hashfn):
424 """
425 Hash the secret and its parameters using the hash function HASHFN.
426
427 The hash is returned as a raw binary string. The HASHFN must be one of
428 the hash functions known to Python's `hashlib' module (essentially, hash
429 functions known to OpenSSL).
430 """
431 h = H.new(hashfn)
432 h.update(me.encode())
433 return h.digest()
434
435class Params (StructureType):
436 """
437 Represents secret sharing parameters.
438 """
439 LABEL = 'shamir-params'
440 SLOTS = [(IntegerSlot('n'), 'count'),
441 (IntegerSlot('t'), 'thresh'),
442 (StringSlot('f'), 'hashfn'),
443 (BinarySlot('h'), 'hash')]
444
445class Share (StructureType):
446 """
447 Represents a share of a secret.
448 """
449 LABEL = 'shamir-share'
450 SLOTS = [(IntegerSlot('i'), 'index'),
451 (BinarySlot('y'), 'share')]
452
453###--------------------------------------------------------------------------
454### Utilities.
455
456QUIS = OS.path.basename(SYS.argv[0])
457
458def moan(message):
459 SYS.stderr.write('%s: %s\n' % (QUIS, message))
460
461def die(message, rc = 1):
462 moan(message)
463 SYS.exit(rc)
464
465class struct (object):
466 def __init__(me, **kw):
467 me.__dict__.update(kw)
468
469class Subcommand (struct):
470 pass
471
472class SubcommandOptionParser (OPT.OptionParser, object):
473
474 def __init__(me, usage = '%prog [-OPTIONS] COMMAND [ARGS ...]',
475 *args, **kw):
476 super(SubcommandOptionParser, me).__init__(usage = usage, *args, **kw)
477 me.subcommands = [
478 Subcommand(name = 'help', usage = '[COMMAND ...]', opts = [],
479 func = me.cmd_help,
480 desc = 'Show help for %prog, or for the COMMANDs.')
481 ]
482 me.disable_interspersed_args()
483
484 def add_subcommand(me, name, func, usage, desc = None, opts = []):
485 subcmd = Subcommand(name = name, func = func, usage = usage,
486 desc = desc, opts = opts)
487 me.subcommands.append(subcmd)
488
489 def lookup_subcommand(me, name):
490 match = []
491 for sub in me.subcommands:
492 if sub.name == name:
493 return sub
494 elif sub.name.startswith(name):
495 match.append(sub)
496 if len(match) == 1:
497 return match[0]
498 elif len(match) == 0:
499 me.error("unknown command `%s'"% name)
500 else:
501 me.error("ambiguous command `%s': could be any of %s"
502 % (name, ', '.join(["`%s'" % sub.name for sub in match])))
503
504 def subcmd_usage(me, sub):
505 if sub.usage:
506 return '%s %s' % (sub.name, sub.usage)
507 else:
508 return sub.name
509
510 def print_help(me):
511 super(SubcommandOptionParser, me).print_help()
512 print
513 print 'Commands:'
514 for sub in me.subcommands:
515 if sub.usage is None:
516 continue
517 print ' ' + me.subcmd_usage(sub)
518
519 def make_subcommand_optparser(me, sub):
520 op = OPT.OptionParser(usage = 'usage: %%prog %s' % me.subcmd_usage(sub),
521 description = sub.desc)
522 for opt in sub.opts:
523 op.add_option(opt)
524 return op
525
526 def cmd_help(me, gopts, opts, args):
527 if not args:
528 me.print_help()
529 else:
530 for name in args:
531 sub = me.lookup_subcommand(name)
532 op = me.make_subcommand_optparser(sub)
533 op.print_help()
534
535 def dispatch(me, args = None):
536 if args is None:
537 args = SYS.argv[1:]
538 gopts, args = me.parse_args(args)
539 if not args:
540 me.error('missing command')
541 sub = me.lookup_subcommand(args[0])
542 op = me.make_subcommand_optparser(sub)
543 opts, args = op.parse_args(args[1:])
544 sub.func(gopts, opts, args)
545
546OPTPARSE = SubcommandOptionParser(description = """\
547Split and recombine secrets using Shamir's secret sharing system.
548""")
549
550###--------------------------------------------------------------------------
551### Issue shares given a secret.
552
553def parse_sharing(string):
554 slash = string.find('/')
555 if slash == -1:
556 raise ValueError, "no `/'"
557 return int(string[slash + 1:]), int(string[:slash])
558
559def cmd_issue(gopts, opts, args):
560
561 if len(args) < 1 or len(args) > 2:
562 OPTPARSE.error('bad arguments')
563
564 try:
565 count, thresh = parse_sharing(args[0])
566 except ValueError, err:
567 OPTPARSE.error("bad sharing parameters `%s': %s"
568 % (args[0], err.message))
569
570 if len(args) < 2 or args[1] == '-':
571 sec = SYS.stdin.read()
572 else:
573 with open(args[1], 'rb') as f:
574 sec = f.read()
575
576 secret = Secret(count, thresh, sec)
577 h = secret.hash(opts.hashfn)
578 params = Params(count, thresh, opts.hashfn, h)
579 print params.encode()
580
581 ss = SecretSplit(sec, thresh)
582 for i in xrange(count):
583 share = Share(i, ss.issue(i + 1))
584 print share.encode()
585
586OPTPARSE.add_subcommand(
587 'issue', cmd_issue,
588 '[-H HASHFN] THRESH/COUNT [SECRET-FILE]',
589 """\
590Issue shares of a secret. The secret (as a binary lump) is read from
591SECRET-FILE, or standard input if SECRET-FILE is omitted. COUNT + 1 lines
592are written to standard output: the first line contains the sharing
593parameters, and does not need to be kept especially secret (though it
594contains a hash of the secret, so it compromises the unconditional security
595of the secret sharing scheme); the remaining lines are the shares, one per
596line. All of the lines are plain text.""",
597 [OPT.make_option('-H', '--hash-function', type = 'string',
598 action = 'store', dest = 'hashfn', default = 'sha256',
599 help = 'Hash function for verifying the share.')])
600
601###--------------------------------------------------------------------------
602### Recover a secret from shares.
603
604def cmd_recover(gopts, opts, args):
605
606 ## Keep track of how well we've done so far. Report all of the things
607 ## which go wrong.
608 lose = False
609 lineno = 1
610
611 ## Read the system parameters.
612 line = SYS.stdin.readline()
613 if line.endswith('\n'):
614 line = line[:-1]
615 try:
616 params = Params.decode(line)
617 except ValueError, err:
618 moan("parameters (line %d) invalid: %s" % (lineno, err.message))
619 lose = True
620
621 ## Now read the shares. Pay attention to syntax errors, but not to much
622 ## else. Build the maps for the sharing parameters.
623 sj = SecretJoin()
624 shmap = {}
625 for line in SYS.stdin:
626 lineno += 1
627 if line.endswith('\n'):
628 line = line[:-1]
629 try:
630 share = Share.decode(line)
631 except ValueError, err:
632 moan("share invalid (line %d): %s" % (lineno, err.message))
633 lose = True
634 continue
635 share.lineno = lineno
636 if share.index in shmap:
637 moan("duplicate share index "
638 "(line %d, index %d previously on line %d)" %
639 (lineno, share.index, shmap[share.index].lineno))
640 lose = True
641 elif share.index < 0 or share.index >= params.count:
642 moan("share index %d out of range (line %d)" % (share.index, lineno))
643 lose = True
644 elif len(shmap) >= params.thresh:
645 moan("enough shares already (line %d)" % lineno)
646 lose = True
647 else:
648 sj.insert(share.index + 1, share.share)
649 shmap[share.index] = share
650
651 if len(shmap) < params.thresh:
652 moan("not enough shares")
653 lose = True
654
655 if lose:
656 SYS.exit(1)
657
658 ## Reassemble the secret.
659 secret = Secret(params.count, params.thresh, sj.recover())
660
661 if params.hash != secret.hash(params.hashfn):
662 die('secret doesn\'t match hash')
663
664 if opts.file == '-':
665 SYS.stdout.write(secret.secret)
666 else:
667 with open(opts.file, 'wb') as f:
668 f.write(secret.secret)
669
670OPTPARSE.add_subcommand(
671 'recover', cmd_recover,
672 '',
673 """\
674Standard input should contain a number of lines. The first line should
675contain the secret sharing parameters (the first line output by `issue');
676the remaining lines should contain shares. The shares may be supplied in
677any order. The secret is written to the OUTPUT file, or standard output.""",
678 [OPT.make_option('-o', '--output', action = 'store', type = 'string',
679 dest = 'file', default = '-',
680 help = 'Write the secret to FILE.')])
681
682###----- That's all, folks --------------------------------------------------
683
684if __name__ == '__main__':
685 OPTPARSE.dispatch()