chiark / gitweb /
disobedience, playrtp: Have `playrtp' handle volume control.
[disorder] / lib / salsa208.c
CommitLineData
ed8e4373
MW
1/* salsa208.c --- The Salsa20/8 stream cipher
2 * Copyright (C) Mark Wooding
3 *
4 * This file is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published
6 * by the Free Software Foundation; either version 2, or (at your
7 * option) any later version.
8 *
9 * This file is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this file; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
18 *
19 */
20/** @file lib/salsa208.c
21 * @brief Salsa20/8 stream cipher implementation
22 *
23 * For a description of the algorithm, see:
24 *
25 * Daniel J. Bernstein, `The Salsa20 family of stream ciphers', in Matthew
26 * Robshaw and Olivier Billet (eds.), `New Stream Cipher Designs',
27 * Springer--Verlag 2008, pp. 84--97;
28 * http://cr.yp.to/snuffle/salsafamily-20071225.pdf
29 *
30 * As far as I know, the best attack against all 8 rounds of Salsa20/8 is by
31 * Aumasson, Fischer, Khazaei, Meier, and Rechberger, which takes 2^251
32 * operations to recover a 256-bit key, which is hopelessly impractical.
33 * Much more effective attacks are known against Salsa20/7, so we would have
34 * a tiny security margin if we were trying for security -- but we aren't.
35 * Instead, we want high-quality randomness for queue ids and for selecting
36 * random tracks. (The cookie machinery, which does want cryptographic
37 * security, makes its own arrangements.) Specifically, the intention is to
38 * replace RC4, which (a) is slow because it has a long dependency chain
39 * which plays badly with the deep pipelines in modern CPUs, and (b) has
40 * well-known and rather embarassing biases. On the other hand, Salsa20/8
41 * has no known biases, and admits considerable instruction-level
42 * parallelism. In practice, Salsa20/8 is about 30% faster than RC4 even
43 * without a fancy SIMD implementation (which is good, because this isn't one
44 * of those); a vectorized implementation acting on multiple blocks at a time
45 * would be even faster.
46 *
47 * Salsa20/8 has a number of other attractive features, such as being
48 * trivially seekable, but we don't need those here and the necessary
49 * machinery is not implemented.
50 */
51
52#include <assert.h>
53#include <string.h>
54
55#include "salsa208.h"
56
57static inline uint32_t ld16(const void *p) {
58 const unsigned char *q = p;
59 return ((uint32_t)q[0] << 0) | ((uint32_t)q[1] << 8);
60}
61
62static inline uint32_t ld32(const void *p) {
63 const unsigned char *q = p;
64 return ((uint32_t)q[0] << 0) | ((uint32_t)q[1] << 8) |
65 ((uint32_t)q[2] << 16) | ((uint32_t)q[3] << 24);
66}
67
68static inline void st32(void *p, uint32_t x) {
69 unsigned char *q = p;
70 q[0] = (x >> 0)&0xff; q[1] = (x >> 8)&0xff;
71 q[2] = (x >> 16)&0xff; q[3] = (x >> 24)&0xff;
72}
73
74static inline uint32_t rol32(uint32_t x, unsigned n)
75 { return (x << n) | (x >> (32 - n)); }
76
77static inline void quarterround(uint32_t m[16], int a, int b, int c, int d) {
78 m[b] ^= rol32(m[a] + m[d], 7); m[c] ^= rol32(m[b] + m[a], 9);
79 m[d] ^= rol32(m[c] + m[b], 13); m[a] ^= rol32(m[d] + m[c], 18);
80}
81
82static void core(salsa208_context *context) {
83 unsigned i;
84 uint32_t t[16];
85
86 /* Copy the state. */
87 for(i = 0; i < 16; i++) t[i] = context->m[i];
88
89 /* Hack on the state. */
90 for(i = 0; i < 4; i++) {
91
92 /* Vertical quarter-rounds. */
93 quarterround(t, 0, 4, 8, 12);
94 quarterround(t, 5, 9, 13, 1);
95 quarterround(t, 10, 14, 2, 6);
96 quarterround(t, 15, 3, 7, 11);
97
98 /* Horizontal quarter-rounds. */
99 quarterround(t, 0, 1, 2, 3);
100 quarterround(t, 5, 6, 7, 4);
101 quarterround(t, 10, 11, 8, 9);
102 quarterround(t, 15, 12, 13, 14);
103 }
104
105 /* Final feedforward. */
106 for(i = 0; i < 16; i++) t[i] += context->m[i];
107
108 /* Output. */
109 for(i = 0; i < 16; i++) st32(context->buf + 4*i, t[i]);
110}
111
112static inline void xorbuf(void *z, const void *x, const void *y, size_t sz) {
113 unsigned char *zz = z;
114 const unsigned char *xx = x, *yy = y;
115
116 if(!xx) memcpy(zz, yy, sz);
117 else while(sz--) *zz++ = *xx++ ^ *yy++;
118}
119
120static inline void step(salsa208_context *context)
121 { if(!++context->m[8]) context->m[9]++; }
122
123/** @brief Encrypt or decrypt data using Salsa20/8.
124 *
125 * @param context The Salsa20/8 context, initialized using salsa208_setkey().
126 * @param inbuf Pointer to input buffer
127 * @param outbuf Pointer to output buffer
128 * @param length Common size of both buffers
129 *
130 * Encrypt or decrypt (the operations are the same) @p length bytes of input
131 * data, writing the result, of the same length, to @p outbuf. The input and
132 * output pointers may be equal; the two buffers may not otherwise overlap.
133 *
134 * If @p inbuf is null, then simply write the next @p length bytes of
135 * Salsa20/8 output to @p outbuf.
136 */
137void salsa208_stream(salsa208_context *context,
138 const void *inbuf, void *outbuf, size_t length) {
139 size_t left = 64 - context->i;
140 unsigned char *z = outbuf;
141 const unsigned char *x = inbuf;
142
143 /* If we can satisfy the request from our buffer then we should do that. */
144 if(length <= left) {
145 xorbuf(z, x, context->buf + context->i, length);
146 context->i += length;
147 return;
148 }
149
150 /* Drain the buffer of what we currently have and cycle the state. */
151 xorbuf(z, x, context->buf + context->i, left);
152 length -= left; z += left; if(x) x += left;
153 core(context); step(context);
154
155 /* Take multiple complete blocks directly. */
156 while(length > 64) {
157 xorbuf(z, x, context->buf, 64);
158 length -= 64; z += 64; if(x) x += 64;
159 core(context); step(context);
160 }
161
162 /* And the final tail end. */
163 xorbuf(z, x, context->buf, length);
164 context->i = length;
165}
166
167/** @brief Initialize a Salsa20/8 context.
168 *
169 * @param context The Salsa20/8 context to initialize
170 * @param key A pointer to the key material
171 * @param keylen The length of the key data, in bytes (must be 10, 16, or 32)
172 *
173 * The context is implicitly initialized with a zero nonce, which is fine if
174 * the key will be used only for a single message. Otherwise, a fresh nonce
175 * should be chosen somehow and set using salsa208_setnonce().
176 */
177void salsa208_setkey(salsa208_context *context,
178 const void *key, size_t keylen) {
179 const unsigned char *k = key;
180 switch(keylen) {
181 case 32:
182 context->m[ 0] = 0x61707865;
183 context->m[ 1] = ld32(k + 0);
184 context->m[ 2] = ld32(k + 4);
185 context->m[ 3] = ld32(k + 8);
186 context->m[ 4] = ld32(k + 12);
187 context->m[ 5] = 0x3320646e;
188 context->m[ 6] = 0;
189 context->m[ 7] = 0;
190 context->m[ 8] = 0;
191 context->m[ 9] = 0;
192 context->m[10] = 0x79622d32;
193 context->m[11] = ld32(k + 16);
194 context->m[12] = ld32(k + 20);
195 context->m[13] = ld32(k + 24);
196 context->m[14] = ld32(k + 28);
197 context->m[15] = 0x6b206574;
198 break;
199 case 16:
200 context->m[ 0] = 0x61707865;
201 context->m[ 1] = ld32(k + 0);
202 context->m[ 2] = ld32(k + 4);
203 context->m[ 3] = ld32(k + 8);
204 context->m[ 4] = ld32(k + 12);
205 context->m[ 5] = 0x3120646e;
206 context->m[ 6] = 0;
207 context->m[ 7] = 0;
208 context->m[ 8] = 0;
209 context->m[ 9] = 0;
210 context->m[10] = 0x79622d36;
211 context->m[11] = context->m[1];
212 context->m[12] = context->m[2];
213 context->m[13] = context->m[3];
214 context->m[14] = context->m[4];
215 context->m[15] = 0x6b206574;
216 break;
217 case 10:
218 context->m[ 0] = 0x61707865;
219 context->m[ 1] = ld32(k + 0);
220 context->m[ 2] = ld32(k + 4);
221 context->m[ 3] = ld16(k + 8);
222 context->m[ 4] = 0;
223 context->m[ 5] = 0x3120646e;
224 context->m[ 6] = 0;
225 context->m[ 7] = 0;
226 context->m[ 8] = 0;
227 context->m[ 9] = 0;
228 context->m[10] = 0x79622d30;
229 context->m[11] = context->m[1];
230 context->m[12] = context->m[2];
231 context->m[13] = context->m[3];
232 context->m[14] = 0;
233 context->m[15] = 0x6b206574;
234 break;
235 default:
236 assert(!"bad Salsa20 key length");
237 }
238 context->i = 64;
239}
240
241/** @brief Set the Salsa20/8 nonce.
242 *
243 * @param context The Salsa20/8 context
244 * @param nonce A pointer to the nonce
245 * @param noncelen The length of the nonce data, in bytes (must be exactly 8)
246 *
247 * The context is automatically rewound to the start of the stream
248 * corresponding to this nonce.
249 */
250void salsa208_setnonce(salsa208_context *context,
251 const void *nonce, size_t noncelen) {
252 const unsigned char *n = nonce;
253 assert(noncelen == 8);
254 context->m[6] = ld32(n + 0);
255 context->m[7] = ld32(n + 4);
256 context->m[8] = context->m[9] = 0;
257 context->i = 64;
258}