chiark / gitweb /
changelog: work on documentation of changes since ea31544cc33a
[secnet.git] / site.c
1 /* site.c - manage communication with a remote network site */
2
3 /*
4  * This file is part of secnet.
5  * See README for full list of copyright holders.
6  *
7  * secnet is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3 of the License, or
10  * (at your option) any later version.
11  * 
12  * secnet is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * version 3 along with secnet; if not, see
19  * https://www.gnu.org/licenses/gpl.html.
20  */
21
22 /* The 'site' code doesn't know anything about the structure of the
23    packets it's transmitting.  In fact, under the new netlink
24    configuration scheme it doesn't need to know anything at all about
25    IP addresses, except how to contact its peer.  This means it could
26    potentially be used to tunnel other protocols too (IPv6, IPX, plain
27    old Ethernet frames) if appropriate netlink code can be written
28    (and that ought not to be too hard, eg. using the TUN/TAP device to
29    pretend to be an Ethernet interface).  */
30
31 /* At some point in the future the netlink code will be asked for
32    configuration information to go in the PING/PONG packets at the end
33    of the key exchange. */
34
35 #include "secnet.h"
36 #include <stdio.h>
37 #include <string.h>
38 #include <limits.h>
39 #include <assert.h>
40 #include <sys/socket.h>
41
42 #include <sys/mman.h>
43 #include "util.h"
44 #include "unaligned.h"
45 #include "magic.h"
46 #include "pubkeys.h"
47
/* Size constant and default timing parameters.  The DEFAULT_MOBILE_*
 * values parallel the plain DEFAULT_* ones.  NOTE(review): presumably
 * the MOBILE set is chosen when a mobile setting is in force; the
 * selection happens outside this part of the file -- confirm there. */
48 #define SETUP_BUFFER_LEN 2048
49
50 #define DEFAULT_KEY_LIFETIME                  (3600*1000) /* [ms] */
51 #define DEFAULT_KEY_RENEGOTIATE_GAP           (5*60*1000) /* [ms] */
52 #define DEFAULT_SETUP_RETRIES 5
53 #define DEFAULT_SETUP_RETRY_INTERVAL             (2*1000) /* [ms] */
54 #define DEFAULT_WAIT_TIME                       (20*1000) /* [ms] */
55
56 #define DEFAULT_MOBILE_KEY_LIFETIME      (2*24*3600*1000) /* [ms] */
57 #define DEFAULT_MOBILE_KEY_RENEGOTIATE_GAP (12*3600*1000) /* [ms] */
58 #define DEFAULT_MOBILE_SETUP_RETRIES 30
59 #define DEFAULT_MOBILE_SETUP_RETRY_INTERVAL      (1*1000) /* [ms] */
60 #define DEFAULT_MOBILE_WAIT_TIME                (10*1000) /* [ms] */
61
62 #define DEFAULT_MOBILE_PEER_EXPIRY            (2*60)      /* [s] */
63
/* Length of the string "~incoming": sizeof counts the terminating
 * nul, hence the -1. */
64 #define PEERKEYS_SUFFIX_MAXLEN (sizeof("~incoming")-1)
65
66 /* Each site can be in one of several possible states. */
67
68 /* States:
69    SITE_STOP         - nothing is allowed to happen; tunnel is down;
70                        all session keys have been erased
71      -> SITE_RUN upon external instruction
72    SITE_RUN          - site up, maybe with valid key
73      -> SITE_RESOLVE upon outgoing packet and no valid key
74          we start name resolution for the other end of the tunnel
75      -> SITE_SENTMSG2 upon valid incoming message 1 and suitable time
76          we send an appropriate message 2
77    SITE_RESOLVE      - waiting for name resolution
78      -> SITE_SENTMSG1 upon successful resolution
79          we send an appropriate message 1
80      -> SITE_SENTMSG2 upon valid incoming message 1 (then abort resolution)
81          we abort resolution and send an appropriate message 2
82      -> SITE_WAIT on timeout or resolution failure
83    SITE_SENTMSG1
84      -> SITE_SENTMSG2 upon valid incoming message 1 from higher priority end
85      -> SITE_SENTMSG3 upon valid incoming message 2
86      -> SITE_WAIT on timeout
87    SITE_SENTMSG2
88      -> SITE_SENTMSG4 upon valid incoming message 3
89      -> SITE_WAIT on timeout
90    SITE_SENTMSG3
91      -> SITE_SENTMSG5 upon valid incoming message 4
92      -> SITE_WAIT on timeout
93    SITE_SENTMSG4
94      -> SITE_RUN upon valid incoming message 5
95      -> SITE_WAIT on timeout
96    SITE_SENTMSG5
97      -> SITE_RUN upon valid incoming message 6
98      -> SITE_WAIT on timeout
99    SITE_WAIT         - failed to establish key; do nothing for a while
100      -> SITE_RUN on timeout
101    */
102
/* Numeric values of the states described above.  state_name() below
 * switches on the literal numbers 0..8 rather than on these names,
 * so the two have to be kept in step by hand. */
103 #define SITE_STOP     0
104 #define SITE_RUN      1
105 #define SITE_RESOLVE  2
106 #define SITE_SENTMSG1 3
107 #define SITE_SENTMSG2 4
108 #define SITE_SENTMSG3 5
109 #define SITE_SENTMSG4 6
110 #define SITE_SENTMSG5 7
111 #define SITE_WAIT     8
112
/* Two case labels in one: written after the `case' keyword and
 * followed by a colon, this expands to
 * `case LABEL_MSG3: case LABEL_MSG3BIS:'. */
113 #define CASES_MSG3_KNOWN LABEL_MSG3: case LABEL_MSG3BIS
114
/* Forward declaration; the full definition appears further down. */
115 struct msg;
116
/* NOTE(review): this has external linkage and is not referenced in
 * this part of the file; presumably other files read it.  The reason
 * for the value 4*4 is not visible here -- confirm at the users. */
117 int32_t site_max_start_pad = 4*4;
118
119 static cstring_t state_name(uint32_t state)
120 {
121     switch (state) {
122     case 0: return "STOP";
123     case 1: return "RUN";
124     case 2: return "RESOLVE";
125     case 3: return "SENTMSG1";
126     case 4: return "SENTMSG2";
127     case 5: return "SENTMSG3";
128     case 6: return "SENTMSG4";
129     case 7: return "SENTMSG5";
130     case 8: return "WAIT";
131     default: return "*bad state*";
132     }
133 }
134
/* Size in bytes of the localN/remoteN nonce arrays in struct site. */
135 #define NONCELEN 8
136
/* Log event classes, one bit each; st->log_events holds the set of
 * enabled bits (see event_log_priority).  Bit 0x00000200 is not
 * assigned to any class. */
137 #define LOG_UNEXPECTED    0x00000001
138 #define LOG_SETUP_INIT    0x00000002
139 #define LOG_SETUP_TIMEOUT 0x00000004
140 #define LOG_ACTIVATE_KEY  0x00000008
141 #define LOG_TIMEOUT_KEY   0x00000010
142 #define LOG_SEC           0x00000020
143 #define LOG_STATE         0x00000040
144 #define LOG_DROP          0x00000080
145 #define LOG_DUMP          0x00000100
146 #define LOG_ERROR         0x00000400
147 #define LOG_PEER_ADDRS    0x00000800
148 #define LOG_SIGKEYS       0x00001000
149
/* Names for the event bits above.  "default" and "all" name
 * combinations of bits rather than a single class.  The table ends
 * with an entry whose name is NULL.
 * NOTE(review): presumably used to parse the site's log-events
 * configuration; the consumer is not in this part of the file. */
150 static struct flagstr log_event_table[]={
151     { "unexpected", LOG_UNEXPECTED },
152     { "setup-init", LOG_SETUP_INIT },
153     { "setup-timeout", LOG_SETUP_TIMEOUT },
154     { "activate-key", LOG_ACTIVATE_KEY },
155     { "timeout-key", LOG_TIMEOUT_KEY },
156     { "security", LOG_SEC },
157     { "state-change", LOG_STATE },
158     { "packet-drop", LOG_DROP },
159     { "dump-packets", LOG_DUMP },
160     { "errors", LOG_ERROR },
161     { "peer-addrs", LOG_PEER_ADDRS },
162     { "sigkeys", LOG_SIGKEYS },
163     { "default", LOG_SETUP_INIT|LOG_SETUP_TIMEOUT|
164       LOG_ACTIVATE_KEY|LOG_TIMEOUT_KEY|LOG_SEC|LOG_ERROR|LOG_SIGKEYS },
165     { "all", 0xffffffff },
166     { NULL, 0 }
167 };
168
169
170 /***** TRANSPORT PEERS declarations *****/
171
172 /* Details of "mobile peer" semantics:
173
174    - We use the same data structure for the different configurations,
175      but manage it with different algorithms.
176    
177    - We record up to mobile_peers_max peer address/port numbers
178      ("peers") for key setup, and separately up to mobile_peers_max
179      for data transfer.
180
181    - In general, we make a new set of addrs (see below) when we start
182      a new key exchange; the key setup addrs become the data transport
183      addrs when key setup completes.
184
185    If our peer is mobile:
186
187    - We send to all recent addresses of incoming packets, plus
188      initially all configured addresses (which we also expire).
189
190    - So, we record addrs of good incoming packets, as follows:
191       1. expire any peers last seen >120s ("mobile-peer-expiry") ago
192       2. add the peer of the just received packet to the applicable list
193          (possibly evicting the oldest entries to make room)
194      NB that we do not expire peers until an incoming packet arrives.
195
196    - If the peer has a configured address or name, we record them the
197      same way, but only as a result of our own initiation of key
198      setup.  (We might evict some incoming packet addrs to make room.)
199
200    - The default number of addrs to keep is 3, or 4 if we have a
201      configured name or address.  That's space for two configured
202      addresses (one IPv6 and one IPv4), plus two received addresses.
203
204    - Outgoing packets are sent to every recorded address in the
205      applicable list.  Any unsupported[1] addresses are deleted from
206      the list right away.  (This should only happen to configured
207      addresses, of course, but there is no need to check that.)
208
209    - When we successfully complete a key setup, we merge the key setup
210      peers into the data transfer peers.
211
212    [1] An unsupported address is one for whose AF we don't have a
213      socket (perhaps because we got EAFNOSUPPORT or some such) or for
214      which sendto gives ENETUNREACH.
215
216    If neither end is mobile:
217
218    - When peer initiated the key exchange, we use the incoming packet
219      address.
220
221    - When we initiate the key exchange, we try configured addresses
222      until we get one which isn't unsupported then fixate on that.
223
224    - When we complete a key setup, we replace the data transport peers
225      with those from the key setup.
226
227    If we are mobile:
228
229    - We can't tell when local network setup changes so we can't cache
230      the unsupported addrs and completely remove the spurious calls to
231      sendto, but we can optimise things a bit by deprioritising addrs
232      which seem to be unsupported.
233
234    - Use only configured addresses.  (Except, that if our peer
235      initiated a key exchange we use the incoming packet address until
236      our name resolution completes.)
237
238    - When we send a packet, try each address in turn; if addr
239      supported, put that address to the end of the list for future
240      packets, and go onto the next address.
241
242    - When we complete a key setup, we replace the data transport peers
243      with those from the key setup.
244
245    */
246
/* One peer address together with a timestamp.
 * NOTE(review): `last' is presumably the time the address was last
 * seen, as used by the expiry rule described in the comment above --
 * confirm against the code that fills it in. */
247 typedef struct {
248     struct timeval last;
249     struct comm_addr addr;
250 } transport_peer;
251
/* A set of peer addresses: the first npeers entries of peers[] are
 * in use.  The array has room for MAX_PEER_ADDRS entries.  Note that
 * no field follows the "configuration information" heading. */
252 typedef struct {
253 /* configuration information */
254 /* runtime information */
255     int npeers;
256     transport_peer peers[MAX_PEER_ADDRS];
257 } transport_peers;
258
259 /* Basic operations on transport peer address sets */
260 static void transport_peers_clear(struct site *st, transport_peers *peers);
261 static int transport_peers_valid(transport_peers *peers);
262 static void transport_peers_copy(struct site *st, transport_peers *dst,
263                                  const transport_peers *src);
264
265 /* Record address of incoming setup packet; resp. data packet. */
266 static void transport_setup_msgok(struct site *st, const struct comm_addr *a);
267 static void transport_data_msgok(struct site *st, const struct comm_addr *a);
268
269 /* Initialise the setup addresses.  Called before we send the first
270  * packet in a key exchange.  If we are the initiator, as a result of
271  * resolve completing (or being determined not to be relevant) or an
272  * incoming PROD; if we are the responder, as a result of the MSG1. */
273 static bool_t transport_compute_setupinit_peers(struct site *st,
274         const struct comm_addr *configured_addrs /* 0 if none or not found */,
275         int n_configured_addrs /* 0 if none or not found */,
276         const struct comm_addr *incoming_packet_addr /* 0 if none */);
277
278 /* Called if we are the responder in a key setup, when the resolve
279  * completes.  transport_compute_setupinit_peers will have been called
280  * earlier.  If _complete is called, we are still doing the key setup
281  * (and we should use the new values for both the rest of the key
282  * setup and the ongoing data exchange); if _tardy is called, the key
283  * setup is done (either completed or not) and only the data peers are
284  * relevant */
285 static void transport_resolve_complete(struct site *st,
286         const struct comm_addr *addrs, int naddrs);
287 static void transport_resolve_complete_tardy(struct site *st,
288         const struct comm_addr *addrs, int naddrs);
289
/* NOTE(review): presumably sends buf to the addresses in peers, as
 * described under "Outgoing packets" in the comment above; the
 * definition is not in this part of the file. */
290 static void transport_xmit(struct site *st, transport_peers *peers,
291                            struct buffer_if *buf, bool_t candebug);
292
293  /***** END of transport peers declarations *****/
294
295
/* One session key slot.  struct site has two of these, `current' and
 * `auxiliary_key'.  The key counts as valid when `transform' is
 * non-null and its valid() method says so (see is_transform_valid). */
296 struct data_key {
297     struct transform_inst_if *transform;
298     uint64_t key_timeout; /* End of life of current key */
299     uint32_t remote_session_id;
300 };
301
/* Everything we keep for one site (one tunnel to one peer).  The
 * members fall into three groups, marked by the comments inside:
 * configuration, general runtime state (including the established
 * session keys and data peers), and the state of the single key
 * setup exchange which may be in progress. */
302 struct site {
303     closure_t cl;
304     struct site_if ops;
305 /* configuration information */
306     string_t localname;
307     string_t remotename;
308     bool_t keepalive;
309     bool_t local_mobile, peer_mobile; /* Mobile client support */
310     int32_t transport_peers_max;
311     string_t tunname; /* localname<->remotename by default, used in logs */
312     cstring_t *addresses; /* DNS name or address(es) for bootstrapping, optional */
313     int remoteport; /* Port for bootstrapping, optional */
314     uint32_t mtu_target;
315     struct netlink_if *netlink;
316     struct comm_if **comms;
317     struct comm_clientinfo **commclientinfos;
318     int ncomms;
319     struct resolver_if *resolver;
320     struct log_if *log;
321     struct random_if *random;
    /* generate_msg looks signing keys up in `privkeys' when it is
     * non-null, and otherwise falls back to `privkey_fixed'. */
322     struct privcache_if *privkeys;
323     struct sigprivkey_if *privkey_fixed;
324     struct transform_if **transforms;
325     int ntransforms;
326     struct dh_if *dh;
327
328     uint32_t index; /* Index of this site */
329     uint32_t early_capabilities;
330     uint32_t local_capabilities;
331     int32_t setup_retries; /* How many times to send setup packets */
332     int32_t setup_retry_interval; /* Initial timeout for setup packets */
333     int32_t wait_timeout_mean; /* How long to wait if setup unsuccessful */
334     int32_t mobile_peer_expiry; /* How long to remember 2ary addresses */
335     int32_t key_lifetime; /* How long a key lasts once set up */
336     int32_t key_renegotiate_time; /* If we see traffic (or a keepalive)
337                                       after this time, initiate a new
338                                       key exchange */
339
340     bool_t our_name_later; /* our name > peer name */
341     uint32_t log_events;
342
343 /* runtime information */
344     uint32_t state;
345     uint64_t now; /* Most recently seen time */
346     bool_t allow_send_prod;
347     bool_t msg1_crossed_logged;
348     int resolving_count;
349     int resolving_n_results_all;
350     int resolving_n_results_stored;
351     struct comm_addr resolving_results[MAX_PEER_ADDRS];
352     const char *peerkeys_path;
353     struct pathprefix_template peerkeys_tmpl;
354     struct peer_keyset *peerkeys_current, *peerkeys_kex;
355
356     /* The currently established session */
357     struct data_key current;
358     struct data_key auxiliary_key;
359     bool_t auxiliary_is_new;
360     uint64_t renegotiate_key_time; /* When we can negotiate a new key */
361     uint64_t auxiliary_renegotiate_key_time;
362     transport_peers peers; /* Current address(es) of peer for data traffic */
363
364     /* The current key setup protocol exchange.  We can only be
365        involved in one of these at a time.  There's a potential for
366        denial of service here (the attacker keeps sending a setup
367        packet; we keep trying to continue the exchange, and have to
368        timeout before we can listen for another setup packet); perhaps
369        we should keep a list of 'bad' sources for setup packets. */
370     uint32_t remote_capabilities;
371     uint16_t remote_adv_mtu;
372     struct transform_if *chosen_transform;
373     uint32_t setup_session_id;
374     transport_peers setup_peers;
375     uint8_t localN[NONCELEN]; /* Nonces for key exchange */
376     uint8_t remoteN[NONCELEN];
377     struct buffer_if buffer; /* Current outgoing key exchange packet */
378     struct buffer_if scratch;
379     int32_t retries; /* Number of retries remaining */
380     uint64_t timeout; /* Timeout for current state */
381     uint8_t *dhsecret;
    /* sharedsecret is grown on demand by set_new_transform;
     * sharedsecretallocd is its allocated size and sharedsecretlen
     * the part currently in use. */
382     uint8_t *sharedsecret;
383     uint32_t sharedsecretlen, sharedsecretallocd;
384     struct transform_inst_if *new_transform; /* For key setup/verify */
385 };
386
387 static uint32_t event_log_priority(struct site *st, uint32_t event)
388 {
389     if (!(event&st->log_events))
390         return 0;
391     switch(event) {
392     case LOG_UNEXPECTED:    return M_INFO;
393     case LOG_SETUP_INIT:    return M_INFO;
394     case LOG_SETUP_TIMEOUT: return M_NOTICE;
395     case LOG_ACTIVATE_KEY:  return M_INFO;
396     case LOG_TIMEOUT_KEY:   return M_INFO;
397     case LOG_SEC:           return M_SECURITY;
398     case LOG_STATE:         return M_DEBUG;
399     case LOG_DROP:          return M_DEBUG;
400     case LOG_DUMP:          return M_DEBUG;
401     case LOG_ERROR:         return M_ERR;
402     case LOG_PEER_ADDRS:    return M_DEBUG;
403     case LOG_SIGKEYS:       return M_INFO;
404     default:                return M_ERR;
405     }
406 }
407
408 static uint32_t slog_start(struct site *st, uint32_t event)
409 {
410     uint32_t class=event_log_priority(st, event);
411     if (class) {
412         slilog_part(st->log,class,"%s: ",st->tunname);
413     }
414     return class;
415 }
416
417 static void vslog(struct site *st, uint32_t event, cstring_t msg, va_list ap)
418 FORMAT(printf,3,0);
419 static void vslog(struct site *st, uint32_t event, cstring_t msg, va_list ap)
420 {
421     uint32_t class;
422
423     class=slog_start(st,event);
424     if (class) {
425         vslilog_part(st->log,class,msg,ap);
426         slilog_part(st->log,class,"\n");
427     }
428 }
429
430 static void slog(struct site *st, uint32_t event, cstring_t msg, ...)
431 FORMAT(printf,3,4);
432 static void slog(struct site *st, uint32_t event, cstring_t msg, ...)
433 {
434     va_list ap;
435     va_start(ap,msg);
436     vslog(st,event,msg,ap);
437     va_end(ap);
438 }
439
440 static void logtimeout(struct site *st, const char *fmt, ...)
441 FORMAT(printf,2,3);
442 static void logtimeout(struct site *st, const char *fmt, ...)
443 {
444     uint32_t class=event_log_priority(st,LOG_SETUP_TIMEOUT);
445     if (!class)
446         return;
447
448     va_list ap;
449     va_start(ap,fmt);
450
451     slilog_part(st->log,class,"%s: ",st->tunname);
452     vslilog_part(st->log,class,fmt,ap);
453
454     const char *delim;
455     int i;
456     for (i=0, delim=" (tried ";
457          i<st->setup_peers.npeers;
458          i++, delim=", ") {
459         transport_peer *peer=&st->setup_peers.peers[i];
460         const char *s=comm_addr_to_string(&peer->addr);
461         slilog_part(st->log,class,"%s%s",delim,s);
462     }
463
464     slilog_part(st->log,class,")\n");
465     va_end(ap);
466 }
467
/* Forward declarations of functions defined later in this file. */
468 static void set_link_quality(struct site *st);
469 static void delete_keys(struct site *st, cstring_t reason, uint32_t loglevel);
470 static void delete_one_key(struct site *st, struct data_key *key,
471                            const char *reason /* may be 0 meaning don't log*/,
472                            const char *which /* ignored if !reason */,
473                            uint32_t loglevel /* ignored if !reason */);
474 static bool_t initiate_key_setup(struct site *st, cstring_t reason,
475                                  const struct comm_addr *prod_hint);
476 static void enter_state_run(struct site *st);
477 static bool_t enter_state_resolve(struct site *st);
478 static void decrement_resolving_count(struct site *st, int by);
479 static bool_t enter_new_state(struct site *st,uint32_t next,
480                               const struct msg *prompt
481                               /* may be 0 for SENTMSG1 */);
482 static void enter_state_wait(struct site *st);
483 static void activate_new_key(struct site *st);
484
485 static bool_t is_transform_valid(struct transform_inst_if *transform)
486 {
487     return transform && transform->valid(transform->st);
488 }
489
490 static bool_t current_valid(struct site *st)
491 {
492     return is_transform_valid(st->current.transform);
493 }
494
/* Generates call_transform_<fwdrev>(), a wrapper round the transform
 * method of the same name.  If the transform is missing or not valid
 * the wrapper sets *errmsg to "transform not set up" and returns
 * transform_apply_err without calling the method; otherwise it
 * returns whatever the method returns.  The `st' parameter of the
 * generated function is not used in its body. */
495 #define DEFINE_CALL_TRANSFORM(fwdrev)                                   \
496 static transform_apply_return                                           \
497 call_transform_##fwdrev(struct site *st,                                \
498                                    struct transform_inst_if *transform, \
499                                    struct buffer_if *buf,               \
500                                    const char **errmsg)                 \
501 {                                                                       \
502     if (!is_transform_valid(transform)) {                               \
503         *errmsg="transform not set up";                                 \
504         return transform_apply_err;                                     \
505     }                                                                   \
506     return transform->fwdrev(transform->st,buf,errmsg);                 \
507 }
508
/* Instantiate call_transform_forwards and call_transform_reverse. */
509 DEFINE_CALL_TRANSFORM(forwards)
510 DEFINE_CALL_TRANSFORM(reverse)
511
512 static void dispose_transform(struct transform_inst_if **transform_var)
513 {
514     struct transform_inst_if *transform=*transform_var;
515     if (transform) {
516         transform->delkey(transform->st);
517         transform->destroy(transform->st);
518     }
519     *transform_var = 0;
520 }    
521
/* Parsing helpers.  Each of these executes `return False' in the
 * function which uses it when its check fails, so they are only
 * usable inside functions whose return type accepts False.
 *   CHECK_AVAIL(b,l)  fails if buffer b holds fewer than l bytes
 *   CHECK_EMPTY(b)    fails if buffer b still holds any bytes
 *   CHECK_TYPE(b,t)   removes a uint32 from the front of b and fails
 *                     if there were fewer than 4 bytes or the value
 *                     is not t */
522 #define CHECK_AVAIL(b,l) do { if ((b)->size<(l)) return False; } while(0)
523 #define CHECK_EMPTY(b) do { if ((b)->size!=0) return False; } while(0)
524 #define CHECK_TYPE(b,t) do { uint32_t type; \
525     CHECK_AVAIL((b),4); \
526     type=buf_unprepend_uint32((b)); \
527     if (type!=(t)) return False; } while(0)
528
529 static _Bool type_is_msg23(uint32_t type)
530 {
531     switch (type) {
532         case LABEL_MSG2: case CASES_MSG3_KNOWN: return True;
533         default: return False;
534     }
535 }
536 static _Bool type_is_msg34(uint32_t type)
537 {
538     switch (type) {
539         case CASES_MSG3_KNOWN: case LABEL_MSG4: return True;
540         default: return False;
541     }
542 }
543
/* A name field as taken apart by unpick_name.  `name' and `len'
 * describe the name proper (up to but excluding the first nul byte,
 * if there is one); `extrainfo' is a read-only view of whatever
 * followed that nul inside the field, and is empty if there was no
 * nul. */
544 struct parsedname {
545     int32_t len;
546     uint8_t *name;
547     struct buffer_if extrainfo;
548 };
549
/* The fields of a key setup packet, as filled in by unpick_msg.
 * Which members are set depends on the packet type: unpick_msg
 * returns early for PROD, MSG1 and MSG2, leaving the later members
 * untouched. */
550 struct msg {
551     uint8_t *hashstart;
552     uint32_t dest;
553     uint32_t source;
554     struct parsedname remote;
555     struct parsedname local;
556     uint32_t remote_capabilities;
557     uint16_t remote_mtu;
558     int capab_transformnum;
559     uint8_t *nR;
560     uint8_t *nL;
561     int32_t pklen;
562     char *pk;
563     int32_t hashlen;
564     struct alg_msg_data sig;
565     int n_pubkeys_accepted_nom; /* may be > MAX_SIG_KEYS ! */
566     const struct sigkeyid *pubkeys_accepted[MAX_SIG_KEYS];
567     int signing_key_index;
568 };
569
/* An all-zero key id (static storage, so zero-initialised).
 * unpick_msg uses it as the single accepted key when the packet
 * carries no key list; generate_msg compares against it when only
 * the fixed private key is configured. */
570 static const struct sigkeyid keyid_zero;
571
572 static int32_t wait_timeout(struct site *st) {
573     int32_t t = st->wait_timeout_mean;
574     int8_t factor;
575     if (t < INT_MAX/2) {
576         st->random->generate(st->random->st,sizeof(factor),&factor);
577         t += (t / 256) * factor;
578     }
579     return t;
580 }
581
582 static _Bool set_new_transform(struct site *st, char *pk)
583 {
584     _Bool ok;
585
586     /* Make room for the shared key */
587     st->sharedsecretlen=st->chosen_transform->keylen?:st->dh->ceil_len;
588     assert(st->sharedsecretlen);
589     if (st->sharedsecretlen > st->sharedsecretallocd) {
590         st->sharedsecretallocd=st->sharedsecretlen;
591         st->sharedsecret=safe_realloc_ary(st->sharedsecret,1,
592                                           st->sharedsecretallocd,
593                                           "site:sharedsecret");
594     }
595
596     /* Generate the shared key */
597     st->dh->makeshared(st->dh->st,st->dhsecret,st->dh->len,pk,
598                        st->sharedsecret,st->sharedsecretlen);
599
600     /* Set up the transform */
601     struct transform_if *generator=st->chosen_transform;
602     struct transform_inst_if *generated=generator->create(generator->st);
603     ok = generated->setkey(generated->st,st->sharedsecret,
604                            st->sharedsecretlen,st->our_name_later);
605
606     dispose_transform(&st->new_transform);
607     if (!ok) return False;
608     st->new_transform=generated;
609
610     slog(st,LOG_SETUP_INIT,"key exchange negotiated transform"
611          " %d (capabilities ours=%#"PRIx32" theirs=%#"PRIx32")",
612          st->chosen_transform->capab_bit,
613          st->local_capabilities, st->remote_capabilities);
614     return True;
615 }
616
/* Bookkeeping shared by append_string_xinfo_start and
   append_string_xinfo_done. */
struct xinfoadd {
    int32_t lenpos;   /* buffer size before the string was appended */
    int32_t afternul; /* buffer size just after the nul was appended */
};
620 static void append_string_xinfo_start(struct buffer_if *buf,
621                                       struct xinfoadd *xia,
622                                       const char *str)
623     /* Helps construct one of the names with additional info as found
624      * in MSG1..4.  Call this function first, then append all the
625      * desired extra info (not including the nul byte) to the buffer,
626      * then call append_string_xinfo_done. */
627 {
628     xia->lenpos = buf->size;
629     buf_append_string(buf,str);
630     buf_append_uint8(buf,0);
631     xia->afternul = buf->size;
632 }
633 static void append_string_xinfo_done(struct buffer_if *buf,
634                                      struct xinfoadd *xia)
635 {
636     /* we just need to adjust the string length */
637     if (buf->size == xia->afternul) {
638         /* no extra info, strip the nul too */
639         buf_unappend_uint8(buf);
640     } else {
641         put_uint16(buf->start+xia->lenpos, buf->size-(xia->lenpos+2));
642     }
643 }
644
645 /* Build any of msg1 to msg4. msg5 and msg6 are built from the inside
646    out using a transform of config data supplied by netlink */
/* The message is assembled in st->buffer, and st->retries is reset
   to st->setup_retries.  Returns True on success.  Returns False if
   no usable private key was found (MSG3/MSG4), if
   hacky_par_mid_failnow() says so, or if signing fails; in those
   cases st->buffer is left allocated and partly filled. */
647 static bool_t generate_msg(struct site *st, uint32_t type, cstring_t what,
648                            const struct msg *prompt
649                            /* may be 0 for MSG1 */)
650 {
651     string_t dhpub;
652     unsigned minor;
653     int ki;
654
655     st->retries=st->setup_retries;
656     BUF_ALLOC(&st->buffer,what);
657     buffer_init(&st->buffer,0);
    /* Header: session id (0 in a MSG1), our index, message type. */
658     buf_append_uint32(&st->buffer,
659         (type==LABEL_MSG1?0:st->setup_session_id));
660     buf_append_uint32(&st->buffer,st->index);
661     buf_append_uint32(&st->buffer,type);
662
    /* Our name, followed by extra info: capabilities; then the MTU
       target (MSG3/MSG4); then the ids of the peer keys in
       st->peerkeys_kex (MSG2/MSG3); then the index of the private key
       we sign with (MSG3/MSG4). */
663     struct xinfoadd xia;
664     append_string_xinfo_start(&st->buffer,&xia,st->localname);
665     buf_append_uint32(&st->buffer,st->local_capabilities);
666     if (type_is_msg34(type)) {
667         buf_append_uint16(&st->buffer,st->mtu_target);
668     }
669     if (type_is_msg23(type)) {
670         buf_append_uint8(&st->buffer,st->peerkeys_kex->nkeys);
671         for (ki=0; ki<st->peerkeys_kex->nkeys; ki++) {
672             struct peer_pubkey *pk = &st->peerkeys_kex->keys[ki];
673             BUF_ADD_OBJ(append,&st->buffer,pk->id);
674         }
675     }
676     struct sigprivkey_if *privkey=0;
677     if (type_is_msg34(type)) {
        /* Walk the key ids the peer said it accepts, in order, and
           take the first for which we have a private key.  With a
           private key cache that is a lookup; without one, only the
           all-zero key id matches, and selects privkey_fixed.  Only
           the first MAX_SIG_KEYS ids were stored, hence the extra
           bound. */
678         assert(prompt->n_pubkeys_accepted_nom>0);
679         for (ki=0;
680              ki<prompt->n_pubkeys_accepted_nom && ki<MAX_SIG_KEYS;
681              ki++) {
682             const struct sigkeyid *kid=prompt->pubkeys_accepted[ki];
683             if (st->privkeys) {
684                 privkey=st->privkeys->lookup(st->privkeys->st,kid,st->log);
685                 if (privkey) goto privkey_found;
686             } else {
687                 if (sigkeyid_equal(&keyid_zero,kid)) {
688                     privkey=st->privkey_fixed;
689                     goto privkey_found;
690                 }
691             }
692         }
        /* No key matched: log the ids the peer wanted (plus a count
           of those beyond MAX_SIG_KEYS) and give up. */
693         uint32_t class = slog_start(st,LOG_ERROR);
694         if (class) {
695             slilog_part(st->log,class,"no suitable private key, peer wanted");
696             for (ki=0;
697                  ki<prompt->n_pubkeys_accepted_nom && ki<MAX_SIG_KEYS;
698                  ki++) {
699                 slilog_part(st->log,class, " " SIGKEYID_PR_FMT,
700                             SIGKEYID_PR_VAL(prompt->pubkeys_accepted[ki]));
701             }
702             if (prompt->n_pubkeys_accepted_nom > MAX_SIG_KEYS)
703                 slilog_part(st->log,class," +%d",
704                             prompt->n_pubkeys_accepted_nom - MAX_SIG_KEYS);
705             slilog_part(st->log,class,"\n");
706         }
707         return False;
708
709     privkey_found:
        /* ki is the position, in the peer's list, of the key chosen. */
710         slog(st,LOG_SIGKEYS,"using private key #%d " SIGKEYID_PR_FMT,
711              ki, SIGKEYID_PR_VAL(prompt->pubkeys_accepted[ki]));
712         buf_append_uint8(&st->buffer,ki);
713     }
714
715     append_string_xinfo_done(&st->buffer,&xia);
716
    /* Peer's name and our nonce; a MSG1 ends here.  Then the peer's
       nonce; a MSG2 ends here. */
717     buf_append_string(&st->buffer,st->remotename);
718     BUF_ADD_OBJ(append,&st->buffer,st->localN);
719     if (type==LABEL_MSG1) return True;
720     BUF_ADD_OBJ(append,&st->buffer,st->remoteN);
721     if (type==LABEL_MSG2) return True;
722
723     if (hacky_par_mid_failnow()) return False;
724
    /* A MSG3 whose minor number is at least 1 also carries the
       capability bit of the chosen transform. */
725     if (MSGMAJOR(type) == 3) do {
726         minor = MSGMINOR(type);
727         if (minor < 1) break;
728         buf_append_uint8(&st->buffer,st->chosen_transform->capab_bit);
729     } while (0);
730
    /* Our DH public value; makepublic's result is ours to free. */
731     dhpub=st->dh->makepublic(st->dh->st,st->dhsecret,st->dh->len);
732     buf_append_string(&st->buffer,dhpub);
733     free(dhpub);
734
    /* Sign everything assembled so far.  The buffer itself is passed
       as the last argument; NOTE(review): presumably sign() appends
       the signature to it -- confirm in the signing code. */
735     bool_t ok=privkey->sign(privkey->st,
736                             st->buffer.start,
737                             st->buffer.size,
738                             &st->buffer);
739     if (!ok) goto fail;
740     return True;
741
742  fail:
743     return False;
744 }
745
746 static bool_t unpick_name(struct buffer_if *msg, struct parsedname *nm)
747 {
748     CHECK_AVAIL(msg,2);
749     nm->len=buf_unprepend_uint16(msg);
750     CHECK_AVAIL(msg,nm->len);
751     nm->name=buf_unprepend(msg,nm->len);
752     uint8_t *nul=memchr(nm->name,0,nm->len);
753     if (!nul) {
754         buffer_readonly_view(&nm->extrainfo,0,0);
755     } else {
756         buffer_readonly_view(&nm->extrainfo, nul+1, msg->start-(nul+1));
757         nm->len=nul-nm->name;
758     }
759     return True;
760 }
761
/* Take apart a key setup packet which is expected to be of the given
   type, filling in *m.  Returns False if the packet is too short, has
   a different type, has bytes left over where none are expected, or
   names a signing key index we do not have; True otherwise.  PROD,
   MSG1 and MSG2 packets return early, before the later members of *m
   are set.  For the remaining types the signature is parsed by the
   unpick() method of the selected key in st->peerkeys_kex. */
762 static bool_t unpick_msg(struct site *st, uint32_t type,
763                          struct buffer_if *msg, struct msg *m)
764 {
765     unsigned minor;
766
767     m->n_pubkeys_accepted_nom=-1;
768     m->capab_transformnum=-1;
769     m->signing_key_index=-1;
    /* Remember where the packet starts; hashlen is measured from
       here further down. */
770     m->hashstart=msg->start;
771     CHECK_AVAIL(msg,4);
772     m->dest=buf_unprepend_uint32(msg);
773     CHECK_AVAIL(msg,4);
774     m->source=buf_unprepend_uint32(msg);
775     CHECK_TYPE(msg,type);
776     if (!unpick_name(msg,&m->remote)) return False;
    /* The extra info after the sender's name is optional at every
       step: each item is read only if some bytes remain, and
       otherwise gets a default. */
777     m->remote_capabilities=0;
778     m->remote_mtu=0;
779     if (m->remote.extrainfo.size) {
780         CHECK_AVAIL(&m->remote.extrainfo,4);
781         m->remote_capabilities=buf_unprepend_uint32(&m->remote.extrainfo);
782     }
783     if (type_is_msg34(type) && m->remote.extrainfo.size) {
784         CHECK_AVAIL(&m->remote.extrainfo,2);
785         m->remote_mtu=buf_unprepend_uint16(&m->remote.extrainfo);
786     }
787     if (type_is_msg23(type) && m->remote.extrainfo.size) {
        /* A count byte, then that many key ids.  All are consumed,
           but only the first MAX_SIG_KEYS are remembered; a count of
           zero is rejected. */
788         m->n_pubkeys_accepted_nom = buf_unprepend_uint8(&m->remote.extrainfo);
789         if (!m->n_pubkeys_accepted_nom) return False;
790         for (int ki_nom=0; ki_nom<m->n_pubkeys_accepted_nom; ki_nom++) {
791             CHECK_AVAIL(&m->remote.extrainfo,KEYIDSZ);
792             struct sigkeyid *kid = buf_unprepend(&m->remote.extrainfo,KEYIDSZ);
793             if (ki_nom<MAX_SIG_KEYS) m->pubkeys_accepted[ki_nom] = kid;
794         }
795     } else {
        /* No key list: behave as if the single all-zero key id had
           been listed. */
796         m->n_pubkeys_accepted_nom = 1;
797         m->pubkeys_accepted[0] = &keyid_zero;
798     }
799     if (type_is_msg34(type) && m->remote.extrainfo.size) {
800         m->signing_key_index=buf_unprepend_uint8(&m->remote.extrainfo);
801     } else {
802         m->signing_key_index=0;
803     }
804     if (!unpick_name(msg,&m->local)) return False;
805     if (type==LABEL_PROD) {
806         CHECK_EMPTY(msg);
807         return True;
808     }
809     CHECK_AVAIL(msg,NONCELEN);
810     m->nR=buf_unprepend(msg,NONCELEN);
811     if (type==LABEL_MSG1) {
812         CHECK_EMPTY(msg);
813         return True;
814     }
815     CHECK_AVAIL(msg,NONCELEN);
816     m->nL=buf_unprepend(msg,NONCELEN);
817     if (type==LABEL_MSG2) {
818         CHECK_EMPTY(msg);
819         return True;
820     }
    /* A MSG3 whose minor number is at least 1 carries a one-byte
       transform capability number; for older minors the ancient
       transform is assumed. */
821     if (MSGMAJOR(type) == 3) do {
822         minor = MSGMINOR(type);
823 #define MAYBE_READ_CAP(minminor, kind, dflt) do {                       \
824     if (minor < (minminor))                                             \
825         m->capab_##kind##num = (dflt);                                  \
826     else {                                                              \
827         CHECK_AVAIL(msg, 1);                                            \
828         m->capab_##kind##num = buf_unprepend_uint8(msg);                \
829     }                                                                   \
830 } while (0)
831         MAYBE_READ_CAP(1, transform, CAPAB_BIT_ANCIENTTRANSFORM);
832 #undef MAYBE_READ_CAP
833     } while (0);
834     CHECK_AVAIL(msg,2);
835     m->pklen=buf_unprepend_uint16(msg);
836     CHECK_AVAIL(msg,m->pklen);
837     m->pk=buf_unprepend(msg,m->pklen);
    /* Everything from the start of the packet up to here (that is,
       all but the signature) is what hashlen covers. */
838     m->hashlen=msg->start-m->hashstart;
839
840     if (m->signing_key_index < 0 ||
841         m->signing_key_index >= st->peerkeys_kex->nkeys) {
842         return False;
843     }
844     struct sigpubkey_if *pubkey=
845         st->peerkeys_kex->keys[m->signing_key_index].pubkey;
846     if (!pubkey->unpick(pubkey->st,msg,&m->sig)) {
847         return False;
848     }
849
850     CHECK_EMPTY(msg);
851
852     return True;
853 }
854
855 static bool_t name_matches(const struct parsedname *nm, const char *expected)
856 {
857     int expected_len=strlen(expected);
858     return
859         nm->len == expected_len &&
860         !memcmp(nm->name, expected, expected_len);
861 }    
862
863 static bool_t check_msg(struct site *st, uint32_t type, struct msg *m,
864                         cstring_t *error)
865 {
866     if (type==LABEL_MSG1) return True;
867
868     /* Check that the site names and our nonce have been sent
869        back correctly, and then store our peer's nonce. */ 
870     if (!name_matches(&m->remote,st->remotename)) {
871         *error="wrong remote site name";
872         return False;
873     }
874     if (!name_matches(&m->local,st->localname)) {
875         *error="wrong local site name";
876         return False;
877     }
878     if (memcmp(m->nL,st->localN,NONCELEN)!=0) {
879         *error="wrong locally-generated nonce";
880         return False;
881     }
882     if (type==LABEL_MSG2) return True;
883     if (!consttime_memeq(m->nR,st->remoteN,NONCELEN)) {
884         *error="wrong remotely-generated nonce";
885         return False;
886     }
887     /* MSG3 has complicated rules about capabilities, which are
888      * handled in process_msg3. */
889     if (MSGMAJOR(type) == 3) return True;
890     if (m->remote_capabilities!=st->remote_capabilities) {
891         *error="remote capabilities changed";
892         return False;
893     }
894     if (type==LABEL_MSG4) return True;
895     *error="unknown message type";
896     return False;
897 }
898
899 static void peerkeys_maybe_incorporate(struct site *st, const char *file,
900                                        const char *whatmore,
901                                        int logcl_enoent)
902 {
903     struct peer_keyset *atsuffix=
904         keyset_load(file,&st->scratch,st->log,logcl_enoent);
905     if (!atsuffix) return;
906
907     if (st->peerkeys_current &&
908         serial_cmp(atsuffix->serial,st->peerkeys_current->serial) <= 0) {
909         slog(st,LOG_SIGKEYS,"keys from %s%s are older, discarding",
910              file,whatmore);
911         keyset_dispose(&atsuffix);
912         int r=unlink(file);
913         if (r) slog(st,LOG_ERROR,"failed to remove old key update %s: %s\n",
914                     st->peerkeys_tmpl.buffer,strerror(errno));
915         return;
916     } else {
917         slog(st,LOG_SIGKEYS,"keys from %s%s are newer, installing",
918              file,whatmore);
919         keyset_dispose(&st->peerkeys_current);
920         st->peerkeys_current=atsuffix;
921         int r=rename(file,st->peerkeys_path);
922         if (r) slog(st,LOG_ERROR,"failed to install key update %s as %s: %s\n",
923                     st->peerkeys_tmpl.buffer,st->peerkeys_path,
924                     strerror(errno));
925     }
926 }
927
928 static void peerkeys_check_for_update(struct site *st)
929 {
930     if (!st->peerkeys_path) return;
931
932     pathprefix_template_setsuffix(&st->peerkeys_tmpl,"~proc");
933     peerkeys_maybe_incorporate(st,st->peerkeys_tmpl.buffer,
934                                " (found old update)",
935                                M_DEBUG);
936
937     pathprefix_template_setsuffix(&st->peerkeys_tmpl,"~update");
938     const char *inputp=st->peerkeys_tmpl.buffer;
939     if (access(inputp,R_OK)) {
940         if (errno!=ENOENT)
941             slog(st,LOG_ERROR,"cannot access peer key update file %s\n",
942                  inputp);
943         return;
944     }
945
946     buffer_init(&st->scratch,0);
947     BUF_ADD_BYTES(append,&st->scratch,
948                   st->peerkeys_tmpl.buffer,
949                   strlen(st->peerkeys_tmpl.buffer)+1);
950     inputp=st->scratch.start;
951
952     pathprefix_template_setsuffix(&st->peerkeys_tmpl,"~proc");
953     const char *oursp=st->peerkeys_tmpl.buffer;
954
955     int r=rename(inputp,oursp);
956     if (r) {
957         slog(st,LOG_ERROR,"failed to claim key update file %s as %s: %s",
958              inputp,oursp,strerror(errno));
959         return;
960     }
961
962     peerkeys_maybe_incorporate(st,oursp," (update)",M_ERR);
963 }
964
965
966 static bool_t kex_init(struct site *st)
967 {
968     keyset_dispose(&st->peerkeys_kex);
969     peerkeys_check_for_update(st);
970     if (!st->peerkeys_current) {
971         slog(st,LOG_SETUP_INIT,"no peer public keys, abandoning key setup");
972         return False;
973     }
974     st->peerkeys_kex = keyset_dup(st->peerkeys_current);
975     st->random->generate(st->random->st,NONCELEN,st->localN);
976     return True;
977 }
978
979 static bool_t generate_msg1(struct site *st, const struct msg *prompt_maybe_0)
980 {
981     return
982         generate_msg(st,LABEL_MSG1,"site:MSG1",prompt_maybe_0);
983 }
984
985 static bool_t process_msg1(struct site *st, struct buffer_if *msg1,
986                            const struct comm_addr *src,
987                            const struct msg *m)
988 {
989     /* We've already determined we're in an appropriate state to
990        process an incoming MSG1, and that the MSG1 has correct values
991        of A and B. */
992
993     st->setup_session_id=m->source;
994     st->remote_capabilities=m->remote_capabilities;
995     memcpy(st->remoteN,m->nR,NONCELEN);
996     return True;
997 }
998
999 static bool_t generate_msg2(struct site *st,
1000                             const struct msg *prompt_may_be_null)
1001 {
1002     return
1003         generate_msg(st,LABEL_MSG2,"site:MSG2",prompt_may_be_null);
1004 }
1005
1006 static bool_t process_msg2(struct site *st, struct buffer_if *msg2,
1007                            const struct comm_addr *src,
1008                            struct msg *m /* returned */)
1009 {
1010     cstring_t err;
1011
1012     if (!unpick_msg(st,LABEL_MSG2,msg2,m)) return False;
1013     if (!check_msg(st,LABEL_MSG2,m,&err)) {
1014         slog(st,LOG_SEC,"msg2: %s",err);
1015         return False;
1016     }
1017     st->setup_session_id=m->source;
1018     st->remote_capabilities=m->remote_capabilities;
1019
1020     /* Select the transform to use */
1021
1022     uint32_t remote_crypto_caps = st->remote_capabilities & CAPAB_TRANSFORM_MASK;
1023     if (!remote_crypto_caps)
1024         /* old secnets only had this one transform */
1025         remote_crypto_caps = 1UL << CAPAB_BIT_ANCIENTTRANSFORM;
1026
1027 #define CHOOSE_CRYPTO(kind, whats) do {                                 \
1028     struct kind##_if *iface;                                            \
1029     uint32_t bit, ours = 0;                                             \
1030     int i;                                                              \
1031     for (i= 0; i < st->n##kind##s; i++) {                               \
1032         iface=st->kind##s[i];                                           \
1033         bit = 1UL << iface->capab_bit;                                  \
1034         if (bit & remote_crypto_caps) goto kind##_found;                \
1035         ours |= bit;                                                    \
1036     }                                                                   \
1037     slog(st,LOG_ERROR,"no " whats " in common"                          \
1038          " (us %#"PRIx32"; them: %#"PRIx32")",                          \
1039          st->local_capabilities & ours, remote_crypto_caps);            \
1040     return False;                                                       \
1041 kind##_found:                                                           \
1042     st->chosen_##kind = iface;                                          \
1043 } while (0)
1044
1045     CHOOSE_CRYPTO(transform, "transforms");
1046
1047 #undef CHOOSE_CRYPTO
1048
1049     memcpy(st->remoteN,m->nR,NONCELEN);
1050     return True;
1051 }
1052
1053 static bool_t generate_msg3(struct site *st, const struct msg *prompt)
1054 {
1055     /* Now we have our nonce and their nonce. Think of a secret key,
1056        and create message number 3. */
1057     st->random->generate(st->random->st,st->dh->len,st->dhsecret);
1058     return generate_msg(st,
1059                         (st->remote_capabilities & CAPAB_TRANSFORM_MASK)
1060                         ? LABEL_MSG3BIS
1061                         : LABEL_MSG3,
1062                         "site:MSG3",prompt);
1063 }
1064
1065 static bool_t process_msg3_msg4(struct site *st, struct msg *m)
1066 {
1067     /* Check signature and store g^x mod m */
1068     int ki;
1069
1070     if (m->signing_key_index >= 0) {
1071         if (m->signing_key_index >= st->peerkeys_kex->nkeys)
1072             return False;
1073         ki=m->signing_key_index;
1074     } else {
1075         for (ki=0; ki<st->peerkeys_kex->nkeys; ki++)
1076             if (sigkeyid_equal(&keyid_zero,&st->peerkeys_kex->keys[ki].id))
1077                 goto found;
1078         /* not found */
1079         slog(st,LOG_ERROR,
1080              "peer signed with keyid zero, which we do not accept");
1081         return False;
1082     found:;
1083     }
1084     struct sigpubkey_if *pubkey=st->peerkeys_kex->keys[ki].pubkey;
1085
1086     if (!pubkey->check(pubkey->st,
1087                        m->hashstart,m->hashlen,
1088                        &m->sig)) {
1089         slog(st,LOG_SEC,"msg3/msg4 signature failed check!"
1090              " (key #%d " SIGKEYID_PR_FMT ")",
1091              ki, SIGKEYID_PR_VAL(&st->peerkeys_kex->keys[ki].id));
1092         return False;
1093     }
1094     slog(st,LOG_SIGKEYS,"verified peer signature with key #%d "
1095          SIGKEYID_PR_FMT, ki,
1096          SIGKEYID_PR_VAL(&st->peerkeys_kex->keys[ki].id));
1097
1098     st->remote_adv_mtu=m->remote_mtu;
1099
1100     return True;
1101 }
1102
1103 static bool_t process_msg3(struct site *st, struct buffer_if *msg3,
1104                            const struct comm_addr *src, uint32_t msgtype,
1105                            struct msg *m /* returned */)
1106 {
1107     cstring_t err;
1108
1109     switch (msgtype) {
1110         case CASES_MSG3_KNOWN: break;
1111         default: assert(0);
1112     }
1113
1114     if (!unpick_msg(st,msgtype,msg3,m)) return False;
1115     if (!check_msg(st,msgtype,m,&err)) {
1116         slog(st,LOG_SEC,"msg3: %s",err);
1117         return False;
1118     }
1119     uint32_t capab_adv_late = m->remote_capabilities
1120         & ~st->remote_capabilities & st->early_capabilities;
1121     if (capab_adv_late) {
1122         slog(st,LOG_SEC,"msg3 impermissibly adds early capability flag(s)"
1123              " %#"PRIx32" (was %#"PRIx32", now %#"PRIx32")",
1124              capab_adv_late, st->remote_capabilities, m->remote_capabilities);
1125         return False;
1126     }
1127
1128 #define CHOSE_CRYPTO(kind, what) do {                                   \
1129     struct kind##_if *iface;                                            \
1130     int i;                                                              \
1131     for (i=0; i<st->n##kind##s; i++) {                                  \
1132         iface=st->kind##s[i];                                           \
1133         if (iface->capab_bit == m->capab_##kind##num)                   \
1134             goto kind##_found;                                          \
1135     }                                                                   \
1136     slog(st,LOG_SEC,"peer chose unknown-to-us " what " %d!",            \
1137          m->capab_##kind##num);                                                 \
1138     return False;                                                       \
1139 kind##_found:                                                           \
1140     st->chosen_##kind=iface;                                            \
1141 } while (0)
1142
1143     CHOSE_CRYPTO(transform, "transform");
1144
1145 #undef CHOSE_CRYPTO
1146
1147     if (!process_msg3_msg4(st,m))
1148         return False;
1149
1150     /* Update our idea of the remote site's capabilities, now that we've
1151      * verified that its message was authentic.
1152      *
1153      * Our previous idea of the remote site's capabilities came from the
1154      * unauthenticated MSG1.  We've already checked that this new message
1155      * doesn't change any of the bits we relied upon in the past, but it may
1156      * also have set additional capability bits.  We simply throw those away
1157      * now, and use the authentic capabilities from this MSG3. */
1158     st->remote_capabilities=m->remote_capabilities;
1159
1160     /* Terminate their DH public key with a '0' */
1161     m->pk[m->pklen]=0;
1162     /* Invent our DH secret key */
1163     st->random->generate(st->random->st,st->dh->len,st->dhsecret);
1164
1165     /* Generate the shared key and set up the transform */
1166     if (!set_new_transform(st,m->pk)) return False;
1167
1168     return True;
1169 }
1170
1171 static bool_t generate_msg4(struct site *st, const struct msg *prompt)
1172 {
1173     /* We have both nonces, their public key and our private key. Generate
1174        our public key, sign it and send it to them. */
1175     return generate_msg(st,LABEL_MSG4,"site:MSG4",prompt);
1176 }
1177
1178 static bool_t process_msg4(struct site *st, struct buffer_if *msg4,
1179                            const struct comm_addr *src,
1180                            struct msg *m /* returned */)
1181 {
1182     cstring_t err;
1183
1184     if (!unpick_msg(st,LABEL_MSG4,msg4,m)) return False;
1185     if (!check_msg(st,LABEL_MSG4,m,&err)) {
1186         slog(st,LOG_SEC,"msg4: %s",err);
1187         return False;
1188     }
1189     
1190     if (!process_msg3_msg4(st,m))
1191         return False;
1192
1193     /* Terminate their DH public key with a '0' */
1194     m->pk[m->pklen]=0;
1195
1196     /* Generate the shared key and set up the transform */
1197     if (!set_new_transform(st,m->pk)) return False;
1198
1199     return True;
1200 }
1201
/* The three 32-bit words that unpick_msg0() reads from the front of a
 * packet, in this order. */
struct msg0 {
    uint32_t dest;      /* first word of the packet */
    uint32_t source;    /* second word of the packet */
    uint32_t type;      /* third word: message type label */
};
1207
1208 static bool_t unpick_msg0(struct site *st, struct buffer_if *msg0,
1209                           struct msg0 *m)
1210 {
1211     CHECK_AVAIL(msg0,4);
1212     m->dest=buf_unprepend_uint32(msg0);
1213     CHECK_AVAIL(msg0,4);
1214     m->source=buf_unprepend_uint32(msg0);
1215     CHECK_AVAIL(msg0,4);
1216     m->type=buf_unprepend_uint32(msg0);
1217     return True;
1218     /* Leaves transformed part of buffer untouched */
1219 }
1220
1221 static bool_t generate_msg5(struct site *st, const struct msg *prompt)
1222 {
1223     cstring_t transform_err;
1224
1225     BUF_ALLOC(&st->buffer,"site:MSG5");
1226     /* We are going to add four words to the message */
1227     buffer_init(&st->buffer,calculate_max_start_pad());
1228     /* Give the netlink code an opportunity to put its own stuff in the
1229        message (configuration information, etc.) */
1230     buf_prepend_uint32(&st->buffer,LABEL_MSG5);
1231     if (call_transform_forwards(st,st->new_transform,
1232                                 &st->buffer,&transform_err))
1233         return False;
1234     buf_prepend_uint32(&st->buffer,LABEL_MSG5);
1235     buf_prepend_uint32(&st->buffer,st->index);
1236     buf_prepend_uint32(&st->buffer,st->setup_session_id);
1237
1238     st->retries=st->setup_retries;
1239     return True;
1240 }
1241
1242 static bool_t process_msg5(struct site *st, struct buffer_if *msg5,
1243                            const struct comm_addr *src,
1244                            struct transform_inst_if *transform)
1245 {
1246     struct msg0 m;
1247     cstring_t transform_err;
1248
1249     if (!unpick_msg0(st,msg5,&m)) return False;
1250
1251     if (call_transform_reverse(st,transform,msg5,&transform_err)) {
1252         /* There's a problem */
1253         slog(st,LOG_SEC,"process_msg5: transform: %s",transform_err);
1254         return False;
1255     }
1256     /* Buffer should now contain untransformed PING packet data */
1257     CHECK_AVAIL(msg5,4);
1258     if (buf_unprepend_uint32(msg5)!=LABEL_MSG5) {
1259         slog(st,LOG_SEC,"MSG5/PING packet contained wrong label");
1260         return False;
1261     }
1262     /* Older versions of secnet used to write some config data here
1263      * which we ignore.  So we don't CHECK_EMPTY */
1264     return True;
1265 }
1266
1267 static void create_msg6(struct site *st, struct transform_inst_if *transform,
1268                         uint32_t session_id)
1269 {
1270     cstring_t transform_err;
1271
1272     BUF_ALLOC(&st->buffer,"site:MSG6");
1273     /* We are going to add four words to the message */
1274     buffer_init(&st->buffer,calculate_max_start_pad());
1275     /* Give the netlink code an opportunity to put its own stuff in the
1276        message (configuration information, etc.) */
1277     buf_prepend_uint32(&st->buffer,LABEL_MSG6);
1278     transform_apply_return problem =
1279         call_transform_forwards(st,transform,
1280                                 &st->buffer,&transform_err);
1281     assert(!problem);
1282     buf_prepend_uint32(&st->buffer,LABEL_MSG6);
1283     buf_prepend_uint32(&st->buffer,st->index);
1284     buf_prepend_uint32(&st->buffer,session_id);
1285 }
1286
1287 static bool_t generate_msg6(struct site *st, const struct msg *prompt)
1288 {
1289     if (!is_transform_valid(st->new_transform))
1290         return False;
1291     create_msg6(st,st->new_transform,st->setup_session_id);
1292     st->retries=1; /* Peer will retransmit MSG5 if this packet gets lost */
1293     return True;
1294 }
1295
1296 static bool_t process_msg6(struct site *st, struct buffer_if *msg6,
1297                            const struct comm_addr *src)
1298 {
1299     struct msg0 m;
1300     cstring_t transform_err;
1301
1302     if (!unpick_msg0(st,msg6,&m)) return False;
1303
1304     if (call_transform_reverse(st,st->new_transform,msg6,&transform_err)) {
1305         /* There's a problem */
1306         slog(st,LOG_SEC,"process_msg6: transform: %s",transform_err);
1307         return False;
1308     }
1309     /* Buffer should now contain untransformed PING packet data */
1310     CHECK_AVAIL(msg6,4);
1311     if (buf_unprepend_uint32(msg6)!=LABEL_MSG6) {
1312         slog(st,LOG_SEC,"MSG6/PONG packet contained invalid data");
1313         return False;
1314     }
1315     /* Older versions of secnet used to write some config data here
1316      * which we ignore.  So we don't CHECK_EMPTY */
1317     return True;
1318 }
1319
1320 static transform_apply_return
1321 decrypt_msg0(struct site *st, struct buffer_if *msg0,
1322                            const struct comm_addr *src)
1323 {
1324     cstring_t transform_err, auxkey_err, newkey_err="n/a";
1325     struct msg0 m;
1326     transform_apply_return problem;
1327
1328     if (!unpick_msg0(st,msg0,&m)) return False;
1329
1330     /* Keep a copy so we can try decrypting it with multiple keys */
1331     buffer_copy(&st->scratch, msg0);
1332
1333     problem = call_transform_reverse(st,st->current.transform,
1334                                      msg0,&transform_err);
1335     if (!problem) {
1336         if (!st->auxiliary_is_new)
1337             delete_one_key(st,&st->auxiliary_key,
1338                            "peer has used new key","auxiliary key",LOG_SEC);
1339         return 0;
1340     }
1341     if (transform_apply_return_badseq(problem))
1342         goto badseq;
1343
1344     buffer_copy(msg0, &st->scratch);
1345     problem = call_transform_reverse(st,st->auxiliary_key.transform,
1346                                      msg0,&auxkey_err);
1347     if (!problem) {
1348         slog(st,LOG_DROP,"processing packet which uses auxiliary key");
1349         if (st->auxiliary_is_new) {
1350             /* We previously timed out in state SENTMSG5 but it turns
1351              * out that our peer did in fact get our MSG5 and is
1352              * using the new key.  So we should switch to it too. */
1353             /* This is a bit like activate_new_key. */
1354             struct data_key t;
1355             t=st->current;
1356             st->current=st->auxiliary_key;
1357             st->auxiliary_key=t;
1358
1359             delete_one_key(st,&st->auxiliary_key,"peer has used new key",
1360                            "previous key",LOG_SEC);
1361             st->auxiliary_is_new=0;
1362             st->renegotiate_key_time=st->auxiliary_renegotiate_key_time;
1363         }
1364         return 0;
1365     }
1366     if (transform_apply_return_badseq(problem))
1367         goto badseq;
1368
1369     if (st->state==SITE_SENTMSG5) {
1370         buffer_copy(msg0, &st->scratch);
1371         problem = call_transform_reverse(st,st->new_transform,
1372                                          msg0,&newkey_err);
1373         if (!problem) {
1374             /* It looks like we didn't get the peer's MSG6 */
1375             /* This is like a cut-down enter_new_state(SITE_RUN) */
1376             slog(st,LOG_STATE,"will enter state RUN (MSG0 with new key)");
1377             BUF_FREE(&st->buffer);
1378             st->timeout=0;
1379             activate_new_key(st);
1380             return 0; /* do process the data in this packet */
1381         }
1382         if (transform_apply_return_badseq(problem))
1383             goto badseq;
1384     }
1385
1386     slog(st,LOG_SEC,"transform: %s (aux: %s, new: %s)",
1387          transform_err,auxkey_err,newkey_err);
1388     initiate_key_setup(st,"incoming message would not decrypt",0);
1389     send_nak(src,m.dest,m.source,m.type,msg0,"message would not decrypt");
1390     assert(problem);
1391     return problem;
1392
1393  badseq:
1394     slog(st,LOG_DROP,"transform: %s (bad seq.)",transform_err);
1395     assert(problem);
1396     return problem;
1397 }
1398
1399 static bool_t process_msg0(struct site *st, struct buffer_if *msg0,
1400                            const struct comm_addr *src)
1401 {
1402     uint32_t type;
1403     transform_apply_return problem;
1404
1405     problem = decrypt_msg0(st,msg0,src);
1406     if (problem==transform_apply_seqdupe) {
1407         /* We recently received another copy of this packet, maybe due
1408          * to polypath.  That's not a problem; indeed, for the
1409          * purposes of transport address management it is a success.
1410          * But we don't want to process the packet. */
1411         transport_data_msgok(st,src);
1412         return False;
1413     }
1414     if (problem)
1415         return False;
1416
1417     CHECK_AVAIL(msg0,4);
1418     type=buf_unprepend_uint32(msg0);
1419     switch(type) {
1420     case LABEL_MSG7:
1421         /* We must forget about the current session. */
1422         delete_keys(st,"request from peer",LOG_SEC);
1423         /* probably, the peer is shutting down, and this is going to fail,
1424          * but we need to be trying to bring the link up again */
1425         if (st->keepalive)
1426             initiate_key_setup(st,"peer requested key teardown",0);
1427         return True;
1428     case LABEL_MSG9:
1429         /* Deliver to netlink layer */
1430         st->netlink->deliver(st->netlink->st,msg0);
1431         transport_data_msgok(st,src);
1432         /* See whether we should start negotiating a new key */
1433         if (st->now > st->renegotiate_key_time)
1434             initiate_key_setup(st,"incoming packet in renegotiation window",0);
1435         return True;
1436     default:
1437         slog(st,LOG_SEC,"incoming encrypted message of type %08x "
1438              "(unknown)",type);
1439         break;
1440     }
1441     return False;
1442 }
1443
1444 static void dump_packet(struct site *st, struct buffer_if *buf,
1445                         const struct comm_addr *addr, bool_t incoming,
1446                         bool_t ok)
1447 {
1448     uint32_t dest=get_uint32(buf->start);
1449     uint32_t source=get_uint32(buf->start+4);
1450     uint32_t msgtype=get_uint32(buf->start+8);
1451
1452     if (st->log_events & LOG_DUMP)
1453         slilog(st->log,M_DEBUG,"%s: %s: %08x<-%08x: %08x: %s%s",
1454                st->tunname,incoming?"incoming":"outgoing",
1455                dest,source,msgtype,comm_addr_to_string(addr),
1456                ok?"":" - fail");
1457 }
1458
1459 static bool_t comm_addr_sendmsg(struct site *st,
1460                                 const struct comm_addr *dest,
1461                                 struct buffer_if *buf)
1462 {
1463     int i;
1464     struct comm_clientinfo *commclientinfo = 0;
1465
1466     for (i=0; i < st->ncomms; i++) {
1467         if (st->comms[i] == dest->comm) {
1468             commclientinfo = st->commclientinfos[i];
1469             break;
1470         }
1471     }
1472     return dest->comm->sendmsg(dest->comm->st, buf, dest, commclientinfo);
1473 }
1474
/* Status query hook: always reports 0, whatever ST is. */
static uint32_t site_status(void *st)
{
    const uint32_t status = 0;
    return status;
}
1479
1480 static bool_t send_msg(struct site *st)
1481 {
1482     if (st->retries>0) {
1483         transport_xmit(st, &st->setup_peers, &st->buffer, True);
1484         st->timeout=st->now+st->setup_retry_interval;
1485         st->retries--;
1486         return True;
1487     } else if (st->state==SITE_SENTMSG5) {
1488         logtimeout(st,"timed out sending MSG5, stashing new key");
1489         /* We stash the key we have produced, in case it turns out that
1490          * our peer did see our MSG5 after all and starts using it. */
1491         /* This is a bit like some of activate_new_key */
1492         struct transform_inst_if *t;
1493         t=st->auxiliary_key.transform;
1494         st->auxiliary_key.transform=st->new_transform;
1495         st->new_transform=t;
1496         dispose_transform(&st->new_transform);
1497
1498         st->auxiliary_is_new=1;
1499         st->auxiliary_key.key_timeout=st->now+st->key_lifetime;
1500         st->auxiliary_renegotiate_key_time=st->now+st->key_renegotiate_time;
1501         st->auxiliary_key.remote_session_id=st->setup_session_id;
1502
1503         enter_state_wait(st);
1504         return False;
1505     } else {
1506         logtimeout(st,"timed out sending key setup packet "
1507             "(in state %s)",state_name(st->state));
1508         enter_state_wait(st);
1509         return False;
1510     }
1511 }
1512
1513 static void site_resolve_callback(void *sst, const struct comm_addr *addrs,
1514                                   int stored_naddrs, int all_naddrs,
1515                                   const char *address, const char *failwhy)
1516 {
1517     struct site *st=sst;
1518
1519     if (!stored_naddrs) {
1520         slog(st,LOG_ERROR,"resolution of %s failed: %s",address,failwhy);
1521     } else {
1522         slog(st,LOG_PEER_ADDRS,"resolution of %s completed, %d addrs, eg: %s",
1523              address, all_naddrs, comm_addr_to_string(&addrs[0]));;
1524
1525         int space=st->transport_peers_max-st->resolving_n_results_stored;
1526         int n_tocopy=MIN(stored_naddrs,space);
1527         COPY_ARRAY(st->resolving_results + st->resolving_n_results_stored,
1528                    addrs,
1529                    n_tocopy);
1530         st->resolving_n_results_stored += n_tocopy;
1531         st->resolving_n_results_all += all_naddrs;
1532     }
1533
1534     decrement_resolving_count(st,1);
1535 }
1536
1537 static void decrement_resolving_count(struct site *st, int by)
1538 {
1539     assert(st->resolving_count>0);
1540     st->resolving_count-=by;
1541
1542     if (st->resolving_count)
1543         return;
1544
1545     /* OK, we are done with them all.  Handle combined results. */
1546
1547     const struct comm_addr *addrs=st->resolving_results;
1548     int naddrs=st->resolving_n_results_stored;
1549     assert(naddrs<=st->transport_peers_max);
1550
1551     if (naddrs) {
1552         if (naddrs != st->resolving_n_results_all) {
1553             slog(st,LOG_SETUP_INIT,"resolution of supplied addresses/names"
1554                  " yielded too many results (%d > %d), some ignored",
1555                  st->resolving_n_results_all, naddrs);
1556         }
1557         slog(st,LOG_STATE,"resolution completed, %d addrs, eg: %s",
1558              naddrs, iaddr_to_string(&addrs[0].ia));;
1559     }
1560
1561     switch (st->state) {
1562     case SITE_RESOLVE:
1563         if (transport_compute_setupinit_peers(st,addrs,naddrs,0)) {
1564             enter_new_state(st,SITE_SENTMSG1,0);
1565         } else {
1566             /* Can't figure out who to try to to talk to */
1567             slog(st,LOG_SETUP_INIT,
1568                  "key exchange failed: cannot find peer address");
1569             enter_state_run(st);
1570         }
1571         break;
1572     case SITE_SENTMSG1: case SITE_SENTMSG2:
1573     case SITE_SENTMSG3: case SITE_SENTMSG4:
1574     case SITE_SENTMSG5:
1575         if (naddrs) {
1576             /* We start using the address immediately for data too.
1577              * It's best to store it in st->peers now because we might
1578              * go via SENTMSG5, WAIT, and a MSG0, straight into using
1579              * the new key (without updating the data peer addrs). */
1580             transport_resolve_complete(st,addrs,naddrs);
1581         } else if (st->local_mobile) {
1582             /* We can't let this rest because we may have a peer
1583              * address which will break in the future. */
1584             slog(st,LOG_SETUP_INIT,"resolution failed: "
1585                  "abandoning key exchange");
1586             enter_state_wait(st);
1587         } else {
1588             slog(st,LOG_SETUP_INIT,"resolution failed: "
1589                  " continuing to use source address of peer's packets"
1590                  " for key exchange and ultimately data");
1591         }
1592         break;
1593     case SITE_RUN:
1594         if (naddrs) {
1595             slog(st,LOG_SETUP_INIT,"resolution completed tardily,"
1596                  " updating peer address(es)");
1597             transport_resolve_complete_tardy(st,addrs,naddrs);
1598         } else if (st->local_mobile) {
1599             /* Not very good.  We should queue (another) renegotiation
1600              * so that we can update the peer address. */
1601             st->key_renegotiate_time=st->now+wait_timeout(st);
1602         } else {
1603             slog(st,LOG_SETUP_INIT,"resolution failed: "
1604                  " continuing to use source address of peer's packets");
1605         }
1606         break;
1607     case SITE_WAIT:
1608     case SITE_STOP:
1609         /* oh well */
1610         break;
1611     }
1612 }
1613
1614 static bool_t initiate_key_setup(struct site *st, cstring_t reason,
1615                                  const struct comm_addr *prod_hint)
1616 {
1617     /* Reentrancy hazard: can call enter_new_state/enter_state_* */
1618     if (st->state!=SITE_RUN) return False;
1619     slog(st,LOG_SETUP_INIT,"initiating key exchange (%s)",reason);
1620     if (st->addresses) {
1621         slog(st,LOG_SETUP_INIT,"resolving peer address(es)");
1622         return enter_state_resolve(st);
1623     } else if (transport_compute_setupinit_peers(st,0,0,prod_hint)) {
1624         return enter_new_state(st,SITE_SENTMSG1,0);
1625     }
1626     slog(st,LOG_SETUP_INIT,"key exchange failed: no address for peer");
1627     return False;
1628 }
1629
1630 static void activate_new_key(struct site *st)
1631 {
1632     struct transform_inst_if *t;
1633
1634     /* We have three transform instances, which we swap between old,
1635        active and setup */
1636     t=st->auxiliary_key.transform;
1637     st->auxiliary_key.transform=st->current.transform;
1638     st->current.transform=st->new_transform;
1639     st->new_transform=t;
1640     dispose_transform(&st->new_transform);
1641
1642     st->timeout=0;
1643     st->auxiliary_is_new=0;
1644     st->auxiliary_key.key_timeout=st->current.key_timeout;
1645     st->current.key_timeout=st->now+st->key_lifetime;
1646     st->renegotiate_key_time=st->now+st->key_renegotiate_time;
1647     transport_peers_copy(st,&st->peers,&st->setup_peers);
1648     st->current.remote_session_id=st->setup_session_id;
1649
1650     /* Compute the inter-site MTU.  This is min( our_mtu, their_mtu ).
1651      * But their mtu be unspecified, in which case we just use ours. */
1652     uint32_t intersite_mtu=
1653         MIN(st->mtu_target, st->remote_adv_mtu ?: ~(uint32_t)0);
1654     st->netlink->set_mtu(st->netlink->st,intersite_mtu);
1655
1656     slog(st,LOG_ACTIVATE_KEY,"new key activated"
1657          " (mtu ours=%"PRId32" theirs=%"PRId32" intersite=%"PRId32")",
1658          st->mtu_target, st->remote_adv_mtu, intersite_mtu);
1659     enter_state_run(st);
1660 }
1661
1662 static void delete_one_key(struct site *st, struct data_key *key,
1663                            cstring_t reason, cstring_t which, uint32_t loglevel)
1664 {
1665     if (!is_transform_valid(key->transform)) return;
1666     if (reason) slog(st,loglevel,"%s deleted (%s)",which,reason);
1667     dispose_transform(&key->transform);
1668     key->key_timeout=0;
1669 }
1670
1671 static void delete_keys(struct site *st, cstring_t reason, uint32_t loglevel)
1672 {
1673     if (current_valid(st)) {
1674         slog(st,loglevel,"session closed (%s)",reason);
1675
1676         delete_one_key(st,&st->current,0,0,0);
1677         set_link_quality(st);
1678     }
1679     delete_one_key(st,&st->auxiliary_key,0,0,0);
1680 }
1681
1682 static void state_assert(struct site *st, bool_t ok)
1683 {
1684     if (!ok) fatal("site:state_assert");
1685 }
1686
1687 static void enter_state_stop(struct site *st)
1688 {
1689     st->state=SITE_STOP;
1690     st->timeout=0;
1691     delete_keys(st,"entering state STOP",LOG_TIMEOUT_KEY);
1692     dispose_transform(&st->new_transform);
1693 }
1694
1695 static void set_link_quality(struct site *st)
1696 {
1697     uint32_t quality;
1698     if (current_valid(st))
1699         quality=LINK_QUALITY_UP;
1700     else if (st->state==SITE_WAIT || st->state==SITE_STOP)
1701         quality=LINK_QUALITY_DOWN;
1702     else if (st->addresses)
1703         quality=LINK_QUALITY_DOWN_CURRENT_ADDRESS;
1704     else if (transport_peers_valid(&st->peers))
1705         quality=LINK_QUALITY_DOWN_STALE_ADDRESS;
1706     else
1707         quality=LINK_QUALITY_DOWN;
1708
1709     st->netlink->set_quality(st->netlink->st,quality);
1710 }
1711
1712 static void enter_state_run(struct site *st)
1713 {
1714     slog(st,LOG_STATE,"entering state RUN%s",
1715          current_valid(st) ? " (keyed)" : " (unkeyed)");
1716     st->state=SITE_RUN;
1717     st->timeout=0;
1718
1719     st->setup_session_id=0;
1720     transport_peers_clear(st,&st->setup_peers);
1721     keyset_dispose(&st->peerkeys_kex);
1722     FILLZERO(st->localN);
1723     FILLZERO(st->remoteN);
1724     dispose_transform(&st->new_transform);
1725     memset(st->dhsecret,0,st->dh->len);
1726     if (st->sharedsecret) memset(st->sharedsecret,0,st->sharedsecretlen);
1727     set_link_quality(st);
1728
1729     if (st->keepalive && !current_valid(st))
1730         initiate_key_setup(st, "keepalive", 0);
1731 }
1732
1733 static bool_t ensure_resolving(struct site *st)
1734 {
1735     /* Reentrancy hazard: may call site_resolve_callback and hence
1736      * enter_new_state, enter_state_* and generate_msg*. */
1737     if (st->resolving_count)
1738         return True;
1739
1740     assert(st->addresses);
1741
1742     /* resolver->request might reentrantly call site_resolve_callback
1743      * which will decrement st->resolving, so we need to increment it
1744      * twice beforehand to prevent decrement from thinking we're
1745      * finished, and decrement it ourselves.  Alternatively if
1746      * everything fails then there are no callbacks due and we simply
1747      * set it to 0 and return false.. */
1748     st->resolving_n_results_stored=0;
1749     st->resolving_n_results_all=0;
1750     st->resolving_count+=2;
1751     const char **addrp=st->addresses;
1752     const char *address;
1753     bool_t anyok=False;
1754     for (; (address=*addrp++); ) {
1755         bool_t ok = st->resolver->request(st->resolver->st,address,
1756                                           st->remoteport,st->comms[0],
1757                                           site_resolve_callback,st);
1758         if (ok)
1759             st->resolving_count++;
1760         anyok|=ok;
1761     }
1762     if (!anyok) {
1763         st->resolving_count=0;
1764         return False;
1765     }
1766     decrement_resolving_count(st,2);
1767     return True;
1768 }
1769
1770 static bool_t enter_state_resolve(struct site *st)
1771 {
1772     /* Reentrancy hazard!  See ensure_resolving. */
1773     state_assert(st,st->state==SITE_RUN);
1774     slog(st,LOG_STATE,"entering state RESOLVE");
1775     st->state=SITE_RESOLVE;
1776     return ensure_resolving(st);
1777 }
1778
1779 static bool_t enter_new_state(struct site *st, uint32_t next,
1780                               const struct msg *prompt
1781                               /* may be 0 for SENTMSG1 */)
1782 {
1783     bool_t (*gen)(struct site *st, const struct msg *prompt);
1784     int r;
1785
1786     slog(st,LOG_STATE,"entering state %s",state_name(next));
1787     switch(next) {
1788     case SITE_SENTMSG1:
1789         state_assert(st,st->state==SITE_RUN || st->state==SITE_RESOLVE);
1790         if (!kex_init(st)) return False;
1791         gen=generate_msg1;
1792         st->msg1_crossed_logged = False;
1793         break;
1794     case SITE_SENTMSG2:
1795         state_assert(st,st->state==SITE_RUN || st->state==SITE_RESOLVE ||
1796                      st->state==SITE_SENTMSG1 || st->state==SITE_WAIT);
1797         if (!kex_init(st)) return False;
1798         gen=generate_msg2;
1799         break;
1800     case SITE_SENTMSG3:
1801         state_assert(st,st->state==SITE_SENTMSG1);
1802         BUF_FREE(&st->buffer);
1803         gen=generate_msg3;
1804         break;
1805     case SITE_SENTMSG4:
1806         state_assert(st,st->state==SITE_SENTMSG2);
1807         BUF_FREE(&st->buffer);
1808         gen=generate_msg4;
1809         break;
1810     case SITE_SENTMSG5:
1811         state_assert(st,st->state==SITE_SENTMSG3);
1812         BUF_FREE(&st->buffer);
1813         gen=generate_msg5;
1814         break;
1815     case SITE_RUN:
1816         state_assert(st,st->state==SITE_SENTMSG4);
1817         BUF_FREE(&st->buffer);
1818         gen=generate_msg6;
1819         break;
1820     default:
1821         gen=NULL;
1822         fatal("enter_new_state(%s): invalid new state",state_name(next));
1823         break;
1824     }
1825
1826     if (hacky_par_start_failnow()) return False;
1827
1828     r= gen(st,prompt) && send_msg(st);
1829
1830     hacky_par_end(&r,
1831                   st->setup_retries, st->setup_retry_interval,
1832                   send_msg, st);
1833     
1834     if (r) {
1835         st->state=next;
1836         if (next==SITE_RUN) {
1837             BUF_FREE(&st->buffer); /* Never reused */
1838             st->timeout=0; /* Never retransmit */
1839             activate_new_key(st);
1840         }
1841         return True;
1842     }
1843     slog(st,LOG_ERROR,"error entering state %s",state_name(next));
1844     st->buffer.free=False; /* Unconditionally use the buffer; it may be
1845                               in either state, and enter_state_wait() will
1846                               do a BUF_FREE() */
1847     enter_state_wait(st);
1848     return False;
1849 }
1850
1851 /* msg7 tells our peer that we're about to forget our key */
1852 static bool_t send_msg7(struct site *st, cstring_t reason)
1853 {
1854     cstring_t transform_err;
1855
1856     if (current_valid(st) && st->buffer.free
1857         && transport_peers_valid(&st->peers)) {
1858         BUF_ALLOC(&st->buffer,"site:MSG7");
1859         buffer_init(&st->buffer,calculate_max_start_pad());
1860         buf_append_uint32(&st->buffer,LABEL_MSG7);
1861         buf_append_string(&st->buffer,reason);
1862         if (call_transform_forwards(st, st->current.transform,
1863                                     &st->buffer, &transform_err))
1864             goto free_out;
1865         buf_prepend_uint32(&st->buffer,LABEL_MSG0);
1866         buf_prepend_uint32(&st->buffer,st->index);
1867         buf_prepend_uint32(&st->buffer,st->current.remote_session_id);
1868         transport_xmit(st,&st->peers,&st->buffer,True);
1869         BUF_FREE(&st->buffer);
1870     free_out:
1871         return True;
1872     }
1873     return False;
1874 }
1875
1876 /* We go into this state if our peer becomes uncommunicative. Similar to
1877    the "stop" state, we forget all session keys for a while, before
1878    re-entering the "run" state. */
1879 static void enter_state_wait(struct site *st)
1880 {
1881     slog(st,LOG_STATE,"entering state WAIT");
1882     st->timeout=st->now+wait_timeout(st);
1883     st->state=SITE_WAIT;
1884     set_link_quality(st);
1885     BUF_FREE(&st->buffer); /* will have had an outgoing packet in it */
1886     /* XXX Erase keys etc. */
1887 }
1888
1889 static void generate_prod(struct site *st, struct buffer_if *buf)
1890 {
1891     buffer_init(buf,0);
1892     buf_append_uint32(buf,0);
1893     buf_append_uint32(buf,0);
1894     buf_append_uint32(buf,LABEL_PROD);
1895     buf_append_string(buf,st->localname);
1896     buf_append_string(buf,st->remotename);
1897 }
1898
1899 static void generate_send_prod(struct site *st,
1900                                const struct comm_addr *source)
1901 {
1902     if (!st->allow_send_prod) return; /* too soon */
1903     if (!(st->state==SITE_RUN || st->state==SITE_RESOLVE ||
1904           st->state==SITE_WAIT)) return; /* we'd ignore peer's MSG1 */
1905
1906     slog(st,LOG_SETUP_INIT,"prodding peer for key exchange");
1907     st->allow_send_prod=0;
1908     generate_prod(st,&st->scratch);
1909     bool_t ok = comm_addr_sendmsg(st, source, &st->scratch);
1910     dump_packet(st,&st->scratch,source,False,ok);
1911 }
1912
1913 static inline void site_settimeout(uint64_t timeout, int *timeout_io)
1914 {
1915     if (timeout) {
1916         int64_t offset=timeout-*now;
1917         if (offset<0) offset=0;
1918         if (offset>INT_MAX) offset=INT_MAX;
1919         if (*timeout_io<0 || offset<*timeout_io)
1920             *timeout_io=offset;
1921     }
1922 }
1923
1924 static int site_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
1925                            int *timeout_io)
1926 {
1927     struct site *st=sst;
1928
1929     BEFOREPOLL_WANT_FDS(0); /* We don't use any file descriptors */
1930     st->now=*now;
1931
1932     /* Work out when our next timeout is. The earlier of 'timeout' or
1933        'current.key_timeout'. A stored value of '0' indicates no timeout
1934        active. */
1935     site_settimeout(st->timeout, timeout_io);
1936     site_settimeout(st->current.key_timeout, timeout_io);
1937     site_settimeout(st->auxiliary_key.key_timeout, timeout_io);
1938
1939     return 0; /* success */
1940 }
1941
1942 static void check_expiry(struct site *st, struct data_key *key,
1943                          const char *which)
1944 {
1945     if (key->key_timeout && *now>key->key_timeout) {
1946         delete_one_key(st,key,"maximum life exceeded",which,LOG_TIMEOUT_KEY);
1947     }
1948 }
1949
1950 /* NB site_afterpoll will be called before site_beforepoll is ever called */
1951 static void site_afterpoll(void *sst, struct pollfd *fds, int nfds)
1952 {
1953     struct site *st=sst;
1954
1955     st->now=*now;
1956     if (st->timeout && *now>st->timeout) {
1957         st->timeout=0;
1958         if (st->state>=SITE_SENTMSG1 && st->state<=SITE_SENTMSG5) {
1959             if (!hacky_par_start_failnow())
1960                 send_msg(st);
1961         } else if (st->state==SITE_WAIT) {
1962             enter_state_run(st);
1963         } else {
1964             slog(st,LOG_ERROR,"site_afterpoll: unexpected timeout, state=%d",
1965                  st->state);
1966         }
1967     }
1968     check_expiry(st,&st->current,"current key");
1969     check_expiry(st,&st->auxiliary_key,"auxiliary key");
1970 }
1971
1972 /* This function is called by the netlink device to deliver packets
1973    intended for the remote network. The packet is in "raw" wire
1974    format, but is guaranteed to be word-aligned. */
1975 static void site_outgoing(void *sst, struct buffer_if *buf)
1976 {
1977     struct site *st=sst;
1978     cstring_t transform_err;
1979     
1980     if (st->state==SITE_STOP) {
1981         BUF_FREE(buf);
1982         return;
1983     }
1984
1985     st->allow_send_prod=1;
1986
1987     /* In all other states we consider delivering the packet if we have
1988        a valid key and a valid address to send it to. */
1989     if (current_valid(st) && transport_peers_valid(&st->peers)) {
1990         /* Transform it and send it */
1991         if (buf->size>0) {
1992             buf_prepend_uint32(buf,LABEL_MSG9);
1993             if (call_transform_forwards(st, st->current.transform,
1994                                         buf, &transform_err))
1995                 goto free_out;
1996             buf_prepend_uint32(buf,LABEL_MSG0);
1997             buf_prepend_uint32(buf,st->index);
1998             buf_prepend_uint32(buf,st->current.remote_session_id);
1999             transport_xmit(st,&st->peers,buf,False);
2000         }
2001     free_out:
2002         BUF_FREE(buf);
2003         return;
2004     }
2005
2006     slog(st,LOG_DROP,"discarding outgoing packet of size %d",buf->size);
2007     BUF_FREE(buf);
2008     initiate_key_setup(st,"outgoing packet",0);
2009 }
2010
2011 static bool_t named_for_us(struct site *st, const struct buffer_if *buf_in,
2012                            uint32_t type, struct msg *m,
2013                            struct priomsg *whynot)
2014     /* For packets which are identified by the local and remote names.
2015      * If it has our name and our peer's name in it it's for us. */
2016 {
2017     struct buffer_if buf[1];
2018     buffer_readonly_clone(buf,buf_in);
2019
2020     if (!unpick_msg(st,type,buf,m)) {
2021         priomsg_update_fixed(whynot, comm_notify_whynot_unpick, "malformed");
2022         return False;
2023     }
2024 #define NAME_MATCHES(lr)                                                \
2025     if (!name_matches(&m->lr, st->lr##name)) {                          \
2026         if (priomsg_update_fixed(whynot, comm_notify_whynot_name_##lr,  \
2027                                  "unknown " #lr " name: ")) {           \
2028             truncmsg_add_packet_string(&whynot->m, m->lr.len, m->lr.name); \
2029         }                                                               \
2030         return False;                                                   \
2031     }
2032     NAME_MATCHES(remote);
2033     NAME_MATCHES(local );
2034 #undef NAME_MATCHES
2035
2036     return True;
2037 }
2038
2039 static bool_t we_have_priority(struct site *st, const struct msg *m) {
2040     if (st->local_capabilities & m->remote_capabilities &
2041         CAPAB_PRIORITY_MOBILE) {
2042         if (st->local_mobile) return True;
2043         if (st-> peer_mobile) return False;
2044     }
2045     return st->our_name_later;
2046 }
2047
2048 static bool_t setup_late_msg_ok(struct site *st, 
2049                                 const struct buffer_if *buf_in,
2050                                 uint32_t msgtype,
2051                                 const struct comm_addr *source,
2052                                 struct msg *m /* returned */) {
2053     /* For setup packets which seem from their type like they are
2054      * late.  Maybe they came via a different path.  All we do is make
2055      * a note of the sending address, iff they look like they are part
2056      * of the current key setup attempt. */
2057     if (!named_for_us(st,buf_in,msgtype,m,0))
2058         /* named_for_us calls unpick_msg which gets the nonces */
2059         return False;
2060     if (!consttime_memeq(m->nR,st->remoteN,NONCELEN) ||
2061         !consttime_memeq(m->nL,st->localN, NONCELEN))
2062         /* spoof ?  from stale run ?  who knows */
2063         return False;
2064     transport_setup_msgok(st,source);
2065     return True;
2066 }
2067
2068 /* This function is called by the communication device to deliver
2069    packets from our peers.
2070    It should return True if the packet is recognised as being for
2071    this current site instance (and should therefore not be processed
2072    by other sites), even if the packet was otherwise ignored. */
2073 static bool_t site_incoming(void *sst, struct buffer_if *buf,
2074                             const struct comm_addr *source,
2075                             struct priomsg *whynot)
2076 {
2077     struct site *st=sst;
2078
2079     if (buf->size < 12) return False;
2080
2081     uint32_t dest=get_uint32(buf->start);
2082     uint32_t msgtype=get_uint32(buf->start+8);
2083     struct msg msg;
2084       /* initialised by named_for_us, or process_msgN for N!=1 */
2085
2086     if (msgtype==LABEL_MSG1) {
2087         if (!named_for_us(st,buf,msgtype,&msg,whynot))
2088             return False;
2089         /* It's a MSG1 addressed to us. Decide what to do about it. */
2090         dump_packet(st,buf,source,True,True);
2091         if (st->state==SITE_RUN || st->state==SITE_RESOLVE ||
2092             st->state==SITE_WAIT) {
2093             /* We should definitely process it */
2094             transport_compute_setupinit_peers(st,0,0,source);
2095             if (process_msg1(st,buf,source,&msg)) {
2096                 slog(st,LOG_SETUP_INIT,"key setup initiated by peer");
2097                 bool_t entered=enter_new_state(st,SITE_SENTMSG2,&msg);
2098                 if (entered && st->addresses && st->local_mobile)
2099                     /* We must do this as the very last thing, because
2100                        the resolver callback might reenter us. */
2101                     ensure_resolving(st);
2102             } else {
2103                 slog(st,LOG_ERROR,"failed to process incoming msg1");
2104             }
2105             BUF_FREE(buf);
2106             return True;
2107         } else if (st->state==SITE_SENTMSG1) {
2108             /* We've just sent a message 1! They may have crossed on
2109                the wire. If we have priority then we ignore the
2110                incoming one, otherwise we process it as usual. */
2111             if (we_have_priority(st,&msg)) {
2112                 BUF_FREE(buf);
2113                 if (!st->msg1_crossed_logged++)
2114                     slog(st,LOG_SETUP_INIT,"crossed msg1s; we are higher "
2115                          "priority => ignore incoming msg1");
2116                 return True;
2117             } else {
2118                 slog(st,LOG_SETUP_INIT,"crossed msg1s; we are lower "
2119                      "priority => use incoming msg1");
2120                 if (process_msg1(st,buf,source,&msg)) {
2121                     BUF_FREE(&st->buffer); /* Free our old message 1 */
2122                     transport_setup_msgok(st,source);
2123                     enter_new_state(st,SITE_SENTMSG2,&msg);
2124                 } else {
2125                     slog(st,LOG_ERROR,"failed to process an incoming "
2126                          "crossed msg1 (we have low priority)");
2127                 }
2128                 BUF_FREE(buf);
2129                 return True;
2130             }
2131         } else if (st->state==SITE_SENTMSG2 ||
2132                    st->state==SITE_SENTMSG4) {
2133             if (consttime_memeq(msg.nR,st->remoteN,NONCELEN)) {
2134                 /* We are ahead in the protocol, but that msg1 had the
2135                  * peer's nonce so presumably it is from this key
2136                  * exchange run, via a slower route */
2137                 transport_setup_msgok(st,source);
2138             } else {
2139                 slog(st,LOG_UNEXPECTED,"competing incoming message 1");
2140             }
2141             BUF_FREE(buf);
2142             return True;
2143         }
2144         /* The message 1 was received at an unexpected stage of the
2145            key setup.  Well, they lost the race. */
2146         slog(st,LOG_UNEXPECTED,"unexpected incoming message 1");
2147         BUF_FREE(buf);
2148         return True;
2149     }
2150     if (msgtype==LABEL_PROD) {
2151         if (!named_for_us(st,buf,msgtype,&msg,whynot))
2152             return False;
2153         dump_packet(st,buf,source,True,True);
2154         if (st->state!=SITE_RUN) {
2155             slog(st,LOG_DROP,"ignoring PROD when not in state RUN");
2156         } else if (current_valid(st)) {
2157             slog(st,LOG_DROP,"ignoring PROD when we think we have a key");
2158         } else {
2159             initiate_key_setup(st,"peer sent PROD packet",source);
2160         }
2161         BUF_FREE(buf);
2162         return True;
2163     }
2164     if (dest==st->index) {
2165         /* Explicitly addressed to us */
2166         if (msgtype!=LABEL_MSG0) dump_packet(st,buf,source,True,True);
2167         switch (msgtype) {
2168         case LABEL_NAK:
2169             /* If the source is our current peer then initiate a key setup,
2170                because our peer's forgotten the key */
2171             if (get_uint32(buf->start+4)==st->current.remote_session_id) {
2172                 bool_t initiated;
2173                 initiated = initiate_key_setup(st,"received a NAK",source);
2174                 if (!initiated) generate_send_prod(st,source);
2175             } else {
2176                 slog(st,LOG_SEC,"bad incoming NAK");
2177             }
2178             break;
2179         case LABEL_MSG0:
2180             process_msg0(st,buf,source);
2181             break;
2182         case LABEL_MSG1:
2183             /* Setup packet: should not have been explicitly addressed
2184                to us */
2185             slog(st,LOG_SEC,"incoming explicitly addressed msg1");
2186             break;
2187         case LABEL_MSG2:
2188             /* Setup packet: expected only in state SENTMSG1 */
2189             if (st->state!=SITE_SENTMSG1) {
2190                 if ((st->state==SITE_SENTMSG3 ||
2191                      st->state==SITE_SENTMSG5) &&
2192                     setup_late_msg_ok(st,buf,msgtype,source,&msg))
2193                     break;
2194                 slog(st,LOG_UNEXPECTED,"unexpected MSG2");
2195             } else if (process_msg2(st,buf,source,&msg)) {
2196                 transport_setup_msgok(st,source);
2197                 enter_new_state(st,SITE_SENTMSG3,&msg);
2198             } else {
2199                 slog(st,LOG_SEC,"invalid MSG2");
2200             }
2201             break;
2202         case CASES_MSG3_KNOWN:
2203             /* Setup packet: expected only in state SENTMSG2 */
2204             if (st->state!=SITE_SENTMSG2) {
2205                 if ((st->state==SITE_SENTMSG4) &&
2206                     setup_late_msg_ok(st,buf,msgtype,source,&msg))
2207                     break;
2208                 slog(st,LOG_UNEXPECTED,"unexpected MSG3");
2209             } else if (process_msg3(st,buf,source,msgtype,&msg)) {
2210                 transport_setup_msgok(st,source);
2211                 enter_new_state(st,SITE_SENTMSG4,&msg);
2212             } else {
2213                 slog(st,LOG_SEC,"invalid MSG3");
2214             }
2215             break;
2216         case LABEL_MSG4:
2217             /* Setup packet: expected only in state SENTMSG3 */
2218             if (st->state!=SITE_SENTMSG3) {
2219                 if ((st->state==SITE_SENTMSG5) &&
2220                     setup_late_msg_ok(st,buf,msgtype,source,&msg))
2221                     break;
2222                 slog(st,LOG_UNEXPECTED,"unexpected MSG4");
2223             } else if (process_msg4(st,buf,source,&msg)) {
2224                 transport_setup_msgok(st,source);
2225                 enter_new_state(st,SITE_SENTMSG5,&msg);
2226             } else {
2227                 slog(st,LOG_SEC,"invalid MSG4");
2228             }
2229             break;
2230         case LABEL_MSG5:
2231             /* Setup packet: expected only in state SENTMSG4 */
2232             /* (may turn up in state RUN if our return MSG6 was lost
2233                and the new key has already been activated. In that
2234                case we discard it. The peer will realise that we
2235                are using the new key when they see our data packets.
2236                Until then the peer's data packets to us get discarded. */
2237             if (st->state==SITE_SENTMSG4) {
2238                 if (process_msg5(st,buf,source,st->new_transform)) {
2239                     transport_setup_msgok(st,source);
2240                     enter_new_state(st,SITE_RUN,&msg);
2241                 } else {
2242                     slog(st,LOG_SEC,"invalid MSG5");
2243                 }
2244             } else if (st->state==SITE_RUN) {
2245                 if (process_msg5(st,buf,source,st->current.transform)) {
2246                     slog(st,LOG_DROP,"got MSG5, retransmitting MSG6");
2247                     transport_setup_msgok(st,source);
2248                     create_msg6(st,st->current.transform,
2249                                 st->current.remote_session_id);
2250                     transport_xmit(st,&st->peers,&st->buffer,True);
2251                     BUF_FREE(&st->buffer);
2252                 } else {
2253                     slog(st,LOG_SEC,"invalid MSG5 (in state RUN)");
2254                 }
2255             } else {
2256                 slog(st,LOG_UNEXPECTED,"unexpected MSG5");
2257             }
2258             break;
2259         case LABEL_MSG6:
2260             /* Setup packet: expected only in state SENTMSG5 */
2261             if (st->state!=SITE_SENTMSG5) {
2262                 slog(st,LOG_UNEXPECTED,"unexpected MSG6");
2263             } else if (process_msg6(st,buf,source)) {
2264                 BUF_FREE(&st->buffer); /* Free message 5 */
2265                 transport_setup_msgok(st,source);
2266                 activate_new_key(st);
2267             } else {
2268                 slog(st,LOG_SEC,"invalid MSG6");
2269             }
2270             break;
2271         default:
2272             slog(st,LOG_SEC,"received message of unknown type 0x%08x",
2273                  msgtype);
2274             break;
2275         }
2276         BUF_FREE(buf);
2277         return True;
2278     }
2279
2280     priomsg_update_fixed(whynot, comm_notify_whynot_general,
2281                          "not MSG1 or PROD; unknown dest index");
2282     return False;
2283 }
2284
2285 static void site_control(void *vst, bool_t run)
2286 {
2287     struct site *st=vst;
2288     if (run) enter_state_run(st);
2289     else enter_state_stop(st);
2290 }
2291
/* Phase hook: tries to send the peer a MSG7 with the reason
 * "shutting down".  `newphase' is not examined, and it does not
 * matter here whether the message could actually be sent. */
static void site_phase_hook(void *sst, uint32_t newphase)
{
    struct site *st=sst;

    (void)newphase;

    /* The program is shutting down; tell our peer */
    (void)send_msg7(st,"shutting down");
}
2299
2300 static void site_childpersist_clearkeys(void *sst, uint32_t newphase)
2301 {
2302     struct site *st=sst;
2303     dispose_transform(&st->current.transform);
2304     dispose_transform(&st->auxiliary_key.transform);
2305     dispose_transform(&st->new_transform);
2306     /* Not much point overwiting the signing key, since we loaded it
2307        from disk, and it is only valid prospectively if at all,
2308        anyway. */
2309     /* XXX it would be best to overwrite the DH state, because that
2310        _is_ relevant to forward secrecy.  However we have no
2311        convenient interface for doing that and in practice gmp has
2312        probably dribbled droppings all over the malloc arena.  A good
2313        way to fix this would be to have a privsep child for asymmetric
2314        crypto operations, but that's a task for another day. */
2315 }
2316
2317 static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
2318                           list_t *args)
2319 {
2320     static uint32_t index_sequence;
2321     struct site *st;
2322     item_t *item;
2323     dict_t *dict;
2324     int i;
2325
2326     NEW(st);
2327
2328     st->cl.description="site";
2329     st->cl.type=CL_SITE;
2330     st->cl.apply=NULL;
2331     st->cl.interface=&st->ops;
2332     st->ops.st=st;
2333     st->ops.control=site_control;
2334     st->ops.status=site_status;
2335     st->peerkeys_path=0;
2336     st->peerkeys_tmpl.buffer=0;
2337     st->peerkeys_current=st->peerkeys_kex=0;
2338
2339     /* First parameter must be a dict */
2340     item=list_elem(args,0);
2341     if (!item || item->type!=t_dict)
2342         cfgfatal(loc,"site","parameter must be a dictionary\n");
2343     
2344     dict=item->data.dict;
2345     st->log=find_cl_if(dict,"log",CL_LOG,True,"site",loc);
2346     st->log_events=string_list_to_word(dict_lookup(dict,"log-events"),
2347                                        log_event_table,"site");
2348
2349     st->localname=dict_read_string(dict, "local-name", True, "site", loc);
2350     st->remotename=dict_read_string(dict, "name", True, "site", loc);
2351
2352     st->tunname=safe_malloc(strlen(st->localname)+strlen(st->remotename)+5,
2353                             "site_apply");
2354     sprintf(st->tunname,"%s<->%s",st->localname,st->remotename);
2355
2356     /* Now slog is working */
2357
2358     st->keepalive=dict_read_bool(dict,"keepalive",False,"site",loc,False);
2359
2360     st->peer_mobile=dict_read_bool(dict,"mobile",False,"site",loc,False);
2361     st->local_mobile=
2362         dict_read_bool(dict,"local-mobile",False,"site",loc,False);
2363
2364     /* Sanity check (which also allows the 'sites' file to include
2365        site() closures for all sites including our own): refuse to
2366        talk to ourselves */
2367     if (strcmp(st->localname,st->remotename)==0) {
2368         Message(M_DEBUG,"site %s: local-name==name -> ignoring this site\n",
2369                 st->localname);
2370         if (st->peer_mobile != st->local_mobile)
2371             cfgfatal(loc,"site","site %s's peer-mobile=%d"
2372                     " but our local-mobile=%d\n",
2373                     st->localname, st->peer_mobile, st->local_mobile);
2374         free(st);
2375         return NULL;
2376     }
2377     if (st->peer_mobile && st->local_mobile) {
2378         Message(M_WARNING,"site %s: site is mobile but so are we"
2379                 " -> ignoring this site\n", st->remotename);
2380         free(st);
2381         return NULL;
2382     }
2383
2384     assert(index_sequence < 0xffffffffUL);
2385     st->index = ++index_sequence;
2386     st->local_capabilities = 0;
2387     st->early_capabilities = CAPAB_PRIORITY_MOBILE;
2388     st->netlink=find_cl_if(dict,"link",CL_NETLINK,True,"site",loc);
2389
2390 #define GET_CLOSURE_LIST(dictkey,things,nthings,CL_TYPE) do{            \
2391     list_t *things##_cfg=dict_lookup(dict,dictkey);                     \
2392     if (!things##_cfg)                                                  \
2393         cfgfatal(loc,"site","closure list \"%s\" not found\n",dictkey); \
2394     st->nthings=list_length(things##_cfg);                              \
2395     NEW_ARY(st->things,st->nthings);                                    \
2396     assert(st->nthings);                                                \
2397     for (i=0; i<st->nthings; i++) {                                     \
2398         item_t *item=list_elem(things##_cfg,i);                         \
2399         if (item->type!=t_closure)                                      \
2400             cfgfatal(loc,"site","%s is not a closure\n",dictkey);       \
2401         closure_t *cl=item->data.closure;                               \
2402         if (cl->type!=CL_TYPE)                                          \
2403             cfgfatal(loc,"site","%s closure wrong type\n",dictkey);     \
2404         st->things[i]=cl->interface;                                    \
2405     }                                                                   \
2406 }while(0)
2407
2408     GET_CLOSURE_LIST("comm",comms,ncomms,CL_COMM);
2409
2410     NEW_ARY(st->commclientinfos, st->ncomms);
2411     dict_t *comminfo = dict_read_dict(dict,"comm-info",False,"site",loc);
2412     for (i=0; i<st->ncomms; i++) {
2413         st->commclientinfos[i] =
2414             !comminfo ? 0 :
2415             st->comms[i]->clientinfo(st->comms[i],comminfo,loc);
2416     }
2417
2418     st->resolver=find_cl_if(dict,"resolver",CL_RESOLVER,True,"site",loc);
2419     st->random=find_cl_if(dict,"random",CL_RANDOMSRC,True,"site",loc);
2420
2421     st->privkeys=find_cl_if(dict,"key-cache",CL_PRIVCACHE,False,"site",loc);
2422     if (!st->privkeys) {
2423         st->privkey_fixed=
2424             find_cl_if(dict,"local-key",CL_SIGPRIVKEY,True,"site",loc);
2425     }
2426
2427     struct sigpubkey_if *fixed_pubkey
2428         =find_cl_if(dict,"key",CL_SIGPUBKEY,False,"site",loc);
2429     st->peerkeys_path=dict_read_string(dict,"peer-keys",fixed_pubkey==0,
2430                                        "site",loc);
2431     if (st->peerkeys_path) {
2432         pathprefix_template_init(&st->peerkeys_tmpl,st->peerkeys_path,
2433                                  PEERKEYS_SUFFIX_MAXLEN + 1 /* nul */);
2434         st->peerkeys_current=keyset_load(st->peerkeys_path,
2435                                          &st->scratch,st->log,M_ERR);
2436         if (fixed_pubkey) {
2437             fixed_pubkey->dispose(fixed_pubkey->st);
2438         }
2439     } else {
2440         assert(fixed_pubkey);
2441         NEW(st->peerkeys_current);
2442         st->peerkeys_current->refcount=1;
2443         st->peerkeys_current->nkeys=1;
2444         st->peerkeys_current->keys[0].id=keyid_zero;
2445         st->peerkeys_current->keys[0].pubkey=fixed_pubkey;
2446         slog(st,LOG_SIGKEYS,
2447              "using old-style fixed peer public key (no `peer-keys')");
2448     }
2449
2450     st->addresses=dict_read_string_array(dict,"address",False,"site",loc,0);
2451     if (st->addresses)
2452         st->remoteport=dict_read_number(dict,"port",True,"site",loc,0);
2453     else st->remoteport=0;
2454
2455     GET_CLOSURE_LIST("transform",transforms,ntransforms,CL_TRANSFORM);
2456
2457     st->dh=find_cl_if(dict,"dh",CL_DH,True,"site",loc);
2458
2459 #define DEFAULT(D) (st->peer_mobile || st->local_mobile \
2460                     ? DEFAULT_MOBILE_##D : DEFAULT_##D)
2461 #define CFG_NUMBER(k,D) dict_read_number(dict,(k),False,"site",loc,DEFAULT(D));
2462
2463     st->key_lifetime=         CFG_NUMBER("key-lifetime",  KEY_LIFETIME);
2464     st->setup_retries=        CFG_NUMBER("setup-retries", SETUP_RETRIES);
2465     st->setup_retry_interval= CFG_NUMBER("setup-timeout", SETUP_RETRY_INTERVAL);
2466     st->wait_timeout_mean=    CFG_NUMBER("wait-time",     WAIT_TIME);
2467     st->mtu_target= dict_read_number(dict,"mtu-target",False,"site",loc,0);
2468
2469     st->mobile_peer_expiry= dict_read_number(
2470        dict,"mobile-peer-expiry",False,"site",loc,DEFAULT_MOBILE_PEER_EXPIRY);
2471
2472     const char *peerskey= st->peer_mobile
2473         ? "mobile-peers-max" : "static-peers-max";
2474     st->transport_peers_max= dict_read_number(
2475         dict,peerskey,False,"site",loc, st->addresses ? 4 : 3);
2476     if (st->transport_peers_max<1 ||
2477         st->transport_peers_max>MAX_PEER_ADDRS) {
2478         cfgfatal(loc,"site", "%s must be in range 1.."
2479                  STRING(MAX_PEER_ADDRS) "\n", peerskey);
2480     }
2481
2482     if (st->key_lifetime < DEFAULT(KEY_RENEGOTIATE_GAP)*2)
2483         st->key_renegotiate_time=st->key_lifetime/2;
2484     else
2485         st->key_renegotiate_time=st->key_lifetime-DEFAULT(KEY_RENEGOTIATE_GAP);
2486     st->key_renegotiate_time=dict_read_number(
2487         dict,"renegotiate-time",False,"site",loc,st->key_renegotiate_time);
2488     if (st->key_renegotiate_time > st->key_lifetime) {
2489         cfgfatal(loc,"site",
2490                  "renegotiate-time must be less than key-lifetime\n");
2491     }
2492
2493     st->resolving_count=0;
2494     st->allow_send_prod=0;
2495
2496     /* The information we expect to see in incoming messages of type 1 */
2497     /* fixme: lots of unchecked overflows here, but the results are only
2498        corrupted packets rather than undefined behaviour */
2499     st->our_name_later=(strcmp(st->localname,st->remotename)>0);
2500
2501     buffer_new(&st->buffer,SETUP_BUFFER_LEN);
2502
2503     buffer_new(&st->scratch,SETUP_BUFFER_LEN);
2504     BUF_ALLOC(&st->scratch,"site:scratch");
2505
2506     /* We are interested in poll(), but only for timeouts. We don't have
2507        any fds of our own. */
2508     register_for_poll(st, site_beforepoll, site_afterpoll, "site");
2509     st->timeout=0;
2510
2511     st->remote_capabilities=0;
2512     st->chosen_transform=0;
2513     st->current.key_timeout=0;
2514     st->auxiliary_key.key_timeout=0;
2515     transport_peers_clear(st,&st->peers);
2516     transport_peers_clear(st,&st->setup_peers);
2517     /* XXX mlock these */
2518     st->dhsecret=safe_malloc(st->dh->len,"site:dhsecret");
2519     st->sharedsecretlen=st->sharedsecretallocd=0;
2520     st->sharedsecret=0;
2521
2522 #define SET_CAPBIT(bit) do {                                            \
2523     uint32_t capflag = 1UL << (bit);                                    \
2524     if (st->local_capabilities & capflag)                               \
2525         slog(st,LOG_ERROR,"capability bit"                              \
2526              " %d (%#"PRIx32") reused", (bit), capflag);                \
2527     st->local_capabilities |= capflag;                                  \
2528 } while (0)
2529
2530     for (i=0; i<st->ntransforms; i++)
2531         SET_CAPBIT(st->transforms[i]->capab_bit);
2532
2533 #undef SET_CAPBIT
2534
2535     if (st->local_mobile || st->peer_mobile)
2536         st->local_capabilities |= CAPAB_PRIORITY_MOBILE;
2537
2538     /* We need to register the remote networks with the netlink device */
2539     uint32_t netlink_mtu; /* local virtual interface mtu */
2540     st->netlink->reg(st->netlink->st, site_outgoing, st, &netlink_mtu);
2541     if (!st->mtu_target)
2542         st->mtu_target=netlink_mtu;
2543     
2544     for (i=0; i<st->ncomms; i++)
2545         st->comms[i]->request_notify(st->comms[i]->st, st, site_incoming);
2546
2547     st->current.transform=0;
2548     st->auxiliary_key.transform=0;
2549     st->new_transform=0;
2550     st->auxiliary_is_new=0;
2551
2552     enter_state_stop(st);
2553
2554     add_hook(PHASE_SHUTDOWN,site_phase_hook,st);
2555     add_hook(PHASE_CHILDPERSIST,site_childpersist_clearkeys,st);
2556
2557     return new_closure(&st->cl);
2558 }
2559
2560 void site_module(dict_t *dict)
2561 {
2562     add_closure(dict,"site",site_apply);
2563 }
2564
2565
2566 /***** TRANSPORT PEERS definitions *****/
2567
2568 static void transport_peers_debug(struct site *st, transport_peers *dst,
2569                                   const char *didwhat,
2570                                   int nargs, const struct comm_addr *args,
2571                                   size_t stride) {
2572     int i;
2573     char *argp;
2574
2575     if (!(st->log_events & LOG_PEER_ADDRS))
2576         return; /* an optimisation */
2577
2578     slog(st, LOG_PEER_ADDRS, "peers (%s) %s nargs=%d => npeers=%d",
2579          (dst==&st->peers ? "data" :
2580           dst==&st->setup_peers ? "setup" : "UNKNOWN"),
2581          didwhat, nargs, dst->npeers);
2582
2583     for (i=0, argp=(void*)args;
2584          i<nargs;
2585          i++, (argp+=stride?stride:sizeof(*args))) {
2586         const struct comm_addr *ca=(void*)argp;
2587         slog(st, LOG_PEER_ADDRS, " args: addrs[%d]=%s",
2588              i, comm_addr_to_string(ca));
2589     }
2590     for (i=0; i<dst->npeers; i++) {
2591         struct timeval diff;
2592         timersub(tv_now,&dst->peers[i].last,&diff);
2593         const struct comm_addr *ca=&dst->peers[i].addr;
2594         slog(st, LOG_PEER_ADDRS, " peers: addrs[%d]=%s T-%ld.%06ld",
2595              i, comm_addr_to_string(ca),
2596              (unsigned long)diff.tv_sec, (unsigned long)diff.tv_usec);
2597     }
2598 }
2599
2600 static void transport_peers_expire(struct site *st, transport_peers *peers) {
2601     /* peers must be sorted first */
2602     if (st->local_mobile) return;
2603
2604     int previous_peers=peers->npeers;
2605     struct timeval oldest;
2606     oldest.tv_sec  = tv_now->tv_sec - st->mobile_peer_expiry;
2607     oldest.tv_usec = tv_now->tv_usec;
2608     while (peers->npeers>1 &&
2609            timercmp(&peers->peers[peers->npeers-1].last, &oldest, <))
2610         peers->npeers--;
2611     if (peers->npeers != previous_peers)
2612         transport_peers_debug(st,peers,"expire", 0,0,0);
2613 }
2614
2615 static bool_t transport_peer_record_one(struct site *st, transport_peers *peers,
2616                                         const struct comm_addr *ca,
2617                                         const struct timeval *tv) {
2618     /* returns false if output is full */
2619     int search;
2620
2621     if (peers->npeers >= st->transport_peers_max)
2622         return 0;
2623
2624     for (search=0; search<peers->npeers; search++)
2625         if (comm_addr_equal(&peers->peers[search].addr, ca))
2626             return 1;
2627
2628     peers->peers[peers->npeers].addr = *ca;
2629     peers->peers[peers->npeers].last = *tv;
2630     peers->npeers++;
2631     return 1;
2632 }
2633
2634 static void transport_record_peers(struct site *st, transport_peers *peers,
2635                                    const struct comm_addr *addrs, int naddrs,
2636                                    const char *m) {
2637     /* We add addrs into peers.  The new entries end up at the front
2638      * and displace entries towards the end (perhaps even off the
2639      * end).  Any existing matching entries are moved up to the front.
2640      *
2641      * Caller must first call transport_peers_expire. */
2642
2643     if (naddrs==1) {
2644         /* avoids debug for uninteresting updates */
2645         int i;
2646         for (i=0; i<peers->npeers; i++) {
2647             if (comm_addr_equal(&addrs[0], &peers->peers[i].addr)) {
2648                 memmove(peers->peers+1, peers->peers,
2649                         sizeof(peers->peers[0]) * i);
2650                 peers->peers[0].addr = addrs[0];
2651                 peers->peers[0].last = *tv_now;
2652                 return;
2653             }
2654         }
2655     }
2656
2657     int old_npeers=peers->npeers;
2658     transport_peer old_peers[old_npeers];
2659     COPY_ARRAY(old_peers,peers->peers,old_npeers);
2660
2661     peers->npeers=0;
2662     int i;
2663     for (i=0; i<naddrs; i++) {
2664         if (!transport_peer_record_one(st,peers, &addrs[i], tv_now))
2665             break;
2666     }
2667     for (i=0; i<old_npeers; i++) {
2668         const transport_peer *old=&old_peers[i];
2669         if (!transport_peer_record_one(st,peers, &old->addr, &old->last))
2670             break;
2671     }
2672
2673     transport_peers_debug(st,peers,m, naddrs,addrs,0);
2674 }
2675
2676 static void transport_expire_record_peers(struct site *st,
2677                                           transport_peers *peers,
2678                                           const struct comm_addr *addrs,
2679                                           int naddrs, const char *m) {
2680     /* Convenience function */
2681     transport_peers_expire(st,peers);
2682     transport_record_peers(st,peers,addrs,naddrs,m);
2683 }
2684
2685 static bool_t transport_compute_setupinit_peers(struct site *st,
2686         const struct comm_addr *configured_addrs /* 0 if none or not found */,
2687         int n_configured_addrs /* 0 if none or not found */,
2688         const struct comm_addr *incoming_packet_addr /* 0 if none */) {
2689     if (!n_configured_addrs && !incoming_packet_addr &&
2690         !transport_peers_valid(&st->peers))
2691         return False;
2692
2693     slog(st,LOG_SETUP_INIT,
2694          "using: %d configured addr(s);%s %d old peer addrs(es)",
2695          n_configured_addrs,
2696          incoming_packet_addr ? " incoming packet address;" : "",
2697          st->peers.npeers);
2698
2699     /* Non-mobile peers try addresses until one is plausible.  The
2700      * effect is that this code always tries first the configured
2701      * address if supplied, or otherwise the address of the incoming
2702      * PROD, or finally the existing data peer if one exists; this is
2703      * as desired. */
2704
2705     transport_peers_copy(st,&st->setup_peers,&st->peers);
2706     transport_peers_expire(st,&st->setup_peers);
2707
2708     if (incoming_packet_addr)
2709         transport_record_peers(st,&st->setup_peers,
2710                                incoming_packet_addr,1, "incoming");
2711
2712     if (n_configured_addrs)
2713         transport_record_peers(st,&st->setup_peers,
2714                               configured_addrs,n_configured_addrs, "setupinit");
2715
2716     assert(transport_peers_valid(&st->setup_peers));
2717     return True;
2718 }
2719
2720 static void transport_setup_msgok(struct site *st, const struct comm_addr *a) {
2721     if (st->peer_mobile)
2722         transport_expire_record_peers(st,&st->setup_peers,a,1,"setupmsg");
2723 }
2724 static void transport_data_msgok(struct site *st, const struct comm_addr *a) {
2725     if (st->peer_mobile)
2726         transport_expire_record_peers(st,&st->peers,a,1,"datamsg");
2727 }
2728
2729 static int transport_peers_valid(transport_peers *peers) {
2730     return peers->npeers;
2731 }
2732 static void transport_peers_clear(struct site *st, transport_peers *peers) {
2733     peers->npeers= 0;
2734     transport_peers_debug(st,peers,"clear",0,0,0);
2735 }
2736 static void transport_peers_copy(struct site *st, transport_peers *dst,
2737                                  const transport_peers *src) {
2738     dst->npeers=src->npeers;
2739     COPY_ARRAY(dst->peers, src->peers, dst->npeers);
2740     transport_peers_debug(st,dst,"copy",
2741                           src->npeers, &src->peers->addr, sizeof(*src->peers));
2742 }
2743
2744 static void transport_resolve_complete(struct site *st,
2745                                        const struct comm_addr *addrs,
2746                                        int naddrs) {
2747     transport_expire_record_peers(st,&st->peers,addrs,naddrs,
2748                                   "resolved data");
2749     transport_expire_record_peers(st,&st->setup_peers,addrs,naddrs,
2750                                   "resolved setup");
2751 }
2752
2753 static void transport_resolve_complete_tardy(struct site *st,
2754                                              const struct comm_addr *addrs,
2755                                              int naddrs) {
2756     transport_expire_record_peers(st,&st->peers,addrs,naddrs,
2757                                   "resolved tardily");
2758 }
2759
2760 static void transport_peers__copy_by_mask(transport_peer *out, int *nout_io,
2761                                           unsigned mask,
2762                                           const transport_peers *inp) {
2763     /* out and in->peers may be the same region, or nonoverlapping */
2764     const transport_peer *in=inp->peers;
2765     int slot;
2766     for (slot=0; slot<inp->npeers; slot++) {
2767         if (!(mask & (1U << slot)))
2768             continue;
2769         if (!(out==in && slot==*nout_io))
2770             COPY_OBJ(out[*nout_io], in[slot]);
2771         (*nout_io)++;
2772     }
2773 }
2774
2775 void transport_xmit(struct site *st, transport_peers *peers,
2776                     struct buffer_if *buf, bool_t candebug) {
2777     int slot;
2778     transport_peers_expire(st, peers);
2779     unsigned failed=0; /* bitmask */
2780     assert(MAX_PEER_ADDRS < sizeof(unsigned)*CHAR_BIT);
2781
2782     int nfailed=0;
2783     for (slot=0; slot<peers->npeers; slot++) {
2784         transport_peer *peer=&peers->peers[slot];
2785         bool_t ok = comm_addr_sendmsg(st, &peer->addr, buf);
2786         if (candebug)
2787             dump_packet(st, buf, &peer->addr, False, ok);
2788         if (!ok) {
2789             failed |= 1U << slot;
2790             nfailed++;
2791         }
2792         if (ok && !st->peer_mobile)
2793             break;
2794     }
2795     /* Now we need to demote/delete failing addrs: if we are mobile we
2796      * merely demote them; otherwise we delete them. */
2797     if (st->local_mobile) {
2798         unsigned expected = ((1U << nfailed)-1) << (peers->npeers-nfailed);
2799         /* `expected' has all the failures at the end already */
2800         if (failed != expected) {
2801             int fslot=0;
2802             transport_peer failedpeers[nfailed];
2803             transport_peers__copy_by_mask(failedpeers, &fslot, failed,peers);
2804             assert(fslot == nfailed);
2805             int wslot=0;
2806             transport_peers__copy_by_mask(peers->peers,&wslot,~failed,peers);
2807             assert(wslot+nfailed == peers->npeers);
2808             COPY_ARRAY(peers->peers+wslot, failedpeers, nfailed);
2809             transport_peers_debug(st,peers,"mobile failure reorder",0,0,0);
2810         }
2811     } else {
2812         if (failed && peers->npeers > 1) {
2813             int wslot=0;
2814             transport_peers__copy_by_mask(peers->peers,&wslot,~failed,peers);
2815             peers->npeers=wslot;
2816             transport_peers_debug(st,peers,"non-mobile failure cleanup",0,0,0);
2817         }
2818     }
2819 }
2820
2821 /***** END of transport peers definitions *****/