chiark / gitweb /
sig: Move hashing into algorithm
[secnet.git] / site.c
1 /* site.c - manage communication with a remote network site */
2
3 /*
4  * This file is part of secnet.
5  * See README for full list of copyright holders.
6  *
7  * secnet is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3 of the License, or
10  * (at your option) any later version.
11  * 
12  * secnet is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * version 3 along with secnet; if not, see
19  * https://www.gnu.org/licenses/gpl.html.
20  */
21
22 /* The 'site' code doesn't know anything about the structure of the
23    packets it's transmitting.  In fact, under the new netlink
24    configuration scheme it doesn't need to know anything at all about
25    IP addresses, except how to contact its peer.  This means it could
26    potentially be used to tunnel other protocols too (IPv6, IPX, plain
27    old Ethernet frames) if appropriate netlink code can be written
28    (and that ought not to be too hard, eg. using the TUN/TAP device to
29    pretend to be an Ethernet interface).  */
30
31 /* At some point in the future the netlink code will be asked for
32    configuration information to go in the PING/PONG packets at the end
33    of the key exchange. */
34
35 #include "secnet.h"
36 #include <stdio.h>
37 #include <string.h>
38 #include <limits.h>
39 #include <assert.h>
40 #include <sys/socket.h>
41
42 #include <sys/mman.h>
43 #include "util.h"
44 #include "unaligned.h"
45 #include "magic.h"
46
/* NOTE(review): SETUP_BUFFER_LEN is not referenced in the visible part
 * of this file; presumably the size of the key-setup packet buffers --
 * confirm at the allocation site. */
47 #define SETUP_BUFFER_LEN 2048
48
/* Built-in default timing parameters.  Units are given on each line:
 * 1 hour key lifetime, 5 minute renegotiation gap, 2 second setup
 * retry interval, 20 second wait time.  NOTE(review): the code that
 * applies these defaults is outside this excerpt. */
49 #define DEFAULT_KEY_LIFETIME                  (3600*1000) /* [ms] */
50 #define DEFAULT_KEY_RENEGOTIATE_GAP           (5*60*1000) /* [ms] */
51 #define DEFAULT_SETUP_RETRIES 5
52 #define DEFAULT_SETUP_RETRY_INTERVAL             (2*1000) /* [ms] */
53 #define DEFAULT_WAIT_TIME                       (20*1000) /* [ms] */
54
/* "Mobile" counterparts of the defaults above: 2 day key lifetime,
 * 12 hour renegotiation gap, 30 retries at 1 second intervals,
 * 10 second wait time.  NOTE(review): presumably selected when either
 * end is configured as mobile -- confirm where the defaults are read. */
55 #define DEFAULT_MOBILE_KEY_LIFETIME      (2*24*3600*1000) /* [ms] */
56 #define DEFAULT_MOBILE_KEY_RENEGOTIATE_GAP (12*3600*1000) /* [ms] */
57 #define DEFAULT_MOBILE_SETUP_RETRIES 30
58 #define DEFAULT_MOBILE_SETUP_RETRY_INTERVAL      (1*1000) /* [ms] */
59 #define DEFAULT_MOBILE_WAIT_TIME                (10*1000) /* [ms] */
60
/* Note the different unit: seconds, not milliseconds (120 s). */
61 #define DEFAULT_MOBILE_PEER_EXPIRY            (2*60)      /* [s] */
62
63 /* Each site can be in one of several possible states. */
64
65 /* States:
66    SITE_STOP         - nothing is allowed to happen; tunnel is down;
67                        all session keys have been erased
68      -> SITE_RUN upon external instruction
69    SITE_RUN          - site up, maybe with valid key
70      -> SITE_RESOLVE upon outgoing packet and no valid key
71          we start name resolution for the other end of the tunnel
72      -> SITE_SENTMSG2 upon valid incoming message 1 and suitable time
73          we send an appropriate message 2
74    SITE_RESOLVE      - waiting for name resolution
75      -> SITE_SENTMSG1 upon successful resolution
76          we send an appropriate message 1
77      -> SITE_SENTMSG2 upon valid incoming message 1 (then abort resolution)
78          we abort resolution and send an appropriate message 2
79      -> SITE_WAIT on timeout or resolution failure
80    SITE_SENTMSG1
81      -> SITE_SENTMSG2 upon valid incoming message 1 from higher priority end
82      -> SITE_SENTMSG3 upon valid incoming message 2
83      -> SITE_WAIT on timeout
84    SITE_SENTMSG2
85      -> SITE_SENTMSG4 upon valid incoming message 3
86      -> SITE_WAIT on timeout
87    SITE_SENTMSG3
88      -> SITE_SENTMSG5 upon valid incoming message 4
89      -> SITE_WAIT on timeout
90    SITE_SENTMSG4
91      -> SITE_RUN upon valid incoming message 5
92      -> SITE_WAIT on timeout
93    SITE_SENTMSG5
94      -> SITE_RUN upon valid incoming message 6
95      -> SITE_WAIT on timeout
96    SITE_WAIT         - failed to establish key; do nothing for a while
97      -> SITE_RUN on timeout
98    */
99
100 #define SITE_STOP     0
101 #define SITE_RUN      1
102 #define SITE_RESOLVE  2
103 #define SITE_SENTMSG1 3
104 #define SITE_SENTMSG2 4
105 #define SITE_SENTMSG3 5
106 #define SITE_SENTMSG4 6
107 #define SITE_SENTMSG5 7
108 #define SITE_WAIT     8
109
110 #define CASES_MSG3_KNOWN LABEL_MSG3: case LABEL_MSG3BIS
111
112 int32_t site_max_start_pad = 4*4;
113
114 static cstring_t state_name(uint32_t state)
115 {
116     switch (state) {
117     case 0: return "STOP";
118     case 1: return "RUN";
119     case 2: return "RESOLVE";
120     case 3: return "SENTMSG1";
121     case 4: return "SENTMSG2";
122     case 5: return "SENTMSG3";
123     case 6: return "SENTMSG4";
124     case 7: return "SENTMSG5";
125     case 8: return "WAIT";
126     default: return "*bad state*";
127     }
128 }
129
/* Length in bytes of each key-exchange nonce (see localN/remoteN in
 * struct site). */
130 #define NONCELEN 8
131
/* Event classes for site logging.  Each is a single bit so that a set
 * of enabled events can be kept in one uint32_t (struct site's
 * log_events) and tested with `&' in event_log_priority().  Bit
 * 0x00000200 is not assigned here. */
132 #define LOG_UNEXPECTED    0x00000001
133 #define LOG_SETUP_INIT    0x00000002
134 #define LOG_SETUP_TIMEOUT 0x00000004
135 #define LOG_ACTIVATE_KEY  0x00000008
136 #define LOG_TIMEOUT_KEY   0x00000010
137 #define LOG_SEC           0x00000020
138 #define LOG_STATE         0x00000040
139 #define LOG_DROP          0x00000080
140 #define LOG_DUMP          0x00000100
141 #define LOG_ERROR         0x00000400
142 #define LOG_PEER_ADDRS    0x00000800
143
/* Name -> flag-set table, terminated by a NULL name.  Besides the
 * individual events it offers two aliases: "default" (setup
 * init/timeout, key activate/timeout, security and errors) and "all"
 * (every bit set).  NOTE(review): presumably consumed by the
 * configuration parser to fill log_events -- the user is outside this
 * excerpt. */
144 static struct flagstr log_event_table[]={
145     { "unexpected", LOG_UNEXPECTED },
146     { "setup-init", LOG_SETUP_INIT },
147     { "setup-timeout", LOG_SETUP_TIMEOUT },
148     { "activate-key", LOG_ACTIVATE_KEY },
149     { "timeout-key", LOG_TIMEOUT_KEY },
150     { "security", LOG_SEC },
151     { "state-change", LOG_STATE },
152     { "packet-drop", LOG_DROP },
153     { "dump-packets", LOG_DUMP },
154     { "errors", LOG_ERROR },
155     { "peer-addrs", LOG_PEER_ADDRS },
156     { "default", LOG_SETUP_INIT|LOG_SETUP_TIMEOUT|
157       LOG_ACTIVATE_KEY|LOG_TIMEOUT_KEY|LOG_SEC|LOG_ERROR },
158     { "all", 0xffffffff },
159     { NULL, 0 }
160 };
161
162
163 /***** TRANSPORT PEERS declarations *****/
164
165 /* Details of "mobile peer" semantics:
166
167    - We use the same data structure for the different configurations,
168      but manage it with different algorithms.
169    
170    - We record up to mobile_peers_max peer address/port numbers
171      ("peers") for key setup, and separately up to mobile_peers_max
172      for data transfer.
173
174    - In general, we make a new set of addrs (see below) when we start
175      a new key exchange; the key setup addrs become the data transport
176      addrs when key setup completes.
177
178    If our peer is mobile:
179
180    - We send to all recent addresses of incoming packets, plus
181      initially all configured addresses (which we also expire).
182
183    - So, we record addrs of good incoming packets, as follows:
184       1. expire any peers last seen >120s ("mobile-peer-expiry") ago
185       2. add the peer of the just received packet to the applicable list
186          (possibly evicting the oldest entries to make room)
187      NB that we do not expire peers until an incoming packet arrives.
188
189    - If the peer has a configured address or name, we record them the
190      same way, but only as a result of our own initiation of key
191      setup.  (We might evict some incoming packet addrs to make room.)
192
193    - The default number of addrs to keep is 3, or 4 if we have a
194      configured name or address.  That's space for two configured
195      addresses (one IPv6 and one IPv4), plus two received addresses.
196
197    - Outgoing packets are sent to every recorded address in the
198      applicable list.  Any unsupported[1] addresses are deleted from
199      the list right away.  (This should only happen to configured
200      addresses, of course, but there is no need to check that.)
201
202    - When we successfully complete a key setup, we merge the key setup
203      peers into the data transfer peers.
204
205    [1] An unsupported address is one for whose AF we don't have a
206      socket (perhaps because we got EAFNOSUPPORT or some such) or for
207      which sendto gives ENETUNREACH.
208
209    If neither end is mobile:
210
211    - When peer initiated the key exchange, we use the incoming packet
212      address.
213
214    - When we initiate the key exchange, we try configured addresses
215      until we get one which isn't unsupported then fixate on that.
216
217    - When we complete a key setup, we replace the data transport peers
218      with those from the key setup.
219
220    If we are mobile:
221
222    - We can't tell when local network setup changes so we can't cache
223      the unsupported addrs and completely remove the spurious calls to
224      sendto, but we can optimise things a bit by deprioritising addrs
225      which seem to be unsupported.
226
227    - Use only configured addresses.  (Except, that if our peer
228      initiated a key exchange we use the incoming packet address until
229      our name resolution completes.)
230
231    - When we send a packet, try each address in turn; if addr
232      supported, put that address to the end of the list for future
233      packets, and go onto the next address.
234
235    - When we complete a key setup, we replace the data transport peers
236      with those from the key setup.
237
238    */
239
/* One candidate address for reaching the peer.  NOTE(review): `last'
 * is presumably the time the address was last seen/used, as needed by
 * the expiry rules in the comment above -- the code that maintains it
 * is outside this excerpt. */
240 typedef struct {
241     struct timeval last;
242     struct comm_addr addr;
243 } transport_peer;
244
/* A bounded set of peer addresses: the first `npeers' entries of
 * `peers' are in use (see the loop in logtimeout()); capacity is
 * MAX_PEER_ADDRS.  struct site holds two of these, one for data
 * traffic (`peers') and one for key setup (`setup_peers'). */
245 typedef struct {
246 /* configuration information */
247 /* runtime information */
248     int npeers;
249     transport_peer peers[MAX_PEER_ADDRS];
250 } transport_peers;
251
252 /* Basic operations on transport peer address sets */
253 static void transport_peers_clear(struct site *st, transport_peers *peers);
254 static int transport_peers_valid(transport_peers *peers);
255 static void transport_peers_copy(struct site *st, transport_peers *dst,
256                                  const transport_peers *src);
257
258 /* Record address of incoming setup packet; resp. data packet. */
259 static void transport_setup_msgok(struct site *st, const struct comm_addr *a);
260 static void transport_data_msgok(struct site *st, const struct comm_addr *a);
261
262 /* Initialise the setup addresses.  Called before we send the first
263  * packet in a key exchange.  If we are the initiator, as a result of
264  * resolve completing (or being determined not to be relevant) or an
265  * incoming PROD; if we are the responder, as a result of the MSG1. */
266 static bool_t transport_compute_setupinit_peers(struct site *st,
267         const struct comm_addr *configured_addrs /* 0 if none or not found */,
268         int n_configured_addrs /* 0 if none or not found */,
269         const struct comm_addr *incoming_packet_addr /* 0 if none */);
270
271 /* Called if we are the responder in a key setup, when the resolve
272  * completes.  transport_compute_setupinit_peers will have been called
273  * earlier.  If _complete is called, we are still doing the key setup
274  * (and we should use the new values for both the rest of the key
275  * setup and the ongoing data exchange); if _tardy is called, the key
276  * setup is done (either completed or not) and only the data peers are
277  * relevant */
278 static void transport_resolve_complete(struct site *st,
279         const struct comm_addr *addrs, int naddrs);
280 static void transport_resolve_complete_tardy(struct site *st,
281         const struct comm_addr *addrs, int naddrs);
282
/* Transmit buf to the given peer set.  NOTE(review): the definition is
 * outside this excerpt; see the "mobile peer" comment above for the
 * intended per-configuration sending policy. */
283 static void transport_xmit(struct site *st, transport_peers *peers,
284                            struct buffer_if *buf, bool_t candebug);
285
286  /***** END of transport peers declarations *****/
287
288
/* One session key slot.  struct site keeps two: `current' and
 * `auxiliary_key'.  A slot is usable when its transform is non-null
 * and reports itself valid (see is_transform_valid()). */
289 struct data_key {
290     struct transform_inst_if *transform;
291     uint64_t key_timeout; /* End of life of current key */
/* NOTE(review): presumably the peer's session index to address data
 * packets to -- its users are outside this excerpt. */
292     uint32_t remote_session_id;
293 };
294
/* All state for one tunnel to one remote site.  The fields fall into
 * three groups, marked by the comments below: static configuration,
 * the currently established session (keys and data-transport peer
 * addresses), and the single key-setup exchange that may be in
 * progress. */
295 struct site {
296     closure_t cl;
297     struct site_if ops;
298 /* configuration information */
299     string_t localname;
300     string_t remotename;
301     bool_t keepalive;
302     bool_t local_mobile, peer_mobile; /* Mobile client support */
303     int32_t transport_peers_max;
304     string_t tunname; /* localname<->remotename by default, used in logs */
305     cstring_t *addresses; /* DNS name or address(es) for bootstrapping, optional */
306     int remoteport; /* Port for bootstrapping, optional */
307     uint32_t mtu_target;
308     struct netlink_if *netlink;
309     struct comm_if **comms;
310     struct comm_clientinfo **commclientinfos;
311     int ncomms;
312     struct resolver_if *resolver;
313     struct log_if *log;
314     struct random_if *random;
315     struct sigprivkey_if *privkey;
316     struct sigpubkey_if *pubkey;
317     struct transform_if **transforms;
318     int ntransforms;
319     struct dh_if *dh;
320
321     uint32_t index; /* Index of this site */
322     uint32_t early_capabilities;
323     uint32_t local_capabilities;
324     int32_t setup_retries; /* How many times to send setup packets */
325     int32_t setup_retry_interval; /* Initial timeout for setup packets */
326     int32_t wait_timeout_mean; /* How long to wait if setup unsuccessful */
327     int32_t mobile_peer_expiry; /* How long to remember 2ary addresses */
328     int32_t key_lifetime; /* How long a key lasts once set up */
329     int32_t key_renegotiate_time; /* If we see traffic (or a keepalive)
330                                       after this time, initiate a new
331                                       key exchange */
332
333     bool_t our_name_later; /* our name > peer name */
334     uint32_t log_events;
335
336 /* runtime information */
337     uint32_t state;
338     uint64_t now; /* Most recently seen time */
339     bool_t allow_send_prod;
340     bool_t msg1_crossed_logged;
341     int resolving_count;
342     int resolving_n_results_all;
343     int resolving_n_results_stored;
344     struct comm_addr resolving_results[MAX_PEER_ADDRS];
345
346     /* The currently established session */
347     struct data_key current;
348     struct data_key auxiliary_key;
349     bool_t auxiliary_is_new;
350     uint64_t renegotiate_key_time; /* When we can negotiate a new key */
351     uint64_t auxiliary_renegotiate_key_time;
352     transport_peers peers; /* Current address(es) of peer for data traffic */
353
354     /* The current key setup protocol exchange.  We can only be
355        involved in one of these at a time.  There's a potential for
356        denial of service here (the attacker keeps sending a setup
357        packet; we keep trying to continue the exchange, and have to
358        timeout before we can listen for another setup packet); perhaps
359        we should keep a list of 'bad' sources for setup packets. */
360     uint32_t remote_capabilities;
361     uint16_t remote_adv_mtu;
362     struct transform_if *chosen_transform;
363     uint32_t setup_session_id;
364     transport_peers setup_peers;
365     uint8_t localN[NONCELEN]; /* Nonces for key exchange */
366     uint8_t remoteN[NONCELEN];
367     struct buffer_if buffer; /* Current outgoing key exchange packet */
368     struct buffer_if scratch;
369     int32_t retries; /* Number of retries remaining */
370     uint64_t timeout; /* Timeout for current state */
371     uint8_t *dhsecret;
/* sharedsecret is a heap buffer grown on demand by set_new_transform();
 * sharedsecretallocd is its allocated size and sharedsecretlen the
 * number of bytes used by the current exchange. */
372     uint8_t *sharedsecret;
373     uint32_t sharedsecretlen, sharedsecretallocd;
374     struct transform_inst_if *new_transform; /* For key setup/verify */
375 };
376
377 static uint32_t event_log_priority(struct site *st, uint32_t event)
378 {
379     if (!(event&st->log_events))
380         return 0;
381     switch(event) {
382     case LOG_UNEXPECTED:    return M_INFO;
383     case LOG_SETUP_INIT:    return M_INFO;
384     case LOG_SETUP_TIMEOUT: return M_NOTICE;
385     case LOG_ACTIVATE_KEY:  return M_INFO;
386     case LOG_TIMEOUT_KEY:   return M_INFO;
387     case LOG_SEC:           return M_SECURITY;
388     case LOG_STATE:         return M_DEBUG;
389     case LOG_DROP:          return M_DEBUG;
390     case LOG_DUMP:          return M_DEBUG;
391     case LOG_ERROR:         return M_ERR;
392     case LOG_PEER_ADDRS:    return M_DEBUG;
393     default:                return M_ERR;
394     }
395 }
396
397 static void vslog(struct site *st, uint32_t event, cstring_t msg, va_list ap)
398 FORMAT(printf,3,0);
399 static void vslog(struct site *st, uint32_t event, cstring_t msg, va_list ap)
400 {
401     uint32_t class;
402
403     class=event_log_priority(st, event);
404     if (class) {
405         slilog_part(st->log,class,"%s: ",st->tunname);
406         vslilog_part(st->log,class,msg,ap);
407         slilog_part(st->log,class,"\n");
408     }
409 }
410
411 static void slog(struct site *st, uint32_t event, cstring_t msg, ...)
412 FORMAT(printf,3,4);
413 static void slog(struct site *st, uint32_t event, cstring_t msg, ...)
414 {
415     va_list ap;
416     va_start(ap,msg);
417     vslog(st,event,msg,ap);
418     va_end(ap);
419 }
420
421 static void logtimeout(struct site *st, const char *fmt, ...)
422 FORMAT(printf,2,3);
423 static void logtimeout(struct site *st, const char *fmt, ...)
424 {
425     uint32_t class=event_log_priority(st,LOG_SETUP_TIMEOUT);
426     if (!class)
427         return;
428
429     va_list ap;
430     va_start(ap,fmt);
431
432     slilog_part(st->log,class,"%s: ",st->tunname);
433     vslilog_part(st->log,class,fmt,ap);
434
435     const char *delim;
436     int i;
437     for (i=0, delim=" (tried ";
438          i<st->setup_peers.npeers;
439          i++, delim=", ") {
440         transport_peer *peer=&st->setup_peers.peers[i];
441         const char *s=comm_addr_to_string(&peer->addr);
442         slilog_part(st->log,class,"%s%s",delim,s);
443     }
444
445     slilog_part(st->log,class,")\n");
446     va_end(ap);
447 }
448
/* Forward declarations for functions defined later in this file. */
449 static void set_link_quality(struct site *st);
450 static void delete_keys(struct site *st, cstring_t reason, uint32_t loglevel);
451 static void delete_one_key(struct site *st, struct data_key *key,
452                            const char *reason /* may be 0 meaning don't log*/,
453                            const char *which /* ignored if !reason */,
454                            uint32_t loglevel /* ignored if !reason */);
455 static bool_t initiate_key_setup(struct site *st, cstring_t reason,
456                                  const struct comm_addr *prod_hint);
457 static void enter_state_run(struct site *st);
458 static bool_t enter_state_resolve(struct site *st);
459 static void decrement_resolving_count(struct site *st, int by);
460 static bool_t enter_new_state(struct site *st,uint32_t next);
461 static void enter_state_wait(struct site *st);
462 static void activate_new_key(struct site *st);
463
464 static bool_t is_transform_valid(struct transform_inst_if *transform)
465 {
466     return transform && transform->valid(transform->st);
467 }
468
469 static bool_t current_valid(struct site *st)
470 {
471     return is_transform_valid(st->current.transform);
472 }
473
/* Generates call_transform_<fwdrev>(st, transform, buf, errmsg), a
 * guarded wrapper around transform-><fwdrev>().  If the transform is
 * null or not valid, the wrapper sets *errmsg to "transform not set
 * up" and returns transform_apply_err without touching buf; otherwise
 * it returns whatever the transform's own method returns.  The `st'
 * argument is not used by the generated body.
 *
 * Instantiated below for `forwards' and `reverse', yielding
 * call_transform_forwards() and call_transform_reverse(). */
474 #define DEFINE_CALL_TRANSFORM(fwdrev)                                   \
475 static transform_apply_return                                           \
476 call_transform_##fwdrev(struct site *st,                                \
477                                    struct transform_inst_if *transform, \
478                                    struct buffer_if *buf,               \
479                                    const char **errmsg)                 \
480 {                                                                       \
481     if (!is_transform_valid(transform)) {                               \
482         *errmsg="transform not set up";                                 \
483         return transform_apply_err;                                     \
484     }                                                                   \
485     return transform->fwdrev(transform->st,buf,errmsg);                 \
486 }
487
488 DEFINE_CALL_TRANSFORM(forwards)
489 DEFINE_CALL_TRANSFORM(reverse)
490
491 static void dispose_transform(struct transform_inst_if **transform_var)
492 {
493     struct transform_inst_if *transform=*transform_var;
494     if (transform) {
495         transform->delkey(transform->st);
496         transform->destroy(transform->st);
497     }
498     *transform_var = 0;
499 }    
500
/* Packet-parsing guards.  NB each of these contains a hidden
 * `return False', so they may only be used inside functions whose
 * return value can carry False (the unpick_* parsers below).
 *
 *  CHECK_AVAIL(b,l)  bail out unless buffer b holds at least l bytes
 *  CHECK_EMPTY(b)    bail out unless buffer b has been fully consumed
 *  CHECK_TYPE(b,t)   consume a 32-bit type word from b and bail out
 *                    if it is missing or differs from t
 */
501 #define CHECK_AVAIL(b,l) do { if ((b)->size<(l)) return False; } while(0)
502 #define CHECK_EMPTY(b) do { if ((b)->size!=0) return False; } while(0)
503 #define CHECK_TYPE(b,t) do { uint32_t type; \
504     CHECK_AVAIL((b),4); \
505     type=buf_unprepend_uint32((b)); \
506     if (type!=(t)) return False; } while(0)
507
508 static _Bool type_is_msg34(uint32_t type)
509 {
510     switch (type) {
511         case CASES_MSG3_KNOWN: case LABEL_MSG4: return True;
512         default: return False;
513     }
514 }
515
/* A site name as parsed out of a received packet by unpick_name().
 * `name' points into the packet buffer and is NOT nul-terminated;
 * `len' is the length of the name proper.  If the on-wire string
 * contained a nul byte, everything after it is exposed through
 * `extrainfo' as a read-only view; otherwise extrainfo is empty. */
516 struct parsedname {
517     int32_t len;
518     uint8_t *name;
519     struct buffer_if extrainfo;
520 };
521
/* Fields of a key-setup packet as extracted by unpick_msg().  Pointer
 * members point into the packet buffer that was parsed.  Which fields
 * are filled in depends on the message type: unpick_msg() returns as
 * soon as the type's last field has been read, leaving later members
 * untouched. */
522 struct msg {
523     uint8_t *hashstart; /* start of packet, as seen before parsing */
524     uint32_t dest;
525     uint32_t source;
526     struct parsedname remote; /* first name in the packet (sender's) */
527     struct parsedname local; /* second name in the packet (ours) */
528     uint32_t remote_capabilities; /* 0 if the sender sent none */
529     uint16_t remote_mtu; /* 0 if the sender sent none */
530     int capab_transformnum; /* -1 unless read from / defaulted for a MSG3 */
531     uint8_t *nR; /* sender's nonce, NONCELEN bytes */
532     uint8_t *nL; /* our nonce echoed back, NONCELEN bytes */
533     int32_t pklen;
534     char *pk; /* pklen bytes; not nul-terminated by unpick_msg */
535     int32_t hashlen; /* bytes from hashstart up to the end of pk */
536     struct alg_msg_data sig;
537 };
538
539 static int32_t wait_timeout(struct site *st) {
540     int32_t t = st->wait_timeout_mean;
541     int8_t factor;
542     if (t < INT_MAX/2) {
543         st->random->generate(st->random->st,sizeof(factor),&factor);
544         t += (t / 256) * factor;
545     }
546     return t;
547 }
548
549 static _Bool set_new_transform(struct site *st, char *pk)
550 {
551     _Bool ok;
552
553     /* Make room for the shared key */
554     st->sharedsecretlen=st->chosen_transform->keylen?:st->dh->ceil_len;
555     assert(st->sharedsecretlen);
556     if (st->sharedsecretlen > st->sharedsecretallocd) {
557         st->sharedsecretallocd=st->sharedsecretlen;
558         st->sharedsecret=safe_realloc_ary(st->sharedsecret,1,
559                                           st->sharedsecretallocd,
560                                           "site:sharedsecret");
561     }
562
563     /* Generate the shared key */
564     st->dh->makeshared(st->dh->st,st->dhsecret,st->dh->len,pk,
565                        st->sharedsecret,st->sharedsecretlen);
566
567     /* Set up the transform */
568     struct transform_if *generator=st->chosen_transform;
569     struct transform_inst_if *generated=generator->create(generator->st);
570     ok = generated->setkey(generated->st,st->sharedsecret,
571                            st->sharedsecretlen,st->our_name_later);
572
573     dispose_transform(&st->new_transform);
574     if (!ok) return False;
575     st->new_transform=generated;
576
577     slog(st,LOG_SETUP_INIT,"key exchange negotiated transform"
578          " %d (capabilities ours=%#"PRIx32" theirs=%#"PRIx32")",
579          st->chosen_transform->capab_bit,
580          st->local_capabilities, st->remote_capabilities);
581     return True;
582 }
583
/* Bookkeeping shared between append_string_xinfo_start() and
 * append_string_xinfo_done(), both as offsets into the buffer:
 *   lenpos   - buffer size before the string was appended
 *   afternul - buffer size just after the string's trailing nul byte
 */
584 struct xinfoadd {
585     int32_t lenpos, afternul;
586 };
587 static void append_string_xinfo_start(struct buffer_if *buf,
588                                       struct xinfoadd *xia,
589                                       const char *str)
590     /* Helps construct one of the names with additional info as found
591      * in MSG1..4.  Call this function first, then append all the
592      * desired extra info (not including the nul byte) to the buffer,
593      * then call append_string_xinfo_done. */
594 {
595     xia->lenpos = buf->size;
596     buf_append_string(buf,str);
597     buf_append_uint8(buf,0);
598     xia->afternul = buf->size;
599 }
600 static void append_string_xinfo_done(struct buffer_if *buf,
601                                      struct xinfoadd *xia)
602 {
603     /* we just need to adjust the string length */
604     if (buf->size == xia->afternul) {
605         /* no extra info, strip the nul too */
606         buf_unappend_uint8(buf);
607     } else {
608         put_uint16(buf->start+xia->lenpos, buf->size-(xia->lenpos+2));
609     }
610 }
611
612 /* Build any of msg1 to msg4. msg5 and msg6 are built from the inside
613    out using a transform of config data supplied by netlink */
614 static bool_t generate_msg(struct site *st, uint32_t type, cstring_t what)
615 {
616     string_t dhpub;
617     unsigned minor;
618
619     st->retries=st->setup_retries;
620     BUF_ALLOC(&st->buffer,what);
621     buffer_init(&st->buffer,0);
622     buf_append_uint32(&st->buffer,
623         (type==LABEL_MSG1?0:st->setup_session_id));
624     buf_append_uint32(&st->buffer,st->index);
625     buf_append_uint32(&st->buffer,type);
626
627     struct xinfoadd xia;
628     append_string_xinfo_start(&st->buffer,&xia,st->localname);
629     if ((st->local_capabilities & st->early_capabilities) ||
630         (type != LABEL_MSG1)) {
631         buf_append_uint32(&st->buffer,st->local_capabilities);
632     }
633     if (type_is_msg34(type)) {
634         buf_append_uint16(&st->buffer,st->mtu_target);
635     }
636     append_string_xinfo_done(&st->buffer,&xia);
637
638     buf_append_string(&st->buffer,st->remotename);
639     BUF_ADD_OBJ(append,&st->buffer,st->localN);
640     if (type==LABEL_MSG1) return True;
641     BUF_ADD_OBJ(append,&st->buffer,st->remoteN);
642     if (type==LABEL_MSG2) return True;
643
644     if (hacky_par_mid_failnow()) return False;
645
646     if (MSGMAJOR(type) == 3) do {
647         minor = MSGMINOR(type);
648         if (minor < 1) break;
649         buf_append_uint8(&st->buffer,st->chosen_transform->capab_bit);
650     } while (0);
651
652     dhpub=st->dh->makepublic(st->dh->st,st->dhsecret,st->dh->len);
653     buf_append_string(&st->buffer,dhpub);
654     free(dhpub);
655
656     bool_t ok=st->privkey->sign(st->privkey->st,
657                                 st->buffer.start,
658                                 st->buffer.size,
659                                 &st->buffer);
660     if (!ok) goto fail;
661     return True;
662
663  fail:
664     return False;
665 }
666
667 static bool_t unpick_name(struct buffer_if *msg, struct parsedname *nm)
668 {
669     CHECK_AVAIL(msg,2);
670     nm->len=buf_unprepend_uint16(msg);
671     CHECK_AVAIL(msg,nm->len);
672     nm->name=buf_unprepend(msg,nm->len);
673     uint8_t *nul=memchr(nm->name,0,nm->len);
674     if (!nul) {
675         buffer_readonly_view(&nm->extrainfo,0,0);
676     } else {
677         buffer_readonly_view(&nm->extrainfo, nul+1, msg->start-(nul+1));
678         nm->len=nul-nm->name;
679     }
680     return True;
681 }
682
/* Parse a key-setup packet of the expected `type' from msg into *m,
 * consuming msg as it goes.
 *
 * Returns False as soon as anything is wrong: the packet is too
 * short for the next field, the type word differs from `type', there
 * are bytes left over after the last field for this type, or (for
 * signed messages) st->pubkey->unpick rejects the signature data.
 * Most failures come from the CHECK_* macros, which return from this
 * function directly.  On failure *m is partially filled in.
 *
 * Fields are read in this order; parsing stops after the last field
 * the given type carries:
 *   dest, source, type (32 bits each)
 *   remote name, whose extra info may hold capabilities (32 bits)
 *     and, for MSG3/MSG4 only, an MTU (16 bits)
 *   local name                               -- PROD ends here
 *   nR (NONCELEN bytes)                      -- MSG1 ends here
 *   nL (NONCELEN bytes)                      -- MSG2 ends here
 *   transform capability number (1 byte; major type 3 only, and only
 *     when the minor type is >= 1, else CAPAB_BIT_ANCIENTTRANSFORM)
 *   pklen (16 bits) and pk
 *   signature, parsed by st->pubkey->unpick into m->sig
 */
683 static bool_t unpick_msg(struct site *st, uint32_t type,
684                          struct buffer_if *msg, struct msg *m)
685 {
686     unsigned minor;
687
688     m->capab_transformnum=-1;
/* remember where the packet began so hashlen can be computed below */
689     m->hashstart=msg->start;
690     CHECK_AVAIL(msg,4);
691     m->dest=buf_unprepend_uint32(msg);
692     CHECK_AVAIL(msg,4);
693     m->source=buf_unprepend_uint32(msg);
694     CHECK_TYPE(msg,type);
695     if (!unpick_name(msg,&m->remote)) return False;
696     m->remote_capabilities=0;
697     m->remote_mtu=0;
/* extra info is optional, but if any is present it must be at least
 * big enough for the capabilities word */
698     if (m->remote.extrainfo.size) {
699         CHECK_AVAIL(&m->remote.extrainfo,4);
700         m->remote_capabilities=buf_unprepend_uint32(&m->remote.extrainfo);
701     }
702     if (type_is_msg34(type) && m->remote.extrainfo.size) {
703         CHECK_AVAIL(&m->remote.extrainfo,2);
704         m->remote_mtu=buf_unprepend_uint16(&m->remote.extrainfo);
705     }
706     if (!unpick_name(msg,&m->local)) return False;
707     if (type==LABEL_PROD) {
708         CHECK_EMPTY(msg);
709         return True;
710     }
711     CHECK_AVAIL(msg,NONCELEN);
712     m->nR=buf_unprepend(msg,NONCELEN);
713     if (type==LABEL_MSG1) {
714         CHECK_EMPTY(msg);
715         return True;
716     }
717     CHECK_AVAIL(msg,NONCELEN);
718     m->nL=buf_unprepend(msg,NONCELEN);
719     if (type==LABEL_MSG2) {
720         CHECK_EMPTY(msg);
721         return True;
722     }
/* The do { } while (0) exists only to give the if a single statement
 * body around the locally defined helper macro. */
723     if (MSGMAJOR(type) == 3) do {
724         minor = MSGMINOR(type);
725 #define MAYBE_READ_CAP(minminor, kind, dflt) do {                       \
726     if (minor < (minminor))                                             \
727         m->capab_##kind##num = (dflt);                                  \
728     else {                                                              \
729         CHECK_AVAIL(msg, 1);                                            \
730         m->capab_##kind##num = buf_unprepend_uint8(msg);                \
731     }                                                                   \
732 } while (0)
733         MAYBE_READ_CAP(1, transform, CAPAB_BIT_ANCIENTTRANSFORM);
734 #undef MAYBE_READ_CAP
735     } while (0);
736     CHECK_AVAIL(msg,2);
737     m->pklen=buf_unprepend_uint16(msg);
738     CHECK_AVAIL(msg,m->pklen);
739     m->pk=buf_unprepend(msg,m->pklen);
/* everything from the start of the packet up to here; NOTE(review):
 * presumably the span the signature covers -- the verification code
 * is outside this excerpt */
740     m->hashlen=msg->start-m->hashstart;
741
742     if (!st->pubkey->unpick(st->pubkey->st,msg,&m->sig)) {
743         return False;
744     }
745
746     CHECK_EMPTY(msg);
747
748     return True;
749 }
750
751 static bool_t name_matches(const struct parsedname *nm, const char *expected)
752 {
753     int expected_len=strlen(expected);
754     return
755         nm->len == expected_len &&
756         !memcmp(nm->name, expected, expected_len);
757 }    
758
759 static bool_t check_msg(struct site *st, uint32_t type, struct msg *m,
760                         cstring_t *error)
761 {
762     if (type==LABEL_MSG1) return True;
763
764     /* Check that the site names and our nonce have been sent
765        back correctly, and then store our peer's nonce. */ 
766     if (!name_matches(&m->remote,st->remotename)) {
767         *error="wrong remote site name";
768         return False;
769     }
770     if (!name_matches(&m->local,st->localname)) {
771         *error="wrong local site name";
772         return False;
773     }
774     if (memcmp(m->nL,st->localN,NONCELEN)!=0) {
775         *error="wrong locally-generated nonce";
776         return False;
777     }
778     if (type==LABEL_MSG2) return True;
779     if (!consttime_memeq(m->nR,st->remoteN,NONCELEN)) {
780         *error="wrong remotely-generated nonce";
781         return False;
782     }
783     /* MSG3 has complicated rules about capabilities, which are
784      * handled in process_msg3. */
785     if (MSGMAJOR(type) == 3) return True;
786     if (m->remote_capabilities!=st->remote_capabilities) {
787         *error="remote capabilities changed";
788         return False;
789     }
790     if (type==LABEL_MSG4) return True;
791     *error="unknown message type";
792     return False;
793 }
794
795 static bool_t generate_msg1(struct site *st)
796 {
797     st->random->generate(st->random->st,NONCELEN,st->localN);
798     return generate_msg(st,LABEL_MSG1,"site:MSG1");
799 }
800
801 static bool_t process_msg1(struct site *st, struct buffer_if *msg1,
802                            const struct comm_addr *src, struct msg *m)
803 {
804     /* We've already determined we're in an appropriate state to
805        process an incoming MSG1, and that the MSG1 has correct values
806        of A and B. */
807
808     st->setup_session_id=m->source;
809     st->remote_capabilities=m->remote_capabilities;
810     memcpy(st->remoteN,m->nR,NONCELEN);
811     return True;
812 }
813
814 static bool_t generate_msg2(struct site *st)
815 {
816     st->random->generate(st->random->st,NONCELEN,st->localN);
817     return generate_msg(st,LABEL_MSG2,"site:MSG2");
818 }
819
820 static bool_t process_msg2(struct site *st, struct buffer_if *msg2,
821                            const struct comm_addr *src)
822 {
823     struct msg m;
824     cstring_t err;
825
826     if (!unpick_msg(st,LABEL_MSG2,msg2,&m)) return False;
827     if (!check_msg(st,LABEL_MSG2,&m,&err)) {
828         slog(st,LOG_SEC,"msg2: %s",err);
829         return False;
830     }
831     st->setup_session_id=m.source;
832     st->remote_capabilities=m.remote_capabilities;
833
834     /* Select the transform to use */
835
836     uint32_t remote_crypto_caps = st->remote_capabilities & CAPAB_TRANSFORM_MASK;
837     if (!remote_crypto_caps)
838         /* old secnets only had this one transform */
839         remote_crypto_caps = 1UL << CAPAB_BIT_ANCIENTTRANSFORM;
840
841 #define CHOOSE_CRYPTO(kind, whats) do {                                 \
842     struct kind##_if *iface;                                            \
843     uint32_t bit, ours = 0;                                             \
844     int i;                                                              \
845     for (i= 0; i < st->n##kind##s; i++) {                               \
846         iface=st->kind##s[i];                                           \
847         bit = 1UL << iface->capab_bit;                                  \
848         if (bit & remote_crypto_caps) goto kind##_found;                \
849         ours |= bit;                                                    \
850     }                                                                   \
851     slog(st,LOG_ERROR,"no " whats " in common"                          \
852          " (us %#"PRIx32"; them: %#"PRIx32")",                          \
853          st->local_capabilities & ours, remote_crypto_caps);            \
854     return False;                                                       \
855 kind##_found:                                                           \
856     st->chosen_##kind = iface;                                          \
857 } while (0)
858
859     CHOOSE_CRYPTO(transform, "transforms");
860
861 #undef CHOOSE_CRYPTO
862
863     memcpy(st->remoteN,m.nR,NONCELEN);
864     return True;
865 }
866
867 static bool_t generate_msg3(struct site *st)
868 {
869     /* Now we have our nonce and their nonce. Think of a secret key,
870        and create message number 3. */
871     st->random->generate(st->random->st,st->dh->len,st->dhsecret);
872     return generate_msg(st,
873                         (st->remote_capabilities & CAPAB_TRANSFORM_MASK)
874                         ? LABEL_MSG3BIS
875                         : LABEL_MSG3,
876                         "site:MSG3");
877 }
878
879 static bool_t process_msg3_msg4(struct site *st, struct msg *m)
880 {
881     /* Check signature and store g^x mod m */
882     if (!st->pubkey->check(st->pubkey->st,
883                            m->hashstart,m->hashlen,
884                            &m->sig)) {
885         slog(st,LOG_SEC,"msg3/msg4 signature failed check!");
886         return False;
887     }
888
889     st->remote_adv_mtu=m->remote_mtu;
890
891     return True;
892 }
893
894 static bool_t process_msg3(struct site *st, struct buffer_if *msg3,
895                            const struct comm_addr *src, uint32_t msgtype)
896 {
897     struct msg m;
898     cstring_t err;
899
900     switch (msgtype) {
901         case CASES_MSG3_KNOWN: break;
902         default: assert(0);
903     }
904
905     if (!unpick_msg(st,msgtype,msg3,&m)) return False;
906     if (!check_msg(st,msgtype,&m,&err)) {
907         slog(st,LOG_SEC,"msg3: %s",err);
908         return False;
909     }
910     uint32_t capab_adv_late = m.remote_capabilities
911         & ~st->remote_capabilities & st->early_capabilities;
912     if (capab_adv_late) {
913         slog(st,LOG_SEC,"msg3 impermissibly adds early capability flag(s)"
914              " %#"PRIx32" (was %#"PRIx32", now %#"PRIx32")",
915              capab_adv_late, st->remote_capabilities, m.remote_capabilities);
916         return False;
917     }
918
919 #define CHOSE_CRYPTO(kind, what) do {                                   \
920     struct kind##_if *iface;                                            \
921     int i;                                                              \
922     for (i=0; i<st->n##kind##s; i++) {                                  \
923         iface=st->kind##s[i];                                           \
924         if (iface->capab_bit == m.capab_##kind##num)                    \
925             goto kind##_found;                                          \
926     }                                                                   \
927     slog(st,LOG_SEC,"peer chose unknown-to-us " what " %d!",            \
928          m.capab_##kind##num);                                                  \
929     return False;                                                       \
930 kind##_found:                                                           \
931     st->chosen_##kind=iface;                                            \
932 } while (0)
933
934     CHOSE_CRYPTO(transform, "transform");
935
936 #undef CHOSE_CRYPTO
937
938     if (!process_msg3_msg4(st,&m))
939         return False;
940
941     /* Update our idea of the remote site's capabilities, now that we've
942      * verified that its message was authentic.
943      *
944      * Our previous idea of the remote site's capabilities came from the
945      * unauthenticated MSG1.  We've already checked that this new message
946      * doesn't change any of the bits we relied upon in the past, but it may
947      * also have set additional capability bits.  We simply throw those away
948      * now, and use the authentic capabilities from this MSG3. */
949     st->remote_capabilities=m.remote_capabilities;
950
951     /* Terminate their DH public key with a '0' */
952     m.pk[m.pklen]=0;
953     /* Invent our DH secret key */
954     st->random->generate(st->random->st,st->dh->len,st->dhsecret);
955
956     /* Generate the shared key and set up the transform */
957     if (!set_new_transform(st,m.pk)) return False;
958
959     return True;
960 }
961
962 static bool_t generate_msg4(struct site *st)
963 {
964     /* We have both nonces, their public key and our private key. Generate
965        our public key, sign it and send it to them. */
966     return generate_msg(st,LABEL_MSG4,"site:MSG4");
967 }
968
969 static bool_t process_msg4(struct site *st, struct buffer_if *msg4,
970                            const struct comm_addr *src)
971 {
972     struct msg m;
973     cstring_t err;
974
975     if (!unpick_msg(st,LABEL_MSG4,msg4,&m)) return False;
976     if (!check_msg(st,LABEL_MSG4,&m,&err)) {
977         slog(st,LOG_SEC,"msg4: %s",err);
978         return False;
979     }
980     
981     if (!process_msg3_msg4(st,&m))
982         return False;
983
984     /* Terminate their DH public key with a '0' */
985     m.pk[m.pklen]=0;
986
987     /* Generate the shared key and set up the transform */
988     if (!set_new_transform(st,m.pk)) return False;
989
990     return True;
991 }
992
/* The three 32-bit words at the front of a packet, in the order in
 * which unpick_msg0() removes them from the buffer.  The generators in
 * this file prepend them as: the peer's session id, our own index, and
 * the message label. */
struct msg0 {
    uint32_t dest;      /* first word: recipient's session id */
    uint32_t source;    /* second word: sender's index */
    uint32_t type;      /* third word: message label (LABEL_*) */
};
998
999 static bool_t unpick_msg0(struct site *st, struct buffer_if *msg0,
1000                           struct msg0 *m)
1001 {
1002     CHECK_AVAIL(msg0,4);
1003     m->dest=buf_unprepend_uint32(msg0);
1004     CHECK_AVAIL(msg0,4);
1005     m->source=buf_unprepend_uint32(msg0);
1006     CHECK_AVAIL(msg0,4);
1007     m->type=buf_unprepend_uint32(msg0);
1008     return True;
1009     /* Leaves transformed part of buffer untouched */
1010 }
1011
1012 static bool_t generate_msg5(struct site *st)
1013 {
1014     cstring_t transform_err;
1015
1016     BUF_ALLOC(&st->buffer,"site:MSG5");
1017     /* We are going to add four words to the message */
1018     buffer_init(&st->buffer,calculate_max_start_pad());
1019     /* Give the netlink code an opportunity to put its own stuff in the
1020        message (configuration information, etc.) */
1021     buf_prepend_uint32(&st->buffer,LABEL_MSG5);
1022     if (call_transform_forwards(st,st->new_transform,
1023                                 &st->buffer,&transform_err))
1024         return False;
1025     buf_prepend_uint32(&st->buffer,LABEL_MSG5);
1026     buf_prepend_uint32(&st->buffer,st->index);
1027     buf_prepend_uint32(&st->buffer,st->setup_session_id);
1028
1029     st->retries=st->setup_retries;
1030     return True;
1031 }
1032
1033 static bool_t process_msg5(struct site *st, struct buffer_if *msg5,
1034                            const struct comm_addr *src,
1035                            struct transform_inst_if *transform)
1036 {
1037     struct msg0 m;
1038     cstring_t transform_err;
1039
1040     if (!unpick_msg0(st,msg5,&m)) return False;
1041
1042     if (call_transform_reverse(st,transform,msg5,&transform_err)) {
1043         /* There's a problem */
1044         slog(st,LOG_SEC,"process_msg5: transform: %s",transform_err);
1045         return False;
1046     }
1047     /* Buffer should now contain untransformed PING packet data */
1048     CHECK_AVAIL(msg5,4);
1049     if (buf_unprepend_uint32(msg5)!=LABEL_MSG5) {
1050         slog(st,LOG_SEC,"MSG5/PING packet contained wrong label");
1051         return False;
1052     }
1053     /* Older versions of secnet used to write some config data here
1054      * which we ignore.  So we don't CHECK_EMPTY */
1055     return True;
1056 }
1057
1058 static void create_msg6(struct site *st, struct transform_inst_if *transform,
1059                         uint32_t session_id)
1060 {
1061     cstring_t transform_err;
1062
1063     BUF_ALLOC(&st->buffer,"site:MSG6");
1064     /* We are going to add four words to the message */
1065     buffer_init(&st->buffer,calculate_max_start_pad());
1066     /* Give the netlink code an opportunity to put its own stuff in the
1067        message (configuration information, etc.) */
1068     buf_prepend_uint32(&st->buffer,LABEL_MSG6);
1069     transform_apply_return problem =
1070         call_transform_forwards(st,transform,
1071                                 &st->buffer,&transform_err);
1072     assert(!problem);
1073     buf_prepend_uint32(&st->buffer,LABEL_MSG6);
1074     buf_prepend_uint32(&st->buffer,st->index);
1075     buf_prepend_uint32(&st->buffer,session_id);
1076 }
1077
1078 static bool_t generate_msg6(struct site *st)
1079 {
1080     if (!is_transform_valid(st->new_transform))
1081         return False;
1082     create_msg6(st,st->new_transform,st->setup_session_id);
1083     st->retries=1; /* Peer will retransmit MSG5 if this packet gets lost */
1084     return True;
1085 }
1086
1087 static bool_t process_msg6(struct site *st, struct buffer_if *msg6,
1088                            const struct comm_addr *src)
1089 {
1090     struct msg0 m;
1091     cstring_t transform_err;
1092
1093     if (!unpick_msg0(st,msg6,&m)) return False;
1094
1095     if (call_transform_reverse(st,st->new_transform,msg6,&transform_err)) {
1096         /* There's a problem */
1097         slog(st,LOG_SEC,"process_msg6: transform: %s",transform_err);
1098         return False;
1099     }
1100     /* Buffer should now contain untransformed PING packet data */
1101     CHECK_AVAIL(msg6,4);
1102     if (buf_unprepend_uint32(msg6)!=LABEL_MSG6) {
1103         slog(st,LOG_SEC,"MSG6/PONG packet contained invalid data");
1104         return False;
1105     }
1106     /* Older versions of secnet used to write some config data here
1107      * which we ignore.  So we don't CHECK_EMPTY */
1108     return True;
1109 }
1110
1111 static transform_apply_return
1112 decrypt_msg0(struct site *st, struct buffer_if *msg0,
1113                            const struct comm_addr *src)
1114 {
1115     cstring_t transform_err, auxkey_err, newkey_err="n/a";
1116     struct msg0 m;
1117     transform_apply_return problem;
1118
1119     if (!unpick_msg0(st,msg0,&m)) return False;
1120
1121     /* Keep a copy so we can try decrypting it with multiple keys */
1122     buffer_copy(&st->scratch, msg0);
1123
1124     problem = call_transform_reverse(st,st->current.transform,
1125                                      msg0,&transform_err);
1126     if (!problem) {
1127         if (!st->auxiliary_is_new)
1128             delete_one_key(st,&st->auxiliary_key,
1129                            "peer has used new key","auxiliary key",LOG_SEC);
1130         return 0;
1131     }
1132     if (transform_apply_return_badseq(problem))
1133         goto badseq;
1134
1135     buffer_copy(msg0, &st->scratch);
1136     problem = call_transform_reverse(st,st->auxiliary_key.transform,
1137                                      msg0,&auxkey_err);
1138     if (!problem) {
1139         slog(st,LOG_DROP,"processing packet which uses auxiliary key");
1140         if (st->auxiliary_is_new) {
1141             /* We previously timed out in state SENTMSG5 but it turns
1142              * out that our peer did in fact get our MSG5 and is
1143              * using the new key.  So we should switch to it too. */
1144             /* This is a bit like activate_new_key. */
1145             struct data_key t;
1146             t=st->current;
1147             st->current=st->auxiliary_key;
1148             st->auxiliary_key=t;
1149
1150             delete_one_key(st,&st->auxiliary_key,"peer has used new key",
1151                            "previous key",LOG_SEC);
1152             st->auxiliary_is_new=0;
1153             st->renegotiate_key_time=st->auxiliary_renegotiate_key_time;
1154         }
1155         return 0;
1156     }
1157     if (transform_apply_return_badseq(problem))
1158         goto badseq;
1159
1160     if (st->state==SITE_SENTMSG5) {
1161         buffer_copy(msg0, &st->scratch);
1162         problem = call_transform_reverse(st,st->new_transform,
1163                                          msg0,&newkey_err);
1164         if (!problem) {
1165             /* It looks like we didn't get the peer's MSG6 */
1166             /* This is like a cut-down enter_new_state(SITE_RUN) */
1167             slog(st,LOG_STATE,"will enter state RUN (MSG0 with new key)");
1168             BUF_FREE(&st->buffer);
1169             st->timeout=0;
1170             activate_new_key(st);
1171             return 0; /* do process the data in this packet */
1172         }
1173         if (transform_apply_return_badseq(problem))
1174             goto badseq;
1175     }
1176
1177     slog(st,LOG_SEC,"transform: %s (aux: %s, new: %s)",
1178          transform_err,auxkey_err,newkey_err);
1179     initiate_key_setup(st,"incoming message would not decrypt",0);
1180     send_nak(src,m.dest,m.source,m.type,msg0,"message would not decrypt");
1181     assert(problem);
1182     return problem;
1183
1184  badseq:
1185     slog(st,LOG_DROP,"transform: %s (bad seq.)",transform_err);
1186     assert(problem);
1187     return problem;
1188 }
1189
1190 static bool_t process_msg0(struct site *st, struct buffer_if *msg0,
1191                            const struct comm_addr *src)
1192 {
1193     uint32_t type;
1194     transform_apply_return problem;
1195
1196     problem = decrypt_msg0(st,msg0,src);
1197     if (problem==transform_apply_seqdupe) {
1198         /* We recently received another copy of this packet, maybe due
1199          * to polypath.  That's not a problem; indeed, for the
1200          * purposes of transport address management it is a success.
1201          * But we don't want to process the packet. */
1202         transport_data_msgok(st,src);
1203         return False;
1204     }
1205     if (problem)
1206         return False;
1207
1208     CHECK_AVAIL(msg0,4);
1209     type=buf_unprepend_uint32(msg0);
1210     switch(type) {
1211     case LABEL_MSG7:
1212         /* We must forget about the current session. */
1213         delete_keys(st,"request from peer",LOG_SEC);
1214         /* probably, the peer is shutting down, and this is going to fail,
1215          * but we need to be trying to bring the link up again */
1216         if (st->keepalive)
1217             initiate_key_setup(st,"peer requested key teardown",0);
1218         return True;
1219     case LABEL_MSG9:
1220         /* Deliver to netlink layer */
1221         st->netlink->deliver(st->netlink->st,msg0);
1222         transport_data_msgok(st,src);
1223         /* See whether we should start negotiating a new key */
1224         if (st->now > st->renegotiate_key_time)
1225             initiate_key_setup(st,"incoming packet in renegotiation window",0);
1226         return True;
1227     default:
1228         slog(st,LOG_SEC,"incoming encrypted message of type %08x "
1229              "(unknown)",type);
1230         break;
1231     }
1232     return False;
1233 }
1234
1235 static void dump_packet(struct site *st, struct buffer_if *buf,
1236                         const struct comm_addr *addr, bool_t incoming,
1237                         bool_t ok)
1238 {
1239     uint32_t dest=get_uint32(buf->start);
1240     uint32_t source=get_uint32(buf->start+4);
1241     uint32_t msgtype=get_uint32(buf->start+8);
1242
1243     if (st->log_events & LOG_DUMP)
1244         slilog(st->log,M_DEBUG,"%s: %s: %08x<-%08x: %08x: %s%s",
1245                st->tunname,incoming?"incoming":"outgoing",
1246                dest,source,msgtype,comm_addr_to_string(addr),
1247                ok?"":" - fail");
1248 }
1249
1250 static bool_t comm_addr_sendmsg(struct site *st,
1251                                 const struct comm_addr *dest,
1252                                 struct buffer_if *buf)
1253 {
1254     int i;
1255     struct comm_clientinfo *commclientinfo = 0;
1256
1257     for (i=0; i < st->ncomms; i++) {
1258         if (st->comms[i] == dest->comm) {
1259             commclientinfo = st->commclientinfos[i];
1260             break;
1261         }
1262     }
1263     return dest->comm->sendmsg(dest->comm->st, buf, dest, commclientinfo);
1264 }
1265
/* Status hook: ignores its argument and always reports 0. */
static uint32_t site_status(void *st)
{
    (void)st;
    return (uint32_t)0;
}
1270
1271 static bool_t send_msg(struct site *st)
1272 {
1273     if (st->retries>0) {
1274         transport_xmit(st, &st->setup_peers, &st->buffer, True);
1275         st->timeout=st->now+st->setup_retry_interval;
1276         st->retries--;
1277         return True;
1278     } else if (st->state==SITE_SENTMSG5) {
1279         logtimeout(st,"timed out sending MSG5, stashing new key");
1280         /* We stash the key we have produced, in case it turns out that
1281          * our peer did see our MSG5 after all and starts using it. */
1282         /* This is a bit like some of activate_new_key */
1283         struct transform_inst_if *t;
1284         t=st->auxiliary_key.transform;
1285         st->auxiliary_key.transform=st->new_transform;
1286         st->new_transform=t;
1287         dispose_transform(&st->new_transform);
1288
1289         st->auxiliary_is_new=1;
1290         st->auxiliary_key.key_timeout=st->now+st->key_lifetime;
1291         st->auxiliary_renegotiate_key_time=st->now+st->key_renegotiate_time;
1292         st->auxiliary_key.remote_session_id=st->setup_session_id;
1293
1294         enter_state_wait(st);
1295         return False;
1296     } else {
1297         logtimeout(st,"timed out sending key setup packet "
1298             "(in state %s)",state_name(st->state));
1299         enter_state_wait(st);
1300         return False;
1301     }
1302 }
1303
1304 static void site_resolve_callback(void *sst, const struct comm_addr *addrs,
1305                                   int stored_naddrs, int all_naddrs,
1306                                   const char *address, const char *failwhy)
1307 {
1308     struct site *st=sst;
1309
1310     if (!stored_naddrs) {
1311         slog(st,LOG_ERROR,"resolution of %s failed: %s",address,failwhy);
1312     } else {
1313         slog(st,LOG_PEER_ADDRS,"resolution of %s completed, %d addrs, eg: %s",
1314              address, all_naddrs, comm_addr_to_string(&addrs[0]));;
1315
1316         int space=st->transport_peers_max-st->resolving_n_results_stored;
1317         int n_tocopy=MIN(stored_naddrs,space);
1318         COPY_ARRAY(st->resolving_results + st->resolving_n_results_stored,
1319                    addrs,
1320                    n_tocopy);
1321         st->resolving_n_results_stored += n_tocopy;
1322         st->resolving_n_results_all += all_naddrs;
1323     }
1324
1325     decrement_resolving_count(st,1);
1326 }
1327
1328 static void decrement_resolving_count(struct site *st, int by)
1329 {
1330     assert(st->resolving_count>0);
1331     st->resolving_count-=by;
1332
1333     if (st->resolving_count)
1334         return;
1335
1336     /* OK, we are done with them all.  Handle combined results. */
1337
1338     const struct comm_addr *addrs=st->resolving_results;
1339     int naddrs=st->resolving_n_results_stored;
1340     assert(naddrs<=st->transport_peers_max);
1341
1342     if (naddrs) {
1343         if (naddrs != st->resolving_n_results_all) {
1344             slog(st,LOG_SETUP_INIT,"resolution of supplied addresses/names"
1345                  " yielded too many results (%d > %d), some ignored",
1346                  st->resolving_n_results_all, naddrs);
1347         }
1348         slog(st,LOG_STATE,"resolution completed, %d addrs, eg: %s",
1349              naddrs, iaddr_to_string(&addrs[0].ia));;
1350     }
1351
1352     switch (st->state) {
1353     case SITE_RESOLVE:
1354         if (transport_compute_setupinit_peers(st,addrs,naddrs,0)) {
1355             enter_new_state(st,SITE_SENTMSG1);
1356         } else {
1357             /* Can't figure out who to try to to talk to */
1358             slog(st,LOG_SETUP_INIT,
1359                  "key exchange failed: cannot find peer address");
1360             enter_state_run(st);
1361         }
1362         break;
1363     case SITE_SENTMSG1: case SITE_SENTMSG2:
1364     case SITE_SENTMSG3: case SITE_SENTMSG4:
1365     case SITE_SENTMSG5:
1366         if (naddrs) {
1367             /* We start using the address immediately for data too.
1368              * It's best to store it in st->peers now because we might
1369              * go via SENTMSG5, WAIT, and a MSG0, straight into using
1370              * the new key (without updating the data peer addrs). */
1371             transport_resolve_complete(st,addrs,naddrs);
1372         } else if (st->local_mobile) {
1373             /* We can't let this rest because we may have a peer
1374              * address which will break in the future. */
1375             slog(st,LOG_SETUP_INIT,"resolution failed: "
1376                  "abandoning key exchange");
1377             enter_state_wait(st);
1378         } else {
1379             slog(st,LOG_SETUP_INIT,"resolution failed: "
1380                  " continuing to use source address of peer's packets"
1381                  " for key exchange and ultimately data");
1382         }
1383         break;
1384     case SITE_RUN:
1385         if (naddrs) {
1386             slog(st,LOG_SETUP_INIT,"resolution completed tardily,"
1387                  " updating peer address(es)");
1388             transport_resolve_complete_tardy(st,addrs,naddrs);
1389         } else if (st->local_mobile) {
1390             /* Not very good.  We should queue (another) renegotiation
1391              * so that we can update the peer address. */
1392             st->key_renegotiate_time=st->now+wait_timeout(st);
1393         } else {
1394             slog(st,LOG_SETUP_INIT,"resolution failed: "
1395                  " continuing to use source address of peer's packets");
1396         }
1397         break;
1398     case SITE_WAIT:
1399     case SITE_STOP:
1400         /* oh well */
1401         break;
1402     }
1403 }
1404
1405 static bool_t initiate_key_setup(struct site *st, cstring_t reason,
1406                                  const struct comm_addr *prod_hint)
1407 {
1408     /* Reentrancy hazard: can call enter_new_state/enter_state_* */
1409     if (st->state!=SITE_RUN) return False;
1410     slog(st,LOG_SETUP_INIT,"initiating key exchange (%s)",reason);
1411     if (st->addresses) {
1412         slog(st,LOG_SETUP_INIT,"resolving peer address(es)");
1413         return enter_state_resolve(st);
1414     } else if (transport_compute_setupinit_peers(st,0,0,prod_hint)) {
1415         return enter_new_state(st,SITE_SENTMSG1);
1416     }
1417     slog(st,LOG_SETUP_INIT,"key exchange failed: no address for peer");
1418     return False;
1419 }
1420
1421 static void activate_new_key(struct site *st)
1422 {
1423     struct transform_inst_if *t;
1424
1425     /* We have three transform instances, which we swap between old,
1426        active and setup */
1427     t=st->auxiliary_key.transform;
1428     st->auxiliary_key.transform=st->current.transform;
1429     st->current.transform=st->new_transform;
1430     st->new_transform=t;
1431     dispose_transform(&st->new_transform);
1432
1433     st->timeout=0;
1434     st->auxiliary_is_new=0;
1435     st->auxiliary_key.key_timeout=st->current.key_timeout;
1436     st->current.key_timeout=st->now+st->key_lifetime;
1437     st->renegotiate_key_time=st->now+st->key_renegotiate_time;
1438     transport_peers_copy(st,&st->peers,&st->setup_peers);
1439     st->current.remote_session_id=st->setup_session_id;
1440
1441     /* Compute the inter-site MTU.  This is min( our_mtu, their_mtu ).
1442      * But their mtu be unspecified, in which case we just use ours. */
1443     uint32_t intersite_mtu=
1444         MIN(st->mtu_target, st->remote_adv_mtu ?: ~(uint32_t)0);
1445     st->netlink->set_mtu(st->netlink->st,intersite_mtu);
1446
1447     slog(st,LOG_ACTIVATE_KEY,"new key activated"
1448          " (mtu ours=%"PRId32" theirs=%"PRId32" intersite=%"PRId32")",
1449          st->mtu_target, st->remote_adv_mtu, intersite_mtu);
1450     enter_state_run(st);
1451 }
1452
1453 static void delete_one_key(struct site *st, struct data_key *key,
1454                            cstring_t reason, cstring_t which, uint32_t loglevel)
1455 {
1456     if (!is_transform_valid(key->transform)) return;
1457     if (reason) slog(st,loglevel,"%s deleted (%s)",which,reason);
1458     dispose_transform(&key->transform);
1459     key->key_timeout=0;
1460 }
1461
1462 static void delete_keys(struct site *st, cstring_t reason, uint32_t loglevel)
1463 {
1464     if (current_valid(st)) {
1465         slog(st,loglevel,"session closed (%s)",reason);
1466
1467         delete_one_key(st,&st->current,0,0,0);
1468         set_link_quality(st);
1469     }
1470     delete_one_key(st,&st->auxiliary_key,0,0,0);
1471 }
1472
1473 static void state_assert(struct site *st, bool_t ok)
1474 {
1475     if (!ok) fatal("site:state_assert");
1476 }
1477
1478 static void enter_state_stop(struct site *st)
1479 {
1480     st->state=SITE_STOP;
1481     st->timeout=0;
1482     delete_keys(st,"entering state STOP",LOG_TIMEOUT_KEY);
1483     dispose_transform(&st->new_transform);
1484 }
1485
1486 static void set_link_quality(struct site *st)
1487 {
1488     uint32_t quality;
1489     if (current_valid(st))
1490         quality=LINK_QUALITY_UP;
1491     else if (st->state==SITE_WAIT || st->state==SITE_STOP)
1492         quality=LINK_QUALITY_DOWN;
1493     else if (st->addresses)
1494         quality=LINK_QUALITY_DOWN_CURRENT_ADDRESS;
1495     else if (transport_peers_valid(&st->peers))
1496         quality=LINK_QUALITY_DOWN_STALE_ADDRESS;
1497     else
1498         quality=LINK_QUALITY_DOWN;
1499
1500     st->netlink->set_quality(st->netlink->st,quality);
1501 }
1502
1503 static void enter_state_run(struct site *st)
1504 {
1505     slog(st,LOG_STATE,"entering state RUN%s",
1506          current_valid(st) ? " (keyed)" : " (unkeyed)");
1507     st->state=SITE_RUN;
1508     st->timeout=0;
1509
1510     st->setup_session_id=0;
1511     transport_peers_clear(st,&st->setup_peers);
1512     FILLZERO(st->localN);
1513     FILLZERO(st->remoteN);
1514     dispose_transform(&st->new_transform);
1515     memset(st->dhsecret,0,st->dh->len);
1516     if (st->sharedsecret) memset(st->sharedsecret,0,st->sharedsecretlen);
1517     set_link_quality(st);
1518
1519     if (st->keepalive && !current_valid(st))
1520         initiate_key_setup(st, "keepalive", 0);
1521 }
1522
1523 static bool_t ensure_resolving(struct site *st)
1524 {
1525     /* Reentrancy hazard: may call site_resolve_callback and hence
1526      * enter_new_state, enter_state_* and generate_msg*. */
1527     if (st->resolving_count)
1528         return True;
1529
1530     assert(st->addresses);
1531
1532     /* resolver->request might reentrantly call site_resolve_callback
1533      * which will decrement st->resolving, so we need to increment it
1534      * twice beforehand to prevent decrement from thinking we're
1535      * finished, and decrement it ourselves.  Alternatively if
1536      * everything fails then there are no callbacks due and we simply
1537      * set it to 0 and return false.. */
1538     st->resolving_n_results_stored=0;
1539     st->resolving_n_results_all=0;
1540     st->resolving_count+=2;
1541     const char **addrp=st->addresses;
1542     const char *address;
1543     bool_t anyok=False;
1544     for (; (address=*addrp++); ) {
1545         bool_t ok = st->resolver->request(st->resolver->st,address,
1546                                           st->remoteport,st->comms[0],
1547                                           site_resolve_callback,st);
1548         if (ok)
1549             st->resolving_count++;
1550         anyok|=ok;
1551     }
1552     if (!anyok) {
1553         st->resolving_count=0;
1554         return False;
1555     }
1556     decrement_resolving_count(st,2);
1557     return True;
1558 }
1559
1560 static bool_t enter_state_resolve(struct site *st)
1561 {
1562     /* Reentrancy hazard!  See ensure_resolving. */
1563     state_assert(st,st->state==SITE_RUN);
1564     slog(st,LOG_STATE,"entering state RESOLVE");
1565     st->state=SITE_RESOLVE;
1566     return ensure_resolving(st);
1567 }
1568
1569 static bool_t enter_new_state(struct site *st, uint32_t next)
1570 {
1571     bool_t (*gen)(struct site *st);
1572     int r;
1573
1574     slog(st,LOG_STATE,"entering state %s",state_name(next));
1575     switch(next) {
1576     case SITE_SENTMSG1:
1577         state_assert(st,st->state==SITE_RUN || st->state==SITE_RESOLVE);
1578         gen=generate_msg1;
1579         st->msg1_crossed_logged = False;
1580         break;
1581     case SITE_SENTMSG2:
1582         state_assert(st,st->state==SITE_RUN || st->state==SITE_RESOLVE ||
1583                      st->state==SITE_SENTMSG1 || st->state==SITE_WAIT);
1584         gen=generate_msg2;
1585         break;
1586     case SITE_SENTMSG3:
1587         state_assert(st,st->state==SITE_SENTMSG1);
1588         BUF_FREE(&st->buffer);
1589         gen=generate_msg3;
1590         break;
1591     case SITE_SENTMSG4:
1592         state_assert(st,st->state==SITE_SENTMSG2);
1593         BUF_FREE(&st->buffer);
1594         gen=generate_msg4;
1595         break;
1596     case SITE_SENTMSG5:
1597         state_assert(st,st->state==SITE_SENTMSG3);
1598         BUF_FREE(&st->buffer);
1599         gen=generate_msg5;
1600         break;
1601     case SITE_RUN:
1602         state_assert(st,st->state==SITE_SENTMSG4);
1603         BUF_FREE(&st->buffer);
1604         gen=generate_msg6;
1605         break;
1606     default:
1607         gen=NULL;
1608         fatal("enter_new_state(%s): invalid new state",state_name(next));
1609         break;
1610     }
1611
1612     if (hacky_par_start_failnow()) return False;
1613
1614     r= gen(st) && send_msg(st);
1615
1616     hacky_par_end(&r,
1617                   st->setup_retries, st->setup_retry_interval,
1618                   send_msg, st);
1619     
1620     if (r) {
1621         st->state=next;
1622         if (next==SITE_RUN) {
1623             BUF_FREE(&st->buffer); /* Never reused */
1624             st->timeout=0; /* Never retransmit */
1625             activate_new_key(st);
1626         }
1627         return True;
1628     }
1629     slog(st,LOG_ERROR,"error entering state %s",state_name(next));
1630     st->buffer.free=False; /* Unconditionally use the buffer; it may be
1631                               in either state, and enter_state_wait() will
1632                               do a BUF_FREE() */
1633     enter_state_wait(st);
1634     return False;
1635 }
1636
1637 /* msg7 tells our peer that we're about to forget our key */
1638 static bool_t send_msg7(struct site *st, cstring_t reason)
1639 {
1640     cstring_t transform_err;
1641
1642     if (current_valid(st) && st->buffer.free
1643         && transport_peers_valid(&st->peers)) {
1644         BUF_ALLOC(&st->buffer,"site:MSG7");
1645         buffer_init(&st->buffer,calculate_max_start_pad());
1646         buf_append_uint32(&st->buffer,LABEL_MSG7);
1647         buf_append_string(&st->buffer,reason);
1648         if (call_transform_forwards(st, st->current.transform,
1649                                     &st->buffer, &transform_err))
1650             goto free_out;
1651         buf_prepend_uint32(&st->buffer,LABEL_MSG0);
1652         buf_prepend_uint32(&st->buffer,st->index);
1653         buf_prepend_uint32(&st->buffer,st->current.remote_session_id);
1654         transport_xmit(st,&st->peers,&st->buffer,True);
1655         BUF_FREE(&st->buffer);
1656     free_out:
1657         return True;
1658     }
1659     return False;
1660 }
1661
1662 /* We go into this state if our peer becomes uncommunicative. Similar to
1663    the "stop" state, we forget all session keys for a while, before
1664    re-entering the "run" state. */
1665 static void enter_state_wait(struct site *st)
1666 {
1667     slog(st,LOG_STATE,"entering state WAIT");
1668     st->timeout=st->now+wait_timeout(st);
1669     st->state=SITE_WAIT;
1670     set_link_quality(st);
1671     BUF_FREE(&st->buffer); /* will have had an outgoing packet in it */
1672     /* XXX Erase keys etc. */
1673 }
1674
1675 static void generate_prod(struct site *st, struct buffer_if *buf)
1676 {
1677     buffer_init(buf,0);
1678     buf_append_uint32(buf,0);
1679     buf_append_uint32(buf,0);
1680     buf_append_uint32(buf,LABEL_PROD);
1681     buf_append_string(buf,st->localname);
1682     buf_append_string(buf,st->remotename);
1683 }
1684
1685 static void generate_send_prod(struct site *st,
1686                                const struct comm_addr *source)
1687 {
1688     if (!st->allow_send_prod) return; /* too soon */
1689     if (!(st->state==SITE_RUN || st->state==SITE_RESOLVE ||
1690           st->state==SITE_WAIT)) return; /* we'd ignore peer's MSG1 */
1691
1692     slog(st,LOG_SETUP_INIT,"prodding peer for key exchange");
1693     st->allow_send_prod=0;
1694     generate_prod(st,&st->scratch);
1695     bool_t ok = comm_addr_sendmsg(st, source, &st->scratch);
1696     dump_packet(st,&st->scratch,source,False,ok);
1697 }
1698
1699 static inline void site_settimeout(uint64_t timeout, int *timeout_io)
1700 {
1701     if (timeout) {
1702         int64_t offset=timeout-*now;
1703         if (offset<0) offset=0;
1704         if (offset>INT_MAX) offset=INT_MAX;
1705         if (*timeout_io<0 || offset<*timeout_io)
1706             *timeout_io=offset;
1707     }
1708 }
1709
1710 static int site_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
1711                            int *timeout_io)
1712 {
1713     struct site *st=sst;
1714
1715     BEFOREPOLL_WANT_FDS(0); /* We don't use any file descriptors */
1716     st->now=*now;
1717
1718     /* Work out when our next timeout is. The earlier of 'timeout' or
1719        'current.key_timeout'. A stored value of '0' indicates no timeout
1720        active. */
1721     site_settimeout(st->timeout, timeout_io);
1722     site_settimeout(st->current.key_timeout, timeout_io);
1723     site_settimeout(st->auxiliary_key.key_timeout, timeout_io);
1724
1725     return 0; /* success */
1726 }
1727
1728 static void check_expiry(struct site *st, struct data_key *key,
1729                          const char *which)
1730 {
1731     if (key->key_timeout && *now>key->key_timeout) {
1732         delete_one_key(st,key,"maximum life exceeded",which,LOG_TIMEOUT_KEY);
1733     }
1734 }
1735
1736 /* NB site_afterpoll will be called before site_beforepoll is ever called */
1737 static void site_afterpoll(void *sst, struct pollfd *fds, int nfds)
1738 {
1739     struct site *st=sst;
1740
1741     st->now=*now;
1742     if (st->timeout && *now>st->timeout) {
1743         st->timeout=0;
1744         if (st->state>=SITE_SENTMSG1 && st->state<=SITE_SENTMSG5) {
1745             if (!hacky_par_start_failnow())
1746                 send_msg(st);
1747         } else if (st->state==SITE_WAIT) {
1748             enter_state_run(st);
1749         } else {
1750             slog(st,LOG_ERROR,"site_afterpoll: unexpected timeout, state=%d",
1751                  st->state);
1752         }
1753     }
1754     check_expiry(st,&st->current,"current key");
1755     check_expiry(st,&st->auxiliary_key,"auxiliary key");
1756 }
1757
1758 /* This function is called by the netlink device to deliver packets
1759    intended for the remote network. The packet is in "raw" wire
1760    format, but is guaranteed to be word-aligned. */
1761 static void site_outgoing(void *sst, struct buffer_if *buf)
1762 {
1763     struct site *st=sst;
1764     cstring_t transform_err;
1765     
1766     if (st->state==SITE_STOP) {
1767         BUF_FREE(buf);
1768         return;
1769     }
1770
1771     st->allow_send_prod=1;
1772
1773     /* In all other states we consider delivering the packet if we have
1774        a valid key and a valid address to send it to. */
1775     if (current_valid(st) && transport_peers_valid(&st->peers)) {
1776         /* Transform it and send it */
1777         if (buf->size>0) {
1778             buf_prepend_uint32(buf,LABEL_MSG9);
1779             if (call_transform_forwards(st, st->current.transform,
1780                                         buf, &transform_err))
1781                 goto free_out;
1782             buf_prepend_uint32(buf,LABEL_MSG0);
1783             buf_prepend_uint32(buf,st->index);
1784             buf_prepend_uint32(buf,st->current.remote_session_id);
1785             transport_xmit(st,&st->peers,buf,False);
1786         }
1787     free_out:
1788         BUF_FREE(buf);
1789         return;
1790     }
1791
1792     slog(st,LOG_DROP,"discarding outgoing packet of size %d",buf->size);
1793     BUF_FREE(buf);
1794     initiate_key_setup(st,"outgoing packet",0);
1795 }
1796
1797 static bool_t named_for_us(struct site *st, const struct buffer_if *buf_in,
1798                            uint32_t type, struct msg *m)
1799     /* For packets which are identified by the local and remote names.
1800      * If it has our name and our peer's name in it it's for us. */
1801 {
1802     struct buffer_if buf[1];
1803     buffer_readonly_clone(buf,buf_in);
1804     return unpick_msg(st,type,buf,m)
1805         && name_matches(&m->remote,st->remotename)
1806         && name_matches(&m->local,st->localname);
1807 }
1808
1809 static bool_t we_have_priority(struct site *st, const struct msg *m) {
1810     if (st->local_capabilities & m->remote_capabilities &
1811         CAPAB_PRIORITY_MOBILE) {
1812         if (st->local_mobile) return True;
1813         if (st-> peer_mobile) return False;
1814     }
1815     return st->our_name_later;
1816 }
1817
1818 static bool_t setup_late_msg_ok(struct site *st, 
1819                                 const struct buffer_if *buf_in,
1820                                 uint32_t msgtype,
1821                                 const struct comm_addr *source) {
1822     /* For setup packets which seem from their type like they are
1823      * late.  Maybe they came via a different path.  All we do is make
1824      * a note of the sending address, iff they look like they are part
1825      * of the current key setup attempt. */
1826     struct msg m;
1827     if (!named_for_us(st,buf_in,msgtype,&m))
1828         /* named_for_us calls unpick_msg which gets the nonces */
1829         return False;
1830     if (!consttime_memeq(m.nR,st->remoteN,NONCELEN) ||
1831         !consttime_memeq(m.nL,st->localN, NONCELEN))
1832         /* spoof ?  from stale run ?  who knows */
1833         return False;
1834     transport_setup_msgok(st,source);
1835     return True;
1836 }
1837
1838 /* This function is called by the communication device to deliver
1839    packets from our peers.
1840    It should return True if the packet is recognised as being for
1841    this current site instance (and should therefore not be processed
1842    by other sites), even if the packet was otherwise ignored. */
1843 static bool_t site_incoming(void *sst, struct buffer_if *buf,
1844                             const struct comm_addr *source)
1845 {
1846     struct site *st=sst;
1847
1848     if (buf->size < 12) return False;
1849
1850     uint32_t dest=get_uint32(buf->start);
1851     uint32_t msgtype=get_uint32(buf->start+8);
1852     struct msg named_msg;
1853
1854     if (msgtype==LABEL_MSG1) {
1855         if (!named_for_us(st,buf,msgtype,&named_msg))
1856             return False;
1857         /* It's a MSG1 addressed to us. Decide what to do about it. */
1858         dump_packet(st,buf,source,True,True);
1859         if (st->state==SITE_RUN || st->state==SITE_RESOLVE ||
1860             st->state==SITE_WAIT) {
1861             /* We should definitely process it */
1862             transport_compute_setupinit_peers(st,0,0,source);
1863             if (process_msg1(st,buf,source,&named_msg)) {
1864                 slog(st,LOG_SETUP_INIT,"key setup initiated by peer");
1865                 bool_t entered=enter_new_state(st,SITE_SENTMSG2);
1866                 if (entered && st->addresses && st->local_mobile)
1867                     /* We must do this as the very last thing, because
1868                        the resolver callback might reenter us. */
1869                     ensure_resolving(st);
1870             } else {
1871                 slog(st,LOG_ERROR,"failed to process incoming msg1");
1872             }
1873             BUF_FREE(buf);
1874             return True;
1875         } else if (st->state==SITE_SENTMSG1) {
1876             /* We've just sent a message 1! They may have crossed on
1877                the wire. If we have priority then we ignore the
1878                incoming one, otherwise we process it as usual. */
1879             if (we_have_priority(st,&named_msg)) {
1880                 BUF_FREE(buf);
1881                 if (!st->msg1_crossed_logged++)
1882                     slog(st,LOG_SETUP_INIT,"crossed msg1s; we are higher "
1883                          "priority => ignore incoming msg1");
1884                 return True;
1885             } else {
1886                 slog(st,LOG_SETUP_INIT,"crossed msg1s; we are lower "
1887                      "priority => use incoming msg1");
1888                 if (process_msg1(st,buf,source,&named_msg)) {
1889                     BUF_FREE(&st->buffer); /* Free our old message 1 */
1890                     transport_setup_msgok(st,source);
1891                     enter_new_state(st,SITE_SENTMSG2);
1892                 } else {
1893                     slog(st,LOG_ERROR,"failed to process an incoming "
1894                          "crossed msg1 (we have low priority)");
1895                 }
1896                 BUF_FREE(buf);
1897                 return True;
1898             }
1899         } else if (st->state==SITE_SENTMSG2 ||
1900                    st->state==SITE_SENTMSG4) {
1901             if (consttime_memeq(named_msg.nR,st->remoteN,NONCELEN)) {
1902                 /* We are ahead in the protocol, but that msg1 had the
1903                  * peer's nonce so presumably it is from this key
1904                  * exchange run, via a slower route */
1905                 transport_setup_msgok(st,source);
1906             } else {
1907                 slog(st,LOG_UNEXPECTED,"competing incoming message 1");
1908             }
1909             BUF_FREE(buf);
1910             return True;
1911         }
1912         /* The message 1 was received at an unexpected stage of the
1913            key setup.  Well, they lost the race. */
1914         slog(st,LOG_UNEXPECTED,"unexpected incoming message 1");
1915         BUF_FREE(buf);
1916         return True;
1917     }
1918     if (msgtype==LABEL_PROD) {
1919         if (!named_for_us(st,buf,msgtype,&named_msg))
1920             return False;
1921         dump_packet(st,buf,source,True,True);
1922         if (st->state!=SITE_RUN) {
1923             slog(st,LOG_DROP,"ignoring PROD when not in state RUN");
1924         } else if (current_valid(st)) {
1925             slog(st,LOG_DROP,"ignoring PROD when we think we have a key");
1926         } else {
1927             initiate_key_setup(st,"peer sent PROD packet",source);
1928         }
1929         BUF_FREE(buf);
1930         return True;
1931     }
1932     if (dest==st->index) {
1933         /* Explicitly addressed to us */
1934         if (msgtype!=LABEL_MSG0) dump_packet(st,buf,source,True,True);
1935         switch (msgtype) {
1936         case LABEL_NAK:
1937             /* If the source is our current peer then initiate a key setup,
1938                because our peer's forgotten the key */
1939             if (get_uint32(buf->start+4)==st->current.remote_session_id) {
1940                 bool_t initiated;
1941                 initiated = initiate_key_setup(st,"received a NAK",source);
1942                 if (!initiated) generate_send_prod(st,source);
1943             } else {
1944                 slog(st,LOG_SEC,"bad incoming NAK");
1945             }
1946             break;
1947         case LABEL_MSG0:
1948             process_msg0(st,buf,source);
1949             break;
1950         case LABEL_MSG1:
1951             /* Setup packet: should not have been explicitly addressed
1952                to us */
1953             slog(st,LOG_SEC,"incoming explicitly addressed msg1");
1954             break;
1955         case LABEL_MSG2:
1956             /* Setup packet: expected only in state SENTMSG1 */
1957             if (st->state!=SITE_SENTMSG1) {
1958                 if ((st->state==SITE_SENTMSG3 ||
1959                      st->state==SITE_SENTMSG5) &&
1960                     setup_late_msg_ok(st,buf,msgtype,source))
1961                     break;
1962                 slog(st,LOG_UNEXPECTED,"unexpected MSG2");
1963             } else if (process_msg2(st,buf,source)) {
1964                 transport_setup_msgok(st,source);
1965                 enter_new_state(st,SITE_SENTMSG3);
1966             } else {
1967                 slog(st,LOG_SEC,"invalid MSG2");
1968             }
1969             break;
1970         case CASES_MSG3_KNOWN:
1971             /* Setup packet: expected only in state SENTMSG2 */
1972             if (st->state!=SITE_SENTMSG2) {
1973                 if ((st->state==SITE_SENTMSG4) &&
1974                     setup_late_msg_ok(st,buf,msgtype,source))
1975                     break;
1976                 slog(st,LOG_UNEXPECTED,"unexpected MSG3");
1977             } else if (process_msg3(st,buf,source,msgtype)) {
1978                 transport_setup_msgok(st,source);
1979                 enter_new_state(st,SITE_SENTMSG4);
1980             } else {
1981                 slog(st,LOG_SEC,"invalid MSG3");
1982             }
1983             break;
1984         case LABEL_MSG4:
1985             /* Setup packet: expected only in state SENTMSG3 */
1986             if (st->state!=SITE_SENTMSG3) {
1987                 if ((st->state==SITE_SENTMSG5) &&
1988                     setup_late_msg_ok(st,buf,msgtype,source))
1989                     break;
1990                 slog(st,LOG_UNEXPECTED,"unexpected MSG4");
1991             } else if (process_msg4(st,buf,source)) {
1992                 transport_setup_msgok(st,source);
1993                 enter_new_state(st,SITE_SENTMSG5);
1994             } else {
1995                 slog(st,LOG_SEC,"invalid MSG4");
1996             }
1997             break;
1998         case LABEL_MSG5:
1999             /* Setup packet: expected only in state SENTMSG4 */
2000             /* (may turn up in state RUN if our return MSG6 was lost
2001                and the new key has already been activated. In that
2002                case we discard it. The peer will realise that we
2003                are using the new key when they see our data packets.
2004                Until then the peer's data packets to us get discarded. */
2005             if (st->state==SITE_SENTMSG4) {
2006                 if (process_msg5(st,buf,source,st->new_transform)) {
2007                     transport_setup_msgok(st,source);
2008                     enter_new_state(st,SITE_RUN);
2009                 } else {
2010                     slog(st,LOG_SEC,"invalid MSG5");
2011                 }
2012             } else if (st->state==SITE_RUN) {
2013                 if (process_msg5(st,buf,source,st->current.transform)) {
2014                     slog(st,LOG_DROP,"got MSG5, retransmitting MSG6");
2015                     transport_setup_msgok(st,source);
2016                     create_msg6(st,st->current.transform,
2017                                 st->current.remote_session_id);
2018                     transport_xmit(st,&st->peers,&st->buffer,True);
2019                     BUF_FREE(&st->buffer);
2020                 } else {
2021                     slog(st,LOG_SEC,"invalid MSG5 (in state RUN)");
2022                 }
2023             } else {
2024                 slog(st,LOG_UNEXPECTED,"unexpected MSG5");
2025             }
2026             break;
2027         case LABEL_MSG6:
2028             /* Setup packet: expected only in state SENTMSG5 */
2029             if (st->state!=SITE_SENTMSG5) {
2030                 slog(st,LOG_UNEXPECTED,"unexpected MSG6");
2031             } else if (process_msg6(st,buf,source)) {
2032                 BUF_FREE(&st->buffer); /* Free message 5 */
2033                 transport_setup_msgok(st,source);
2034                 activate_new_key(st);
2035             } else {
2036                 slog(st,LOG_SEC,"invalid MSG6");
2037             }
2038             break;
2039         default:
2040             slog(st,LOG_SEC,"received message of unknown type 0x%08x",
2041                  msgtype);
2042             break;
2043         }
2044         BUF_FREE(buf);
2045         return True;
2046     }
2047
2048     return False;
2049 }
2050
2051 static void site_control(void *vst, bool_t run)
2052 {
2053     struct site *st=vst;
2054     if (run) enter_state_run(st);
2055     else enter_state_stop(st);
2056 }
2057
/* Phase hook, registered for PHASE_SHUTDOWN by site_apply().
 * `newphase' is not looked at. */
static void site_phase_hook(void *sst, uint32_t newphase)
{
    struct site *st=sst;

    /* The program is shutting down; tell our peer.  Whether the
       message could actually be sent makes no difference to us. */
    send_msg7(st,"shutting down");
}
2065
2066 static void site_childpersist_clearkeys(void *sst, uint32_t newphase)
2067 {
2068     struct site *st=sst;
2069     dispose_transform(&st->current.transform);
2070     dispose_transform(&st->auxiliary_key.transform);
2071     dispose_transform(&st->new_transform);
2072     /* Not much point overwiting the signing key, since we loaded it
2073        from disk, and it is only valid prospectively if at all,
2074        anyway. */
2075     /* XXX it would be best to overwrite the DH state, because that
2076        _is_ relevant to forward secrecy.  However we have no
2077        convenient interface for doing that and in practice gmp has
2078        probably dribbled droppings all over the malloc arena.  A good
2079        way to fix this would be to have a privsep child for asymmetric
2080        crypto operations, but that's a task for another day. */
2081 }
2082
2083 static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
2084                           list_t *args)
2085 {
2086     static uint32_t index_sequence;
2087     struct site *st;
2088     item_t *item;
2089     dict_t *dict;
2090     int i;
2091
2092     NEW(st);
2093
2094     st->cl.description="site";
2095     st->cl.type=CL_SITE;
2096     st->cl.apply=NULL;
2097     st->cl.interface=&st->ops;
2098     st->ops.st=st;
2099     st->ops.control=site_control;
2100     st->ops.status=site_status;
2101
2102     /* First parameter must be a dict */
2103     item=list_elem(args,0);
2104     if (!item || item->type!=t_dict)
2105         cfgfatal(loc,"site","parameter must be a dictionary\n");
2106     
2107     dict=item->data.dict;
2108     st->localname=dict_read_string(dict, "local-name", True, "site", loc);
2109     st->remotename=dict_read_string(dict, "name", True, "site", loc);
2110
2111     st->keepalive=dict_read_bool(dict,"keepalive",False,"site",loc,False);
2112
2113     st->peer_mobile=dict_read_bool(dict,"mobile",False,"site",loc,False);
2114     st->local_mobile=
2115         dict_read_bool(dict,"local-mobile",False,"site",loc,False);
2116
2117     /* Sanity check (which also allows the 'sites' file to include
2118        site() closures for all sites including our own): refuse to
2119        talk to ourselves */
2120     if (strcmp(st->localname,st->remotename)==0) {
2121         Message(M_DEBUG,"site %s: local-name==name -> ignoring this site\n",
2122                 st->localname);
2123         if (st->peer_mobile != st->local_mobile)
2124             cfgfatal(loc,"site","site %s's peer-mobile=%d"
2125                     " but our local-mobile=%d\n",
2126                     st->localname, st->peer_mobile, st->local_mobile);
2127         free(st);
2128         return NULL;
2129     }
2130     if (st->peer_mobile && st->local_mobile) {
2131         Message(M_WARNING,"site %s: site is mobile but so are we"
2132                 " -> ignoring this site\n", st->remotename);
2133         free(st);
2134         return NULL;
2135     }
2136
2137     assert(index_sequence < 0xffffffffUL);
2138     st->index = ++index_sequence;
2139     st->local_capabilities = 0;
2140     st->early_capabilities = CAPAB_PRIORITY_MOBILE;
2141     st->netlink=find_cl_if(dict,"link",CL_NETLINK,True,"site",loc);
2142
2143 #define GET_CLOSURE_LIST(dictkey,things,nthings,CL_TYPE) do{            \
2144     list_t *things##_cfg=dict_lookup(dict,dictkey);                     \
2145     if (!things##_cfg)                                                  \
2146         cfgfatal(loc,"site","closure list \"%s\" not found\n",dictkey); \
2147     st->nthings=list_length(things##_cfg);                              \
2148     NEW_ARY(st->things,st->nthings);                                    \
2149     assert(st->nthings);                                                \
2150     for (i=0; i<st->nthings; i++) {                                     \
2151         item_t *item=list_elem(things##_cfg,i);                         \
2152         if (item->type!=t_closure)                                      \
2153             cfgfatal(loc,"site","%s is not a closure\n",dictkey);       \
2154         closure_t *cl=item->data.closure;                               \
2155         if (cl->type!=CL_TYPE)                                          \
2156             cfgfatal(loc,"site","%s closure wrong type\n",dictkey);     \
2157         st->things[i]=cl->interface;                                    \
2158     }                                                                   \
2159 }while(0)
2160
2161     GET_CLOSURE_LIST("comm",comms,ncomms,CL_COMM);
2162
2163     NEW_ARY(st->commclientinfos, st->ncomms);
2164     dict_t *comminfo = dict_read_dict(dict,"comm-info",False,"site",loc);
2165     for (i=0; i<st->ncomms; i++) {
2166         st->commclientinfos[i] =
2167             !comminfo ? 0 :
2168             st->comms[i]->clientinfo(st->comms[i],comminfo,loc);
2169     }
2170
2171     st->resolver=find_cl_if(dict,"resolver",CL_RESOLVER,True,"site",loc);
2172     st->log=find_cl_if(dict,"log",CL_LOG,True,"site",loc);
2173     st->random=find_cl_if(dict,"random",CL_RANDOMSRC,True,"site",loc);
2174
2175     st->privkey=find_cl_if(dict,"local-key",CL_SIGPRIVKEY,True,"site",loc);
2176     st->addresses=dict_read_string_array(dict,"address",False,"site",loc,0);
2177     if (st->addresses)
2178         st->remoteport=dict_read_number(dict,"port",True,"site",loc,0);
2179     else st->remoteport=0;
2180     st->pubkey=find_cl_if(dict,"key",CL_SIGPUBKEY,True,"site",loc);
2181
2182     GET_CLOSURE_LIST("transform",transforms,ntransforms,CL_TRANSFORM);
2183
2184     st->dh=find_cl_if(dict,"dh",CL_DH,True,"site",loc);
2185
2186     if (st->privkey->sethash || st->pubkey->sethash) {
2187         struct hash_if *hash=find_cl_if(dict,"hash",CL_HASH,True,"site",loc);
2188         if (st->privkey->sethash) st->privkey->sethash(st->privkey->st,hash);
2189         if (st->pubkey->sethash) st->pubkey->sethash(st->pubkey->st,hash);
2190     }
2191
2192 #define DEFAULT(D) (st->peer_mobile || st->local_mobile \
2193                     ? DEFAULT_MOBILE_##D : DEFAULT_##D)
2194 #define CFG_NUMBER(k,D) dict_read_number(dict,(k),False,"site",loc,DEFAULT(D));
2195
2196     st->key_lifetime=         CFG_NUMBER("key-lifetime",  KEY_LIFETIME);
2197     st->setup_retries=        CFG_NUMBER("setup-retries", SETUP_RETRIES);
2198     st->setup_retry_interval= CFG_NUMBER("setup-timeout", SETUP_RETRY_INTERVAL);
2199     st->wait_timeout_mean=    CFG_NUMBER("wait-time",     WAIT_TIME);
2200     st->mtu_target= dict_read_number(dict,"mtu-target",False,"site",loc,0);
2201
2202     st->mobile_peer_expiry= dict_read_number(
2203        dict,"mobile-peer-expiry",False,"site",loc,DEFAULT_MOBILE_PEER_EXPIRY);
2204
2205     const char *peerskey= st->peer_mobile
2206         ? "mobile-peers-max" : "static-peers-max";
2207     st->transport_peers_max= dict_read_number(
2208         dict,peerskey,False,"site",loc, st->addresses ? 4 : 3);
2209     if (st->transport_peers_max<1 ||
2210         st->transport_peers_max>MAX_PEER_ADDRS) {
2211         cfgfatal(loc,"site", "%s must be in range 1.."
2212                  STRING(MAX_PEER_ADDRS) "\n", peerskey);
2213     }
2214
2215     if (st->key_lifetime < DEFAULT(KEY_RENEGOTIATE_GAP)*2)
2216         st->key_renegotiate_time=st->key_lifetime/2;
2217     else
2218         st->key_renegotiate_time=st->key_lifetime-DEFAULT(KEY_RENEGOTIATE_GAP);
2219     st->key_renegotiate_time=dict_read_number(
2220         dict,"renegotiate-time",False,"site",loc,st->key_renegotiate_time);
2221     if (st->key_renegotiate_time > st->key_lifetime) {
2222         cfgfatal(loc,"site",
2223                  "renegotiate-time must be less than key-lifetime\n");
2224     }
2225
2226     st->log_events=string_list_to_word(dict_lookup(dict,"log-events"),
2227                                        log_event_table,"site");
2228
2229     st->resolving_count=0;
2230     st->allow_send_prod=0;
2231
2232     st->tunname=safe_malloc(strlen(st->localname)+strlen(st->remotename)+5,
2233                             "site_apply");
2234     sprintf(st->tunname,"%s<->%s",st->localname,st->remotename);
2235
2236     /* The information we expect to see in incoming messages of type 1 */
2237     /* fixme: lots of unchecked overflows here, but the results are only
2238        corrupted packets rather than undefined behaviour */
2239     st->our_name_later=(strcmp(st->localname,st->remotename)>0);
2240
2241     buffer_new(&st->buffer,SETUP_BUFFER_LEN);
2242
2243     buffer_new(&st->scratch,SETUP_BUFFER_LEN);
2244     BUF_ALLOC(&st->scratch,"site:scratch");
2245
2246     /* We are interested in poll(), but only for timeouts. We don't have
2247        any fds of our own. */
2248     register_for_poll(st, site_beforepoll, site_afterpoll, "site");
2249     st->timeout=0;
2250
2251     st->remote_capabilities=0;
2252     st->chosen_transform=0;
2253     st->current.key_timeout=0;
2254     st->auxiliary_key.key_timeout=0;
2255     transport_peers_clear(st,&st->peers);
2256     transport_peers_clear(st,&st->setup_peers);
2257     /* XXX mlock these */
2258     st->dhsecret=safe_malloc(st->dh->len,"site:dhsecret");
2259     st->sharedsecretlen=st->sharedsecretallocd=0;
2260     st->sharedsecret=0;
2261
2262 #define SET_CAPBIT(bit) do {                                            \
2263     uint32_t capflag = 1UL << (bit);                                    \
2264     if (st->local_capabilities & capflag)                               \
2265         slog(st,LOG_ERROR,"capability bit"                              \
2266              " %d (%#"PRIx32") reused", (bit), capflag);                \
2267     st->local_capabilities |= capflag;                                  \
2268 } while (0)
2269
2270     for (i=0; i<st->ntransforms; i++)
2271         SET_CAPBIT(st->transforms[i]->capab_bit);
2272
2273 #undef SET_CAPBIT
2274
2275     if (st->local_mobile || st->peer_mobile)
2276         st->local_capabilities |= CAPAB_PRIORITY_MOBILE;
2277
2278     /* We need to register the remote networks with the netlink device */
2279     uint32_t netlink_mtu; /* local virtual interface mtu */
2280     st->netlink->reg(st->netlink->st, site_outgoing, st, &netlink_mtu);
2281     if (!st->mtu_target)
2282         st->mtu_target=netlink_mtu;
2283     
2284     for (i=0; i<st->ncomms; i++)
2285         st->comms[i]->request_notify(st->comms[i]->st, st, site_incoming);
2286
2287     st->current.transform=0;
2288     st->auxiliary_key.transform=0;
2289     st->new_transform=0;
2290     st->auxiliary_is_new=0;
2291
2292     enter_state_stop(st);
2293
2294     add_hook(PHASE_SHUTDOWN,site_phase_hook,st);
2295     add_hook(PHASE_CHILDPERSIST,site_childpersist_clearkeys,st);
2296
2297     return new_closure(&st->cl);
2298 }
2299
2300 void site_module(dict_t *dict)
2301 {
2302     add_closure(dict,"site",site_apply);
2303 }
2304
2305
2306 /***** TRANSPORT PEERS definitions *****/
2307
2308 static void transport_peers_debug(struct site *st, transport_peers *dst,
2309                                   const char *didwhat,
2310                                   int nargs, const struct comm_addr *args,
2311                                   size_t stride) {
2312     int i;
2313     char *argp;
2314
2315     if (!(st->log_events & LOG_PEER_ADDRS))
2316         return; /* an optimisation */
2317
2318     slog(st, LOG_PEER_ADDRS, "peers (%s) %s nargs=%d => npeers=%d",
2319          (dst==&st->peers ? "data" :
2320           dst==&st->setup_peers ? "setup" : "UNKNOWN"),
2321          didwhat, nargs, dst->npeers);
2322
2323     for (i=0, argp=(void*)args;
2324          i<nargs;
2325          i++, (argp+=stride?stride:sizeof(*args))) {
2326         const struct comm_addr *ca=(void*)argp;
2327         slog(st, LOG_PEER_ADDRS, " args: addrs[%d]=%s",
2328              i, comm_addr_to_string(ca));
2329     }
2330     for (i=0; i<dst->npeers; i++) {
2331         struct timeval diff;
2332         timersub(tv_now,&dst->peers[i].last,&diff);
2333         const struct comm_addr *ca=&dst->peers[i].addr;
2334         slog(st, LOG_PEER_ADDRS, " peers: addrs[%d]=%s T-%ld.%06ld",
2335              i, comm_addr_to_string(ca),
2336              (unsigned long)diff.tv_sec, (unsigned long)diff.tv_usec);
2337     }
2338 }
2339
2340 static void transport_peers_expire(struct site *st, transport_peers *peers) {
2341     /* peers must be sorted first */
2342     int previous_peers=peers->npeers;
2343     struct timeval oldest;
2344     oldest.tv_sec  = tv_now->tv_sec - st->mobile_peer_expiry;
2345     oldest.tv_usec = tv_now->tv_usec;
2346     while (peers->npeers>1 &&
2347            timercmp(&peers->peers[peers->npeers-1].last, &oldest, <))
2348         peers->npeers--;
2349     if (peers->npeers != previous_peers)
2350         transport_peers_debug(st,peers,"expire", 0,0,0);
2351 }
2352
2353 static bool_t transport_peer_record_one(struct site *st, transport_peers *peers,
2354                                         const struct comm_addr *ca,
2355                                         const struct timeval *tv) {
2356     /* returns false if output is full */
2357     int search;
2358
2359     if (peers->npeers >= st->transport_peers_max)
2360         return 0;
2361
2362     for (search=0; search<peers->npeers; search++)
2363         if (comm_addr_equal(&peers->peers[search].addr, ca))
2364             return 1;
2365
2366     peers->peers[peers->npeers].addr = *ca;
2367     peers->peers[peers->npeers].last = *tv;
2368     peers->npeers++;
2369     return 1;
2370 }
2371
2372 static void transport_record_peers(struct site *st, transport_peers *peers,
2373                                    const struct comm_addr *addrs, int naddrs,
2374                                    const char *m) {
2375     /* We add addrs into peers.  The new entries end up at the front
2376      * and displace entries towards the end (perhaps even off the
2377      * end).  Any existing matching entries are moved up to the front.
2378      *
2379      * Caller must first call transport_peers_expire. */
2380
2381     if (naddrs==1) {
2382         /* avoids debug for uninteresting updates */
2383         int i;
2384         for (i=0; i<peers->npeers; i++) {
2385             if (comm_addr_equal(&addrs[0], &peers->peers[i].addr)) {
2386                 memmove(peers->peers+1, peers->peers,
2387                         sizeof(peers->peers[0]) * i);
2388                 peers->peers[0].addr = addrs[0];
2389                 peers->peers[0].last = *tv_now;
2390                 return;
2391             }
2392         }
2393     }
2394
2395     int old_npeers=peers->npeers;
2396     transport_peer old_peers[old_npeers];
2397     COPY_ARRAY(old_peers,peers->peers,old_npeers);
2398
2399     peers->npeers=0;
2400     int i;
2401     for (i=0; i<naddrs; i++) {
2402         if (!transport_peer_record_one(st,peers, &addrs[i], tv_now))
2403             break;
2404     }
2405     for (i=0; i<old_npeers; i++) {
2406         const transport_peer *old=&old_peers[i];
2407         if (!transport_peer_record_one(st,peers, &old->addr, &old->last))
2408             break;
2409     }
2410
2411     transport_peers_debug(st,peers,m, naddrs,addrs,0);
2412 }
2413
2414 static void transport_expire_record_peers(struct site *st,
2415                                           transport_peers *peers,
2416                                           const struct comm_addr *addrs,
2417                                           int naddrs, const char *m) {
2418     /* Convenience function */
2419     transport_peers_expire(st,peers);
2420     transport_record_peers(st,peers,addrs,naddrs,m);
2421 }
2422
2423 static bool_t transport_compute_setupinit_peers(struct site *st,
2424         const struct comm_addr *configured_addrs /* 0 if none or not found */,
2425         int n_configured_addrs /* 0 if none or not found */,
2426         const struct comm_addr *incoming_packet_addr /* 0 if none */) {
2427     if (!n_configured_addrs && !incoming_packet_addr &&
2428         !transport_peers_valid(&st->peers))
2429         return False;
2430
2431     slog(st,LOG_SETUP_INIT,
2432          "using: %d configured addr(s);%s %d old peer addrs(es)",
2433          n_configured_addrs,
2434          incoming_packet_addr ? " incoming packet address;" : "",
2435          st->peers.npeers);
2436
2437     /* Non-mobile peers try addresses until one is plausible.  The
2438      * effect is that this code always tries first the configured
2439      * address if supplied, or otherwise the address of the incoming
2440      * PROD, or finally the existing data peer if one exists; this is
2441      * as desired. */
2442
2443     transport_peers_copy(st,&st->setup_peers,&st->peers);
2444     transport_peers_expire(st,&st->setup_peers);
2445
2446     if (incoming_packet_addr)
2447         transport_record_peers(st,&st->setup_peers,
2448                                incoming_packet_addr,1, "incoming");
2449
2450     if (n_configured_addrs)
2451         transport_record_peers(st,&st->setup_peers,
2452                               configured_addrs,n_configured_addrs, "setupinit");
2453
2454     assert(transport_peers_valid(&st->setup_peers));
2455     return True;
2456 }
2457
2458 static void transport_setup_msgok(struct site *st, const struct comm_addr *a) {
2459     if (st->peer_mobile)
2460         transport_expire_record_peers(st,&st->setup_peers,a,1,"setupmsg");
2461 }
2462 static void transport_data_msgok(struct site *st, const struct comm_addr *a) {
2463     if (st->peer_mobile)
2464         transport_expire_record_peers(st,&st->peers,a,1,"datamsg");
2465 }
2466
2467 static int transport_peers_valid(transport_peers *peers) {
2468     return peers->npeers;
2469 }
2470 static void transport_peers_clear(struct site *st, transport_peers *peers) {
2471     peers->npeers= 0;
2472     transport_peers_debug(st,peers,"clear",0,0,0);
2473 }
2474 static void transport_peers_copy(struct site *st, transport_peers *dst,
2475                                  const transport_peers *src) {
2476     dst->npeers=src->npeers;
2477     COPY_ARRAY(dst->peers, src->peers, dst->npeers);
2478     transport_peers_debug(st,dst,"copy",
2479                           src->npeers, &src->peers->addr, sizeof(*src->peers));
2480 }
2481
2482 static void transport_resolve_complete(struct site *st,
2483                                        const struct comm_addr *addrs,
2484                                        int naddrs) {
2485     transport_expire_record_peers(st,&st->peers,addrs,naddrs,
2486                                   "resolved data");
2487     transport_expire_record_peers(st,&st->setup_peers,addrs,naddrs,
2488                                   "resolved setup");
2489 }
2490
2491 static void transport_resolve_complete_tardy(struct site *st,
2492                                              const struct comm_addr *addrs,
2493                                              int naddrs) {
2494     transport_expire_record_peers(st,&st->peers,addrs,naddrs,
2495                                   "resolved tardily");
2496 }
2497
2498 static void transport_peers__copy_by_mask(transport_peer *out, int *nout_io,
2499                                           unsigned mask,
2500                                           const transport_peers *inp) {
2501     /* out and in->peers may be the same region, or nonoverlapping */
2502     const transport_peer *in=inp->peers;
2503     int slot;
2504     for (slot=0; slot<inp->npeers; slot++) {
2505         if (!(mask & (1U << slot)))
2506             continue;
2507         if (!(out==in && slot==*nout_io))
2508             COPY_OBJ(out[*nout_io], in[slot]);
2509         (*nout_io)++;
2510     }
2511 }
2512
2513 void transport_xmit(struct site *st, transport_peers *peers,
2514                     struct buffer_if *buf, bool_t candebug) {
2515     int slot;
2516     transport_peers_expire(st, peers);
2517     unsigned failed=0; /* bitmask */
2518     assert(MAX_PEER_ADDRS < sizeof(unsigned)*CHAR_BIT);
2519
2520     int nfailed=0;
2521     for (slot=0; slot<peers->npeers; slot++) {
2522         transport_peer *peer=&peers->peers[slot];
2523         bool_t ok = comm_addr_sendmsg(st, &peer->addr, buf);
2524         if (candebug)
2525             dump_packet(st, buf, &peer->addr, False, ok);
2526         if (!ok) {
2527             failed |= 1U << slot;
2528             nfailed++;
2529         }
2530         if (ok && !st->peer_mobile)
2531             break;
2532     }
2533     /* Now we need to demote/delete failing addrs: if we are mobile we
2534      * merely demote them; otherwise we delete them. */
2535     if (st->local_mobile) {
2536         unsigned expected = ((1U << nfailed)-1) << (peers->npeers-nfailed);
2537         /* `expected' has all the failures at the end already */
2538         if (failed != expected) {
2539             int fslot=0;
2540             transport_peer failedpeers[nfailed];
2541             transport_peers__copy_by_mask(failedpeers, &fslot, failed,peers);
2542             assert(fslot == nfailed);
2543             int wslot=0;
2544             transport_peers__copy_by_mask(peers->peers,&wslot,~failed,peers);
2545             assert(wslot+nfailed == peers->npeers);
2546             COPY_ARRAY(peers->peers+wslot, failedpeers, nfailed);
2547             transport_peers_debug(st,peers,"mobile failure reorder",0,0,0);
2548         }
2549     } else {
2550         if (failed && peers->npeers > 1) {
2551             int wslot=0;
2552             transport_peers__copy_by_mask(peers->peers,&wslot,~failed,peers);
2553             peers->npeers=wslot;
2554             transport_peers_debug(st,peers,"non-mobile failure cleanup",0,0,0);
2555         }
2556     }
2557 }
2558
2559 /***** END of transport peers declarations *****/